2026-01-11 22:56:02 +01:00

223 lines
8.0 KiB
Python

from fpdf import FPDF
from datetime import datetime
from io import BytesIO
import pandas as pd
import platform
import sklearn
import statsmodels
import os
import matplotlib
matplotlib.use('Agg') # Use non-GUI backend
import matplotlib.pyplot as plt
import tempfile
class AnalysisReport(FPDF):
    """FPDF subclass that draws the report's page header and page footer.

    Both hooks are best-effort: any exception raised while drawing is caught
    and printed to stdout instead of propagating to the caller.
    """

    def header(self):
        """Draw the report title, a horizontal separator rule, and a gap."""
        title_rgb = (79, 70, 229)   # Indigo 600
        rule_rgb = (226, 232, 240)
        try:
            # NOTE(review): 'Arial' is selected without any font being
            # registered in this class, so it presumably resolves to a
            # built-in font - confirm accented text renders as expected.
            self.set_font('Arial', 'B', 15)
            self.set_text_color(*title_rgb)
            self.cell(0, 10, 'Data_analysis - Rapport de Validation', 0, 1, 'L')
            self.set_draw_color(*rule_rgb)
            self.line(10, 22, 200, 22)
            self.ln(10)
        except Exception as err:
            print(f"Header error: {err}")

    def footer(self):
        """Draw a centred caption with the page number and generation time."""
        caption_rgb = (148, 163, 184)
        try:
            # A negative offset positions the cursor relative to the page bottom.
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.set_text_color(*caption_rgb)
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
            caption = f'Page {self.page_no()} | Genere le {stamp}'
            self.cell(0, 10, caption, 0, 0, 'C')
        except Exception as err:
            print(f"Footer error: {err}")
def _latin1_safe(value) -> str:
    """Return ``str(value)`` with characters outside Latin-1 replaced by '?'.

    The report selects the 'Arial' font without registering a Unicode font,
    and FPDF's built-in fonts can only encode Latin-1 text. Sanitising
    caller-supplied strings keeps one stray character (for example a euro
    sign in a project name) from aborting the whole report.
    """
    return str(value).encode('latin-1', 'replace').decode('latin-1')


def _as_float(value, default: float) -> float:
    """Convert *value* to float, returning *default* when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _as_int(value, default: int = 0) -> int:
    """Convert *value* to int, returning *default* when it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float('inf')).
        return default


def _place_figure(pdf, fig) -> None:
    """Save *fig* to a temporary PNG, insert it into *pdf*, then delete the file.

    The temporary file is removed even when saving or embedding fails.
    """
    tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
    # Close our own handle first so the path can be reopened by name.
    tmp.close()
    try:
        fig.savefig(tmp.name, dpi=150, bbox_inches='tight')
        pdf.image(tmp.name, x=10, w=190)
    finally:
        os.unlink(tmp.name)


def create_pdf_report(project_name: str, results: dict, audit_trail: dict) -> bytes:
    """Build the validation report PDF and return it as bytes.

    Args:
        project_name: Name printed in the summary section.
        results: Analysis output. Keys read here: ``model_type`` (str or
            list), ``r_squared``, ``sample_size``, ``coefficients`` and
            ``p_values`` (mappings keyed by feature name), ``fit_plot``
            (sequence of mappings with ``x``/``real``/``pred``) and
            ``diagnostic_plot`` (sequence of mappings with
            ``fitted``/``residual``). Missing or ``None`` entries are
            skipped or replaced by defaults.
        audit_trail: Mapping from which ``excluded_rows_count`` is read.

    Returns:
        The rendered PDF. If rendering fails, a one-page PDF describing the
        error is returned instead.
    """
    try:
        # Treat a None argument like an empty mapping instead of failing.
        results = results or {}
        audit_trail = audit_trail or {}

        pdf = AnalysisReport()
        pdf.add_page()

        # 1. Summary
        pdf.set_font('Arial', 'B', 12)
        pdf.set_text_color(51, 65, 85)
        pdf.cell(0, 10, f"Projet : {_latin1_safe(project_name)}", 0, 1)
        pdf.ln(5)

        model_type = results.get('model_type', 'Regression')
        if isinstance(model_type, list):
            model_type = model_type[0] if model_type else 'Regression'
        if model_type is None:
            model_type = 'Regression'
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 8, f"Modele : {_latin1_safe(model_type)}", 0, 1)

        r_squared = _as_float(results.get('r_squared'), 0.0)
        pdf.cell(0, 8, f"Precision (R²) : {r_squared:.4f}", 0, 1)

        sample_size = _as_int(results.get('sample_size'))
        pdf.cell(0, 8, f"Taille de l'echantillon : {sample_size}", 0, 1)
        pdf.ln(10)

        # 2. Coefficients Table
        pdf.set_font('Arial', 'B', 11)
        pdf.cell(0, 10, "Coefficients du Modele", 0, 1)
        pdf.set_font('Arial', 'B', 9)
        pdf.set_fill_color(248, 250, 252)
        pdf.cell(80, 8, "Feature", 1, 0, 'L', True)
        pdf.cell(50, 8, "Coefficient", 1, 0, 'R', True)
        pdf.cell(50, 8, "P-Value", 1, 1, 'R', True)

        coefficients = results.get('coefficients', {})
        p_values = results.get('p_values')
        if p_values is None:
            p_values = {}

        pdf.set_font('Arial', '', 9)
        if coefficients:
            for name, coef in coefficients.items():
                coef_val = _as_float(coef, 0.0)
                # A missing or non-numeric p-value is shown as 1.0.
                p_val = _as_float(p_values.get(name, 1.0), 1.0)

                pdf.cell(80, 8, _latin1_safe(name), 1)
                pdf.cell(50, 8, f"{coef_val:.4f}", 1, 0, 'R')
                if p_val < 0.05:
                    pdf.set_text_color(16, 185, 129)  # Emerald
                else:
                    pdf.set_text_color(244, 63, 94)  # Rose
                pdf.cell(50, 8, f"{p_val:.4f}", 1, 1, 'R')
                # Restore the default text colour for the next row.
                pdf.set_text_color(51, 65, 85)
        else:
            pdf.cell(0, 8, "Aucun coefficient disponible", 0, 1)
        pdf.ln(15)

        # 3. Visualization Charts
        fit_data = results.get('fit_plot')
        if fit_data is not None and len(fit_data) > 0:
            pdf.set_font('Arial', 'B', 11)
            pdf.cell(0, 10, "Courbe de Regression", 0, 1)
            pdf.ln(5)

            x_vals = [p['x'] for p in fit_data]
            y_real = [p['real'] for p in fit_data]
            y_pred = [p['pred'] for p in fit_data]

            fig = plt.figure(figsize=(10, 6))
            try:
                plt.scatter(x_vals, y_real, alpha=0.6, color='#4f46e5', label='Données réelles', s=50)
                plt.plot(x_vals, y_pred, color='#ef4444', linewidth=2, label='Courbe de régression')
                plt.xlabel('Valeur X', fontsize=12)
                plt.ylabel('Valeur Y', fontsize=12)
                plt.title('Ajustement du Modèle', fontsize=14, fontweight='bold')
                plt.legend()
                plt.grid(True, alpha=0.3)
                plt.tight_layout()
                _place_figure(pdf, fig)
            finally:
                # Always release the figure, even when drawing fails.
                plt.close(fig)
            pdf.ln(10)

        # Residuals plot
        residuals_data = results.get('diagnostic_plot')
        if residuals_data is not None and len(residuals_data) > 0:
            pdf.set_font('Arial', 'B', 11)
            pdf.cell(0, 10, "Graphique des Residus", 0, 1)
            pdf.ln(5)

            fitted = [p['fitted'] for p in residuals_data]
            residuals = [p['residual'] for p in residuals_data]

            fig = plt.figure(figsize=(10, 6))
            try:
                plt.scatter(fitted, residuals, alpha=0.6, color='#4f46e5', s=50)
                plt.axhline(y=0, color='#ef4444', linestyle='--', linewidth=2)
                plt.xlabel('Valeurs Ajustees', fontsize=12)
                plt.ylabel('Residus', fontsize=12)
                plt.title('Graphique des Residus', fontsize=14, fontweight='bold')
                plt.grid(True, alpha=0.3)
                plt.tight_layout()
                _place_figure(pdf, fig)
            finally:
                plt.close(fig)
            pdf.ln(10)

        # 4. Audit Trail (Reproducibility)
        pdf.set_font('Arial', 'B', 11)
        pdf.cell(0, 10, "Piste d'Audit & Reproductibilite", 0, 1)
        pdf.set_font('Arial', '', 8)
        pdf.set_text_color(100, 116, 139)

        # Cleaning steps
        excluded_count = _as_int(audit_trail.get('excluded_rows_count'))
        pdf.multi_cell(0, 6, f"- Nettoyage : {excluded_count} lignes exclues de l'analyse.")

        # Environment
        pdf.ln(5)
        pdf.set_font('Arial', 'B', 8)
        pdf.cell(0, 6, "Environnement Technique :", 0, 1)
        pdf.set_font('Arial', '', 8)
        pdf.cell(0, 5, f"- Python : {platform.python_version()}", 0, 1)
        pdf.cell(0, 5, f"- Pandas : {pd.__version__}", 0, 1)
        # Fall back to a generic label if a package exposes no __version__.
        sklearn_version = getattr(sklearn, '__version__', 'Installé')
        pdf.cell(0, 5, f"- Scikit-learn : {sklearn_version}", 0, 1)
        statsmodels_version = getattr(statsmodels, '__version__', 'Installé')
        pdf.cell(0, 5, f"- Statsmodels : {statsmodels_version}", 0, 1)
        # NOTE(review): the seed is a hard-coded label here; confirm it matches
        # the seed actually used by the analysis that produced `results`.
        pdf.cell(0, 5, "- Random Seed : 42 (Fixed)", 0, 1)

        # Generate PDF bytes using BytesIO
        pdf_buffer = BytesIO()
        pdf.output(pdf_buffer)
        return pdf_buffer.getvalue()

    except Exception as e:
        # Fall back to a minimal PDF carrying the error message. The message is
        # sanitised because it may quote the very character that broke
        # rendering, which would otherwise make this fallback raise as well.
        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font('Arial', 'B', 16)
        error_pdf.cell(0, 10, "Erreur lors de la generation du PDF", 0, 1)
        error_pdf.ln(10)
        error_pdf.set_font('Arial', '', 12)
        error_pdf.multi_cell(0, 10, f"Erreur: {_latin1_safe(e)}")

        error_buffer = BytesIO()
        error_pdf.output(error_buffer)
        return error_buffer.getvalue()