"""Build a PDF validation report (summary, coefficients, plots, audit trail)."""

import logging
import os
import platform
import tempfile
from datetime import datetime
from io import BytesIO

import matplotlib
import pandas as pd
import sklearn
import statsmodels
from fpdf import FPDF

matplotlib.use('Agg')  # Use non-GUI backend; set before importing pyplot
import matplotlib.pyplot as plt  # noqa: E402

logger = logging.getLogger(__name__)


def _latin1_safe(value) -> str:
    """Return ``str(value)`` with every non-Latin-1 character replaced by '?'.

    The report is written with the built-in 'Arial' font and no Unicode font
    is registered, so caller-supplied text is sanitised before being handed
    to FPDF instead of letting one unsupported character abort the report.
    """
    return str(value).encode('latin-1', 'replace').decode('latin-1')


class AnalysisReport(FPDF):
    """FPDF document with a branded header and a page/timestamp footer."""

    def header(self):
        """Draw the report title and a separator line at the top of a page."""
        try:
            # Built-in Arial font; no Unicode font is registered here.
            self.set_font('Arial', 'B', 15)
            self.set_text_color(79, 70, 229)  # Indigo 600
            self.cell(0, 10, 'Data_analysis - Rapport de Validation', 0, 1, 'L')
            self.set_draw_color(226, 232, 240)
            self.line(10, 22, 200, 22)
            self.ln(10)
        except Exception:
            # Best effort: a broken header must not abort the whole report.
            logger.exception("Header error")

    def footer(self):
        """Draw the page number and generation timestamp at the page bottom."""
        try:
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.set_text_color(148, 163, 184)
            self.cell(0, 10, f'Page {self.page_no()} | Genere le {datetime.now().strftime("%Y-%m-%d %H:%M")}', 0, 0, 'C')
        except Exception:
            # Best effort: a broken footer must not abort the whole report.
            logger.exception("Footer error")


def _add_summary(pdf: FPDF, project_name: str, results: dict) -> None:
    """Write the project name, model type, R² and sample size."""
    pdf.set_font('Arial', 'B', 12)
    pdf.set_text_color(51, 65, 85)
    pdf.cell(0, 10, _latin1_safe(f"Projet : {project_name}"), 0, 1)
    pdf.ln(5)

    # model_type may be missing, or a list of which only the first is shown.
    model_type = results.get('model_type', 'Regression')
    if isinstance(model_type, list):
        model_type = model_type[0] if model_type else 'Regression'

    pdf.set_font('Arial', '', 10)
    pdf.cell(0, 8, _latin1_safe(f"Modele : {model_type}"), 0, 1)

    r_squared = results.get('r_squared', 0)
    if r_squared is None:
        r_squared = 0
    pdf.cell(0, 8, f"Precision (R²) : {float(r_squared):.4f}", 0, 1)

    sample_size = results.get('sample_size', 0)
    if sample_size is None:
        sample_size = 0
    pdf.cell(0, 8, f"Taille de l'echantillon : {int(sample_size)}", 0, 1)
    pdf.ln(10)


def _add_coefficients_table(pdf: FPDF, results: dict) -> None:
    """Write the feature / coefficient / p-value table.

    Non-numeric coefficients are shown as 0.0 and missing or non-numeric
    p-values as 1.0. P-values below 0.05 are printed in green, others in red.
    """
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Coefficients du Modele", 0, 1)

    pdf.set_font('Arial', 'B', 9)
    pdf.set_fill_color(248, 250, 252)
    pdf.cell(80, 8, "Feature", 1, 0, 'L', True)
    pdf.cell(50, 8, "Coefficient", 1, 0, 'R', True)
    pdf.cell(50, 8, "P-Value", 1, 1, 'R', True)

    coefficients = results.get('coefficients', {})
    # ``or {}`` also covers a key that is present but set to None.
    p_values = results.get('p_values') or {}

    pdf.set_font('Arial', '', 9)
    if coefficients:
        for name, coef in coefficients.items():
            try:
                coef_val = float(coef)
            except (TypeError, ValueError):
                coef_val = 0.0

            try:
                p_val = float(p_values.get(name, 1.0))
            except (TypeError, ValueError):
                p_val = 1.0

            pdf.cell(80, 8, _latin1_safe(name), 1)
            pdf.cell(50, 8, f"{coef_val:.4f}", 1, 0, 'R')

            if p_val < 0.05:
                pdf.set_text_color(16, 185, 129)  # Emerald
            else:
                pdf.set_text_color(244, 63, 94)  # Rose
            pdf.cell(50, 8, f"{p_val:.4f}", 1, 1, 'R')
            pdf.set_text_color(51, 65, 85)  # Restore body colour for next row
    else:
        pdf.cell(0, 8, "Aucun coefficient disponible", 0, 1)

    pdf.ln(15)


def _embed_figure(pdf: FPDF, fig) -> None:
    """Save *fig* as a temporary PNG, insert it into *pdf*, delete the file.

    The temporary file is removed even when saving or embedding fails.
    Closing the figure is left to the caller.
    """
    # Only a unique path is needed; the handle is closed before matplotlib
    # writes to it so the file is never opened twice at the same time.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        image_path = tmp.name
    try:
        fig.savefig(image_path, dpi=150, bbox_inches='tight')
        pdf.image(image_path, x=10, w=190)
    finally:
        try:
            os.unlink(image_path)
        except OSError:
            logger.warning("Could not remove temporary plot file %s", image_path)


def _add_fit_plot(pdf: FPDF, fit_data) -> None:
    """Add the regression-fit chart built from items with 'x', 'real', 'pred'."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Courbe de Regression", 0, 1)
    pdf.ln(5)

    x_vals = [p['x'] for p in fit_data]
    y_real = [p['real'] for p in fit_data]
    y_pred = [p['pred'] for p in fit_data]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.scatter(x_vals, y_real, alpha=0.6, color='#4f46e5', label='Données réelles', s=50)
        ax.plot(x_vals, y_pred, color='#ef4444', linewidth=2, label='Courbe de régression')
        ax.set_xlabel('Valeur X', fontsize=12)
        ax.set_ylabel('Valeur Y', fontsize=12)
        ax.set_title('Ajustement du Modèle', fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        _embed_figure(pdf, fig)
    finally:
        plt.close(fig)  # Always release the figure, even on failure
    pdf.ln(10)


def _add_residuals_plot(pdf: FPDF, residuals_data) -> None:
    """Add the residuals chart built from items with 'fitted' and 'residual'."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Graphique des Residus", 0, 1)
    pdf.ln(5)

    fitted = [p['fitted'] for p in residuals_data]
    residuals = [p['residual'] for p in residuals_data]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.scatter(fitted, residuals, alpha=0.6, color='#4f46e5', s=50)
        ax.axhline(y=0, color='#ef4444', linestyle='--', linewidth=2)
        ax.set_xlabel('Valeurs Ajustees', fontsize=12)
        ax.set_ylabel('Residus', fontsize=12)
        ax.set_title('Graphique des Residus', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        _embed_figure(pdf, fig)
    finally:
        plt.close(fig)  # Always release the figure, even on failure
    pdf.ln(10)


def _add_audit_trail(pdf: FPDF, audit_trail: dict) -> None:
    """Write the excluded-row count and the library versions in use."""
    audit_trail = audit_trail or {}

    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Piste d'Audit & Reproductibilite", 0, 1)

    pdf.set_font('Arial', '', 8)
    pdf.set_text_color(100, 116, 139)

    excluded_count = audit_trail.get('excluded_rows_count', 0)
    if excluded_count is None:
        excluded_count = 0
    pdf.multi_cell(0, 6, f"- Nettoyage : {int(excluded_count)} lignes exclues de l'analyse.")

    pdf.ln(5)
    pdf.set_font('Arial', 'B', 8)
    pdf.cell(0, 6, "Environnement Technique :", 0, 1)
    pdf.set_font('Arial', '', 8)
    pdf.cell(0, 5, f"- Python : {platform.python_version()}", 0, 1)
    pdf.cell(0, 5, f"- Pandas : {pd.__version__}", 0, 1)

    # Fall back to a generic label if a package exposes no __version__.
    sklearn_version = getattr(sklearn, '__version__', 'Installé')
    pdf.cell(0, 5, f"- Scikit-learn : {sklearn_version}", 0, 1)
    statsmodels_version = getattr(statsmodels, '__version__', 'Installé')
    pdf.cell(0, 5, f"- Statsmodels : {statsmodels_version}", 0, 1)

    pdf.cell(0, 5, "- Random Seed : 42 (Fixed)", 0, 1)


def _has_items(value) -> bool:
    """Return True when *value* is a non-empty sized container (not None)."""
    return value is not None and len(value) > 0


def create_pdf_report(project_name: str, results: dict, audit_trail: dict) -> bytes:
    """Render the analysis results as a PDF and return its bytes.

    Args:
        project_name: Name printed in the summary section.
        results: Analysis output. Keys read here: 'model_type', 'r_squared',
            'sample_size', 'coefficients', 'p_values', 'fit_plot' (items with
            'x', 'real', 'pred') and 'diagnostic_plot' (items with 'fitted',
            'residual'). All are optional.
        audit_trail: Reads 'excluded_rows_count'; may be empty or None.

    Returns:
        The PDF document as bytes. If anything fails while building the
        report, the failure is logged and a one-page PDF describing the
        error is returned instead.
    """
    try:
        pdf = AnalysisReport()
        pdf.add_page()

        # 1. Summary
        _add_summary(pdf, project_name, results)

        # 2. Coefficients Table
        _add_coefficients_table(pdf, results)

        # 3. Visualization Charts
        fit_data = results.get('fit_plot')
        if _has_items(fit_data):
            _add_fit_plot(pdf, fit_data)

        residuals_data = results.get('diagnostic_plot')
        if _has_items(residuals_data):
            _add_residuals_plot(pdf, residuals_data)

        # 4. Audit Trail (Reproducibility)
        _add_audit_trail(pdf, audit_trail)

        pdf_buffer = BytesIO()
        pdf.output(pdf_buffer)
        return pdf_buffer.getvalue()

    except Exception as e:
        # Keep the traceback in the logs; the PDF only carries the message.
        logger.exception("PDF report generation failed")

        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font('Arial', 'B', 16)
        error_pdf.cell(0, 10, "Erreur lors de la generation du PDF", 0, 1)
        error_pdf.ln(10)
        error_pdf.set_font('Arial', '', 12)
        # The message may itself contain the character that broke the report,
        # so it is sanitised before being written with the built-in font.
        error_pdf.multi_cell(0, 10, _latin1_safe(f"Erreur: {e}"))

        error_buffer = BytesIO()
        error_pdf.output(error_buffer)
        return error_buffer.getvalue()