# Listing metadata from the original file viewer: 223 lines, 8.0 KiB, Python.
from fpdf import FPDF
|
|
from datetime import datetime
|
|
from io import BytesIO
|
|
import pandas as pd
|
|
import platform
|
|
import sklearn
|
|
import statsmodels
|
|
import os
|
|
import matplotlib
|
|
# Select the non-interactive 'Agg' backend. This call is deliberately placed
# before the `matplotlib.pyplot` import that follows it.
matplotlib.use('Agg') # Use non-GUI backend
|
|
import matplotlib.pyplot as plt
|
|
import tempfile
|
|
|
|
class AnalysisReport(FPDF):
    """FPDF document carrying the report's title header and page footer.

    Both hooks use the 'Arial' font name and catch every exception, printing
    it instead of propagating, so a decoration failure is reported on stdout
    rather than raised to the caller.
    """

    def header(self):
        """Write the bold report title followed by a light horizontal rule."""
        title_rgb = (79, 70, 229)  # Indigo 600
        rule_rgb = (226, 232, 240)
        try:
            self.set_font('Arial', 'B', 15)
            self.set_text_color(*title_rgb)
            self.cell(0, 10, 'Data_analysis - Rapport de Validation', 0, 1, 'L')
            self.set_draw_color(*rule_rgb)
            # Rule drawn from x=10 to x=200 at y=22, just under the title.
            self.line(10, 22, 200, 22)
            self.ln(10)
        except Exception as exc:
            print(f"Header error: {exc}")

    def footer(self):
        """Write the centered page number and generation timestamp."""
        footer_rgb = (148, 163, 184)
        try:
            # Negative y positions the cursor relative to the page bottom.
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.set_text_color(*footer_rgb)
            page_label = f'Page {self.page_no()} | Genere le {datetime.now().strftime("%Y-%m-%d %H:%M")}'
            self.cell(0, 10, page_label, 0, 0, 'C')
        except Exception as exc:
            print(f"Footer error: {exc}")
|
|
|
|
def _latin1_safe(text) -> str:
    """Return ``text`` as a string containing only Latin-1 characters.

    The report is written with the built-in 'Arial' font, which cannot encode
    characters outside Latin-1; such characters are replaced with '?' so that
    one unsupported character does not abort the whole report.
    """
    return str(text).encode('latin-1', 'replace').decode('latin-1')


def _get_or(mapping: dict, key: str, default):
    """Return ``mapping[key]``, or ``default`` when the key is absent or None."""
    value = mapping.get(key, default)
    return default if value is None else value


def _as_float(value, fallback: float) -> float:
    """Convert ``value`` to float, returning ``fallback`` if it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return fallback


def _draw_fit_chart(ax, points) -> None:
    """Plot observed values as a scatter and predictions as a line on ``ax``.

    Each item of ``points`` must provide the keys 'x', 'real' and 'pred'.
    """
    x_vals = [p['x'] for p in points]
    y_real = [p['real'] for p in points]
    y_pred = [p['pred'] for p in points]

    ax.scatter(x_vals, y_real, alpha=0.6, color='#4f46e5', label='Données réelles', s=50)
    # NOTE(review): the line follows the input order of the points; if callers
    # can pass unsorted x values the curve will zig-zag - confirm upstream.
    ax.plot(x_vals, y_pred, color='#ef4444', linewidth=2, label='Courbe de régression')
    ax.set_xlabel('Valeur X', fontsize=12)
    ax.set_ylabel('Valeur Y', fontsize=12)
    ax.set_title('Ajustement du Modèle', fontsize=14, fontweight='bold')
    ax.legend()


def _draw_residuals_chart(ax, points) -> None:
    """Plot residuals against fitted values on ``ax`` with a zero reference line.

    Each item of ``points`` must provide the keys 'fitted' and 'residual'.
    """
    fitted = [p['fitted'] for p in points]
    residuals = [p['residual'] for p in points]

    ax.scatter(fitted, residuals, alpha=0.6, color='#4f46e5', s=50)
    ax.axhline(y=0, color='#ef4444', linestyle='--', linewidth=2)
    ax.set_xlabel('Valeurs Ajustees', fontsize=12)
    ax.set_ylabel('Residus', fontsize=12)
    ax.set_title('Graphique des Residus', fontsize=14, fontweight='bold')


def _add_chart_section(pdf, title: str, draw, points) -> None:
    """Write a section title, render a chart via ``draw(ax, points)``, embed it.

    The figure is saved to a temporary PNG that is inserted into ``pdf``.
    Both the figure and the temporary file are released in ``finally`` so a
    failure while drawing, saving or embedding does not leak them.
    """
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, title, 0, 1)
    pdf.ln(5)

    fig, ax = plt.subplots(figsize=(10, 6))
    png_path = None
    try:
        draw(ax, points)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()

        # Only reserve a unique path here; the handle is closed before
        # matplotlib writes to it and before the file is removed.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            png_path = tmp.name
        fig.savefig(png_path, dpi=150, bbox_inches='tight')
        pdf.image(png_path, x=10, w=190)
    finally:
        plt.close(fig)
        if png_path is not None:
            os.unlink(png_path)

    pdf.ln(10)


def _write_summary(pdf, project_name: str, results: dict) -> None:
    """Write the project name, model type, R² and sample size."""
    pdf.set_font('Arial', 'B', 12)
    pdf.set_text_color(51, 65, 85)
    pdf.cell(0, 10, f"Projet : {_latin1_safe(project_name)}", 0, 1)
    pdf.ln(5)

    # model_type may arrive as a list; use its first entry, or the default
    # label when the list is empty.
    model_type = results.get('model_type', 'Regression')
    if isinstance(model_type, list):
        model_type = model_type[0] if model_type else 'Regression'

    pdf.set_font('Arial', '', 10)
    pdf.cell(0, 8, f"Modele : {_latin1_safe(model_type)}", 0, 1)

    r_squared = _get_or(results, 'r_squared', 0)
    pdf.cell(0, 8, f"Precision (R²) : {float(r_squared):.4f}", 0, 1)

    sample_size = _get_or(results, 'sample_size', 0)
    pdf.cell(0, 8, f"Taille de l'echantillon : {int(sample_size)}", 0, 1)

    pdf.ln(10)


def _write_coefficients_table(pdf, results: dict) -> None:
    """Write the coefficient / p-value table, or a placeholder line if empty."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Coefficients du Modele", 0, 1)

    pdf.set_font('Arial', 'B', 9)
    pdf.set_fill_color(248, 250, 252)
    pdf.cell(80, 8, "Feature", 1, 0, 'L', True)
    pdf.cell(50, 8, "Coefficient", 1, 0, 'R', True)
    pdf.cell(50, 8, "P-Value", 1, 1, 'R', True)

    # A value of None is treated like a missing key for both mappings.
    coefficients = results.get('coefficients') or {}
    p_values = results.get('p_values') or {}

    pdf.set_font('Arial', '', 9)
    if not coefficients:
        pdf.cell(0, 8, "Aucun coefficient disponible", 0, 1)
    else:
        for name, coef in coefficients.items():
            # Non-numeric values fall back to 0.0 (coefficient) / 1.0 (p-value).
            coef_val = _as_float(coef, 0.0)
            p_val = _as_float(p_values.get(name, 1.0), 1.0)

            pdf.cell(80, 8, _latin1_safe(name), 1)
            pdf.cell(50, 8, f"{coef_val:.4f}", 1, 0, 'R')

            # Emerald when p < 0.05, rose otherwise.
            if p_val < 0.05:
                pdf.set_text_color(16, 185, 129)
            else:
                pdf.set_text_color(244, 63, 94)
            pdf.cell(50, 8, f"{p_val:.4f}", 1, 1, 'R')
            pdf.set_text_color(51, 65, 85)  # back to the body text color

    pdf.ln(15)


def _write_audit_trail(pdf, audit_trail: dict) -> None:
    """Write the excluded-row count and the library versions in use."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Piste d'Audit & Reproductibilite", 0, 1)

    pdf.set_font('Arial', '', 8)
    pdf.set_text_color(100, 116, 139)

    excluded_count = _get_or(audit_trail, 'excluded_rows_count', 0)
    pdf.multi_cell(0, 6, f"- Nettoyage : {int(excluded_count)} lignes exclues de l'analyse.")

    pdf.ln(5)
    pdf.set_font('Arial', 'B', 8)
    pdf.cell(0, 6, "Environnement Technique :", 0, 1)
    pdf.set_font('Arial', '', 8)
    pdf.cell(0, 5, f"- Python : {platform.python_version()}", 0, 1)
    pdf.cell(0, 5, f"- Pandas : {pd.__version__}", 0, 1)

    # Fall back to a generic label when a package exposes no __version__.
    sklearn_version = getattr(sklearn, '__version__', 'Installé')
    pdf.cell(0, 5, f"- Scikit-learn : {sklearn_version}", 0, 1)
    statsmodels_version = getattr(statsmodels, '__version__', 'Installé')
    pdf.cell(0, 5, f"- Statsmodels : {statsmodels_version}", 0, 1)

    pdf.cell(0, 5, "- Random Seed : 42 (Fixed)", 0, 1)


def create_pdf_report(project_name: str, results: dict, audit_trail: dict) -> bytes:
    """Build the validation report PDF and return its bytes.

    Sections, in order: summary, coefficients table, optional regression-fit
    and residuals charts, and the audit trail.

    Args:
        project_name: Title shown in the summary. Characters outside Latin-1
            are replaced with '?'.
        results: Mapping read for the keys 'model_type', 'r_squared',
            'sample_size', 'coefficients', 'p_values', 'fit_plot' (items with
            'x', 'real', 'pred') and 'diagnostic_plot' (items with 'fitted',
            'residual'). Missing or None entries fall back to defaults, and
            the two charts are skipped when their data is missing or empty.
        audit_trail: Mapping read for the key 'excluded_rows_count'.

    Returns:
        The PDF as bytes. This function does not raise for report-building
        failures: on any exception it returns a short PDF that contains the
        error message instead.
    """
    try:
        pdf = AnalysisReport()
        pdf.add_page()

        _write_summary(pdf, project_name, results)
        _write_coefficients_table(pdf, results)

        fit_points = results.get('fit_plot')
        if fit_points is not None and len(fit_points) > 0:
            _add_chart_section(pdf, "Courbe de Regression", _draw_fit_chart, fit_points)

        residual_points = results.get('diagnostic_plot')
        if residual_points is not None and len(residual_points) > 0:
            _add_chart_section(pdf, "Graphique des Residus", _draw_residuals_chart, residual_points)

        _write_audit_trail(pdf, audit_trail)

        pdf_buffer = BytesIO()
        pdf.output(pdf_buffer)
        return pdf_buffer.getvalue()

    except Exception as exc:
        # Deliberate best-effort boundary: callers always receive PDF bytes.
        error_pdf = FPDF()
        error_pdf.add_page()
        error_pdf.set_font('Arial', 'B', 16)
        error_pdf.cell(0, 10, "Erreur lors de la generation du PDF", 0, 1)
        error_pdf.ln(10)
        error_pdf.set_font('Arial', '', 12)
        # The message may contain the very character that could not be
        # encoded; sanitize it so this fallback cannot fail the same way.
        error_pdf.multi_cell(0, 10, f"Erreur: {_latin1_safe(exc)}")
        error_buffer = BytesIO()
        error_pdf.output(error_buffer)
        return error_buffer.getvalue()