# Analysis/backend/app/core/engine/reports.py

import logging
import os
import platform
import tempfile
from datetime import datetime
from io import BytesIO

import pandas as pd
import sklearn
import statsmodels
from fpdf import FPDF

import matplotlib
matplotlib.use('Agg')  # Use non-GUI backend
import matplotlib.pyplot as plt

class AnalysisReport(FPDF):
    """PDF document that carries the report's page header and footer.

    ``header`` and ``footer`` override the FPDF page hooks. Both are
    best-effort: a drawing failure is logged (with its traceback) and then
    swallowed, so a cosmetic problem does not abort the report body.
    """

    def header(self):
        """Draw the report title in indigo followed by a light separator line."""
        try:
            # NOTE: no Unicode font is registered here; the title is plain ASCII.
            self.set_font('Arial', 'B', 15)
            self.set_text_color(79, 70, 229)  # Indigo 600
            self.cell(0, 10, 'Data_analysis - Rapport de Validation', 0, 1, 'L')
            self.set_draw_color(226, 232, 240)
            self.line(10, 22, 200, 22)
            self.ln(10)
        except Exception:
            # Deliberately non-fatal: keep generating the page without a header.
            logging.getLogger(__name__).exception("Header error")

    def footer(self):
        """Draw the centred page number and generation timestamp near the page bottom."""
        try:
            self.set_y(-15)  # negative value: measured from the bottom of the page
            self.set_font('Arial', 'I', 8)
            self.set_text_color(148, 163, 184)
            self.cell(0, 10, f'Page {self.page_no()} | Genere le {datetime.now().strftime("%Y-%m-%d %H:%M")}', 0, 0, 'C')
        except Exception:
            # Deliberately non-fatal: keep generating the page without a footer.
            logging.getLogger(__name__).exception("Footer error")

def _latin1_safe(value):
    """Return ``str(value)`` with every character outside Latin-1 replaced by ``?``.

    The report only selects fonts by name and never registers a Unicode font,
    so text that cannot be encoded as Latin-1 would make FPDF raise. Replacing
    the offending characters keeps the document renderable.
    """
    return str(value).encode('latin-1', 'replace').decode('latin-1')


def _float_or(value, fallback):
    """Convert *value* to ``float``; return *fallback* if it is missing or malformed."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return fallback


def _add_chart(pdf, title, draw):
    """Add a titled chart to *pdf*; *draw* receives the matplotlib axes to plot on.

    The figure is rendered to a temporary PNG. Both the figure and the file
    are released in ``finally`` blocks, so nothing leaks when plotting,
    saving or embedding fails.
    """
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, title, 0, 1)
    pdf.ln(5)

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        draw(ax)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()

        fd, png_path = tempfile.mkstemp(suffix='.png')
        # Close our handle first: the file is reopened by name below, and an
        # open handle would also prevent deleting it on Windows.
        os.close(fd)
        try:
            fig.savefig(png_path, dpi=150, bbox_inches='tight')
            pdf.image(png_path, x=10, w=190)
        finally:
            try:
                os.unlink(png_path)
            except OSError:
                logging.getLogger(__name__).warning(
                    "Could not remove temporary chart file %s", png_path
                )
    finally:
        plt.close(fig)

    pdf.ln(10)


def _write_summary(pdf, project_name, results):
    """Write the project name, model type, R-squared and sample size lines."""
    pdf.set_font('Arial', 'B', 12)
    pdf.set_text_color(51, 65, 85)
    pdf.cell(0, 10, f"Projet : {_latin1_safe(project_name)}", 0, 1)
    pdf.ln(5)

    # 'model_type' may be absent, or a list of which only the first entry is shown.
    model_type = results.get('model_type', 'Regression')
    if isinstance(model_type, list):
        model_type = model_type[0] if model_type else 'Regression'

    pdf.set_font('Arial', '', 10)
    pdf.cell(0, 8, f"Modele : {_latin1_safe(model_type)}", 0, 1)

    r_squared = results.get('r_squared', 0)
    if r_squared is None:
        r_squared = 0
    pdf.cell(0, 8, f"Precision (R²) : {float(r_squared):.4f}", 0, 1)

    sample_size = results.get('sample_size', 0)
    if sample_size is None:
        sample_size = 0
    pdf.cell(0, 8, f"Taille de l'echantillon : {int(sample_size)}", 0, 1)


def _write_coefficient_table(pdf, results):
    """Write the feature / coefficient / p-value table, or a placeholder line if empty."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Coefficients du Modele", 0, 1)

    pdf.set_font('Arial', 'B', 9)
    pdf.set_fill_color(248, 250, 252)
    pdf.cell(80, 8, "Feature", 1, 0, 'L', True)
    pdf.cell(50, 8, "Coefficient", 1, 0, 'R', True)
    pdf.cell(50, 8, "P-Value", 1, 1, 'R', True)

    # `or {}` also covers keys that are present but hold None.
    coefficients = results.get('coefficients') or {}
    p_values = results.get('p_values') or {}

    pdf.set_font('Arial', '', 9)
    if not coefficients:
        pdf.cell(0, 8, "Aucun coefficient disponible", 0, 1)
        return

    for name, coef in coefficients.items():
        coef_val = _float_or(coef, 0.0)
        # A missing or malformed p-value is shown as 1.0.
        p_val = _float_or(p_values.get(name, 1.0), 1.0)

        pdf.cell(80, 8, _latin1_safe(name), 1)
        pdf.cell(50, 8, f"{coef_val:.4f}", 1, 0, 'R')

        if p_val < 0.05:
            pdf.set_text_color(16, 185, 129)  # Emerald
        else:
            pdf.set_text_color(244, 63, 94)  # Rose
        pdf.cell(50, 8, f"{p_val:.4f}", 1, 1, 'R')
        pdf.set_text_color(51, 65, 85)  # back to the body text colour


def _write_charts(pdf, results):
    """Add the regression-fit and residual charts when their data is present."""
    fit_points = results.get('fit_plot')
    if fit_points is not None and len(fit_points) > 0:
        def draw_fit(ax):
            xs = [point['x'] for point in fit_points]
            observed = [point['real'] for point in fit_points]
            predicted = [point['pred'] for point in fit_points]
            ax.scatter(xs, observed, alpha=0.6, color='#4f46e5', label='Données réelles', s=50)
            ax.plot(xs, predicted, color='#ef4444', linewidth=2, label='Courbe de régression')
            ax.set_xlabel('Valeur X', fontsize=12)
            ax.set_ylabel('Valeur Y', fontsize=12)
            ax.set_title('Ajustement du Modèle', fontsize=14, fontweight='bold')
            ax.legend()

        _add_chart(pdf, "Courbe de Regression", draw_fit)

    residual_points = results.get('diagnostic_plot')
    if residual_points is not None and len(residual_points) > 0:
        def draw_residuals(ax):
            fitted = [point['fitted'] for point in residual_points]
            residuals = [point['residual'] for point in residual_points]
            ax.scatter(fitted, residuals, alpha=0.6, color='#4f46e5', s=50)
            ax.axhline(y=0, color='#ef4444', linestyle='--', linewidth=2)
            ax.set_xlabel('Valeurs Ajustees', fontsize=12)
            ax.set_ylabel('Residus', fontsize=12)
            ax.set_title('Graphique des Residus', fontsize=14, fontweight='bold')

        _add_chart(pdf, "Graphique des Residus", draw_residuals)


def _write_audit_trail(pdf, audit_trail):
    """Write the excluded-row count and the library versions used to build the report."""
    pdf.set_font('Arial', 'B', 11)
    pdf.cell(0, 10, "Piste d'Audit & Reproductibilite", 0, 1)

    pdf.set_font('Arial', '', 8)
    pdf.set_text_color(100, 116, 139)

    excluded_count = audit_trail.get('excluded_rows_count', 0)
    if excluded_count is None:
        excluded_count = 0
    pdf.multi_cell(0, 6, f"- Nettoyage : {int(excluded_count)} lignes exclues de l'analyse.")

    pdf.ln(5)
    pdf.set_font('Arial', 'B', 8)
    pdf.cell(0, 6, "Environnement Technique :", 0, 1)
    pdf.set_font('Arial', '', 8)
    pdf.cell(0, 5, f"- Python : {platform.python_version()}", 0, 1)
    pdf.cell(0, 5, f"- Pandas : {pd.__version__}", 0, 1)

    # Fall back to a generic label if a module does not expose __version__.
    for label, module in (("Scikit-learn", sklearn), ("Statsmodels", statsmodels)):
        version = getattr(module, '__version__', 'Installé')
        pdf.cell(0, 5, f"- {label} : {version}", 0, 1)

    pdf.cell(0, 5, "- Random Seed : 42 (Fixed)", 0, 1)


def _build_error_pdf(error):
    """Return the bytes of a one-page PDF describing *error*."""
    error_pdf = FPDF()
    error_pdf.add_page()
    error_pdf.set_font('Arial', 'B', 16)
    error_pdf.cell(0, 10, "Erreur lors de la generation du PDF", 0, 1)
    error_pdf.ln(10)
    error_pdf.set_font('Arial', '', 12)
    # The message may quote the very character that could not be encoded, so
    # it is sanitised to keep this fallback from failing for the same reason.
    error_pdf.multi_cell(0, 10, _latin1_safe(f"Erreur: {error}"))
    error_buffer = BytesIO()
    error_pdf.output(error_buffer)
    return error_buffer.getvalue()


def create_pdf_report(project_name: str, results: dict, audit_trail: dict) -> bytes:
    """Build the validation report for an analysis and return it as PDF bytes.

    Args:
        project_name: Name printed on the first line of the report.
        results: Analysis output. Keys read here: ``model_type``,
            ``r_squared``, ``sample_size``, ``coefficients``, ``p_values``,
            ``fit_plot`` (items with ``x``/``real``/``pred``) and
            ``diagnostic_plot`` (items with ``fitted``/``residual``).
            Every key is optional.
        audit_trail: Reproducibility data; only ``excluded_rows_count`` is read.

    Returns:
        The PDF document as bytes. If building the report fails, the failure
        is logged and a one-page PDF containing the error message is returned
        instead of raising.
    """
    try:
        pdf = AnalysisReport()
        pdf.add_page()

        # 1. Summary
        _write_summary(pdf, project_name, results)
        pdf.ln(10)

        # 2. Coefficients table
        _write_coefficient_table(pdf, results)
        pdf.ln(15)

        # 3. Charts
        _write_charts(pdf, results)

        # 4. Audit trail (reproducibility)
        _write_audit_trail(pdf, audit_trail)

        pdf_buffer = BytesIO()
        pdf.output(pdf_buffer)
        return pdf_buffer.getvalue()

    except Exception as exc:
        # Top-level boundary: callers always get a PDF back, but the
        # traceback is kept in the logs rather than lost.
        logging.getLogger(__name__).exception("PDF report generation failed")
        return _build_error_pdf(exc)