"""HTTP endpoints for the ``/analysis`` API.

Each endpoint receives tabular data as a list of row dictionaries, loads it
into a pandas DataFrame and delegates the actual computation to the helpers
in ``app.core.engine.clean`` and ``app.core.engine.stats``.
"""

from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from app.core.engine.clean import (
    detect_multivariate_outliers,
    detect_univariate_outliers,
    merge_outliers,  # noqa: F401 - unused in this module; kept in case it is imported from here
    merge_outliers_structured,
)
from app.core.engine.stats import (
    calculate_correlation_matrix,
    calculate_feature_importance,
    get_correlation_summary,
    run_regression_analysis,
)

router = APIRouter(prefix="/analysis", tags=["analysis"])

# Accepted values for the ``method`` field of the respective requests.
_OUTLIER_METHODS = ("univariate", "multivariate", "both")
_CORRELATION_METHODS = ("pearson", "spearman", "kendall")


class TypeValidationRequest(BaseModel):
    """Payload for ``/validate-type``: raw values and the type to test them against."""

    data: List[Any]
    target_type: str


class OutlierDetectionRequest(BaseModel):
    """Payload for ``/detect-outliers``."""

    data: List[Dict[str, Optional[Any]]]
    columns: List[str]
    method: str = "both"  # univariate, multivariate, both
    excluded_indices: List[int] = []  # Rows to exclude from outlier detection


class CorrelationRequest(BaseModel):
    """Payload for ``/correlation``."""

    data: List[Dict[str, Optional[Any]]]
    columns: List[str]
    method: str = "pearson"  # pearson, spearman, kendall
    min_threshold: Optional[float] = None  # Optional minimum correlation threshold
    include_pvalues: bool = True


class FeatureImportanceRequest(BaseModel):
    """Payload for ``/feature-importance``."""

    data: List[Dict[str, Optional[Any]]]
    features: List[str]
    target: str


class RegressionRequest(BaseModel):
    """Payload for ``/run-regression``."""

    data: List[Dict[str, Optional[Any]]]
    x_features: List[str]
    y_target: str
    model_type: str = "linear"
    # New Engineering Parameters
    poly_degree: int = 1  # Default to linear
    include_interactions: bool = False


def _records_to_frame(records: List[Dict[str, Optional[Any]]]) -> pd.DataFrame:
    """Build a DataFrame from row dictionaries, filling missing values with NaN."""
    return pd.DataFrame(records).fillna(np.nan)


@router.post("/validate-type")
async def validate_type_conversion(request: TypeValidationRequest):
    """Check whether every value in ``request.data`` converts to ``target_type``.

    Only ``"numeric"`` and ``"date"`` trigger a conversion attempt; any other
    ``target_type`` is reported as valid without a check.

    Returns:
        ``{"status": "ok", "valid": True}`` on success, otherwise
        ``{"status": "error", "valid": False, "message": <reason>}``.
    """
    values = pd.Series(request.data)
    try:
        if request.target_type == "numeric":
            pd.to_numeric(values, errors='raise')
        elif request.target_type == "date":
            pd.to_datetime(values, errors='raise')
    except Exception as exc:
        # Deliberately broad: any failure to convert means "not valid" and is
        # reported in the response body rather than as an HTTP error.
        return {"status": "error", "valid": False, "message": str(exc)}
    return {"status": "ok", "valid": True}


@router.post("/detect-outliers")
async def detect_outliers(request: OutlierDetectionRequest):
    """Run univariate and/or multivariate outlier detection on the given columns.

    Returns:
        A dict with ``status``, ``total_count``, ``outliers`` (the merged
        result, kept for backwards compatibility), ``univariate`` and
        ``multivariate``. For empty input only ``status``, ``total_count`` and
        an empty ``outliers`` list are returned.

    Raises:
        HTTPException: 400 if ``request.method`` is not one of
            ``univariate``, ``multivariate`` or ``both``.
    """
    if not request.data:
        # "univariate"/"multivariate" are omitted here because their shape is
        # defined by merge_outliers_structured, which is not called on empty input.
        return {"status": "ok", "total_count": 0, "outliers": []}

    # Reject unknown methods instead of silently reporting "no outliers",
    # mirroring the validation done by the correlation endpoint.
    if request.method not in _OUTLIER_METHODS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid method. Choose from: {', '.join(_OUTLIER_METHODS)}",
        )

    df = _records_to_frame(request.data)

    # Pass excluded indices to detection functions
    uni_results = {}
    if request.method in ("univariate", "both"):
        uni_results = detect_univariate_outliers(
            df, request.columns, request.excluded_indices
        )

    multi_results = {}
    if request.method in ("multivariate", "both"):
        multi_results = detect_multivariate_outliers(
            df, request.columns, request.excluded_indices
        )

    # Use the new structured merge function
    structured = merge_outliers_structured(uni_results, multi_results)

    return {
        "status": "ok",
        "total_count": len(structured["all"]),
        "outliers": structured["all"],  # Backwards compatibility
        "univariate": structured["univariate"],  # New: Column-specific outliers
        "multivariate": structured["multivariate"],  # New: Global outliers
    }


@router.post("/correlation")
async def get_correlation(request: CorrelationRequest):
    """Compute a correlation matrix (plus summary) for the requested columns.

    Returns:
        ``{"status": "ok", "result": ..., "summary": ...}`` on success. When
        data or columns are missing, a ``status: "error"`` body with empty
        ``matrix``/``pvalues``/``metadata`` is returned instead of an HTTP error.

    Raises:
        HTTPException: 400 for an unknown ``method`` or when the computation
            raises ``ValueError``; 500 for any other failure.
    """
    if not request.data or not request.columns:
        return {
            "status": "error",
            "message": "Data and columns are required",
            "result": {"matrix": [], "pvalues": [], "metadata": {}},
        }

    df = _records_to_frame(request.data)

    # Validate method parameter
    if request.method not in _CORRELATION_METHODS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid method. Choose from: {', '.join(_CORRELATION_METHODS)}",
        )

    try:
        result = calculate_correlation_matrix(
            df,
            request.columns,
            method=request.method,
            min_threshold=request.min_threshold,
            include_pvalues=request.include_pvalues,
        )
        # Add summary statistics
        summary = get_correlation_summary(result)
    except ValueError as exc:
        # Same convention as /run-regression: ValueError is reported as a
        # client error rather than a server fault.
        raise HTTPException(
            status_code=400,
            detail=f"Correlation calculation failed: {str(exc)}",
        ) from exc
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Correlation calculation failed: {str(exc)}",
        ) from exc

    return {"status": "ok", "result": result, "summary": summary}


@router.post("/feature-importance")
async def get_feature_importance(request: FeatureImportanceRequest):
    """Return the importance of each feature with respect to the target column.

    Returns:
        ``{"status": "ok", "importances": ...}``; ``importances`` is an empty
        list when data, features or target are missing.
    """
    if not request.data or not request.features or not request.target:
        return {"status": "ok", "importances": []}

    df = _records_to_frame(request.data)
    importances = calculate_feature_importance(df, request.features, request.target)
    return {"status": "ok", "importances": importances}


@router.post("/run-regression")
async def run_regression(request: RegressionRequest):
    """Fit a regression model on the supplied data and return its results.

    Returns:
        ``{"status": "ok", "results": ...}``.

    Raises:
        HTTPException: 400 when data, features or target are missing or the
            analysis raises ``ValueError``; 500 for any other failure.
    """
    if not request.data or not request.x_features or not request.y_target:
        raise HTTPException(status_code=400, detail="Incomplete parameters.")

    df = _records_to_frame(request.data)

    try:
        results = run_regression_analysis(
            df,
            request.x_features,
            request.y_target,
            request.model_type,
            request.poly_degree,
            request.include_interactions,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Internal Analysis Error: {str(exc)}",
        ) from exc

    return {"status": "ok", "results": results}