Initial commit: Data Analysis application with FastAPI backend and Next.js frontend

This commit is contained in:
2026-01-11 21:54:33 +01:00
commit 7bdafb4fbf
549 changed files with 96211 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Any, Dict, Optional
import pandas as pd
import numpy as np
from app.core.engine.clean import detect_univariate_outliers, detect_multivariate_outliers, merge_outliers, merge_outliers_structured
from app.core.engine.stats import calculate_correlation_matrix, calculate_feature_importance, run_regression_analysis
# Router collecting the analysis endpoints; every route below is served under "/analysis".
router = APIRouter(prefix="/analysis", tags=["analysis"])
class TypeValidationRequest(BaseModel):
    """Request body for the ``/validate-type`` endpoint."""

    # Raw values whose convertibility is being checked.
    data: List[Any]

    # Name of the desired type; the endpoint acts on "numeric" and "date".
    target_type: str
class OutlierDetectionRequest(BaseModel):
    """Request body for the ``/detect-outliers`` endpoint."""

    # Row records: each dict maps a column name to a (possibly null) value.
    data: List[Dict[str, Optional[Any]]]

    # Column names handed to the outlier detectors.
    columns: List[str]

    # The endpoint runs the univariate detector for "univariate"/"both" and
    # the multivariate detector for "multivariate"/"both".
    method: str = "both"

    # Rows to exclude from outlier detection.
    # NOTE(review): pydantic is documented to copy mutable field defaults per
    # instance, so the shared-list pitfall should not apply — confirm for the
    # pinned pydantic version.
    excluded_indices: List[int] = []
class CorrelationRequest(BaseModel):
    """Request body for the ``/correlation`` endpoint."""

    # Row records: each dict maps a column name to a (possibly null) value.
    data: List[Dict[str, Optional[Any]]]

    # Column names forwarded to the correlation computation.
    columns: List[str]

    # Correlation method: pearson, spearman, kendall.
    method: str = "pearson"

    # Optional minimum correlation threshold.
    min_threshold: Optional[float] = None

    # Forwarded as-is to the correlation computation.
    include_pvalues: bool = True
class FeatureImportanceRequest(BaseModel):
    """Request body for the ``/feature-importance`` endpoint."""

    # Row records: each dict maps a column name to a (possibly null) value.
    data: List[Dict[str, Optional[Any]]]

    # Names of the feature columns forwarded to the importance calculation.
    features: List[str]

    # Name of the target column forwarded to the importance calculation.
    target: str
class RegressionRequest(BaseModel):
    """Request body for the ``/run-regression`` endpoint."""

    # Row records: each dict maps a column name to a (possibly null) value.
    data: List[Dict[str, Optional[Any]]]

    # Names of the predictor columns.
    x_features: List[str]

    # Name of the response column.
    y_target: str

    # Model identifier forwarded to the regression routine.
    model_type: str = "linear"

    # --- New engineering parameters ---

    # Polynomial degree; defaults to linear.
    poly_degree: int = 1

    # Forwarded as-is to the regression routine.
    include_interactions: bool = False
@router.post("/validate-type")
async def validate_type_conversion(request: TypeValidationRequest):
    """Report whether the submitted values can be converted to the target type.

    Only the "numeric" and "date" target types trigger a conversion attempt;
    any other target type is reported as valid without a check.

    Returns:
        ``{"status": "ok", "valid": True}`` when the conversion succeeds (or
        no check applies), otherwise a dict with ``"status": "error"``,
        ``"valid": False`` and the exception text under ``"message"``.
    """
    # Strict converters keyed by the target-type name they validate.
    strict_converters = {
        "numeric": pd.to_numeric,
        "date": pd.to_datetime,
    }

    values = pd.Series(request.data)
    convert = strict_converters.get(request.target_type)

    try:
        if convert is not None:
            # The converted result is discarded; only success/failure matters.
            convert(values, errors="raise")
    except Exception as exc:
        # Any conversion failure is a validation result, not a server error.
        return {"status": "error", "valid": False, "message": str(exc)}

    return {"status": "ok", "valid": True}
@router.post("/detect-outliers")
async def detect_outliers(request: OutlierDetectionRequest):
    """Run univariate and/or multivariate outlier detection on the posted rows.

    The request's ``method`` selects which detectors run: "univariate",
    "multivariate", or "both". ``excluded_indices`` is passed through to the
    detectors unchanged.

    Returns:
        ``{"outliers": []}`` when no data was supplied; otherwise a dict with
        ``status``, ``total_count``, ``outliers`` (the merged list, kept for
        backwards compatibility), ``univariate`` and ``multivariate``.

    Raises:
        HTTPException: 400 when ``method`` is not one of the supported values.
    """
    if not request.data:
        return {"outliers": []}

    # Reject unknown methods explicitly. Previously an unrecognised value
    # skipped both detectors and produced an "ok" response with zero
    # outliers, which is indistinguishable from a clean dataset.
    valid_methods = ["univariate", "multivariate", "both"]
    if request.method not in valid_methods:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid method. Choose from: {', '.join(valid_methods)}"
        )

    df = pd.DataFrame(request.data).fillna(np.nan)

    # Pass excluded indices to detection functions
    uni_results = {}
    if request.method in ("univariate", "both"):
        uni_results = detect_univariate_outliers(
            df, request.columns, request.excluded_indices
        )

    multi_results = {}
    if request.method in ("multivariate", "both"):
        multi_results = detect_multivariate_outliers(
            df, request.columns, request.excluded_indices
        )

    # Use the structured merge function
    structured = merge_outliers_structured(uni_results, multi_results)
    return {
        "status": "ok",
        "total_count": len(structured["all"]),
        "outliers": structured["all"],  # Backwards compatibility
        "univariate": structured["univariate"],  # Column-specific outliers
        "multivariate": structured["multivariate"],  # Global outliers
    }
@router.post("/correlation")
async def get_correlation(request: CorrelationRequest):
    """Compute a correlation matrix (plus summary) for the requested columns.

    Returns:
        On success, ``{"status": "ok", "result": ..., "summary": ...}``.
        When data or columns are missing, an error payload is returned as a
        normal response (not an HTTP error) with an empty result skeleton.

    Raises:
        HTTPException: 400 when ``method`` is not pearson/spearman/kendall;
            500 when the correlation computation itself fails.
    """
    if not request.data or not request.columns:
        return {
            "status": "error",
            "message": "Data and columns are required",
            "result": {"matrix": [], "pvalues": [], "metadata": {}}
        }

    # Validate the method before building the DataFrame so that invalid
    # requests are rejected without doing any work.
    valid_methods = ['pearson', 'spearman', 'kendall']
    if request.method not in valid_methods:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid method. Choose from: {', '.join(valid_methods)}"
        )

    df = pd.DataFrame(request.data).fillna(np.nan)

    try:
        result = calculate_correlation_matrix(
            df,
            request.columns,
            method=request.method,
            min_threshold=request.min_threshold,
            include_pvalues=request.include_pvalues
        )

        # Add summary statistics
        from app.core.engine.stats import get_correlation_summary
        summary = get_correlation_summary(result)

        return {
            "status": "ok",
            "result": result,
            "summary": summary
        }
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(
            status_code=500,
            detail=f"Correlation calculation failed: {str(e)}"
        ) from e
@router.post("/feature-importance")
async def get_feature_importance(request: FeatureImportanceRequest):
    """Compute feature importances of the given features for the target.

    Returns:
        ``{"importances": []}`` when data, features or target is missing;
        otherwise ``{"status": "ok", "importances": ...}`` with the value
        produced by ``calculate_feature_importance``.
    """
    has_all_inputs = request.data and request.features and request.target
    if not has_all_inputs:
        return {"importances": []}

    frame = pd.DataFrame(request.data).fillna(np.nan)
    importances = calculate_feature_importance(
        frame, request.features, request.target
    )
    return {"status": "ok", "importances": importances}
@router.post("/run-regression")
async def run_regression(request: RegressionRequest):
    """Fit a regression model on the posted rows and return its results.

    The model type, polynomial degree and interaction flag from the request
    are forwarded positionally to ``run_regression_analysis``.

    Returns:
        ``{"status": "ok", "results": ...}`` with the analysis output.

    Raises:
        HTTPException: 400 when data, ``x_features`` or ``y_target`` is
            missing, or when the analysis raises ``ValueError``; 500 for any
            other failure inside the analysis.
    """
    if not request.data or not request.x_features or not request.y_target:
        raise HTTPException(status_code=400, detail="Incomplete parameters.")

    df = pd.DataFrame(request.data).fillna(np.nan)

    try:
        results = run_regression_analysis(
            df,
            request.x_features,
            request.y_target,
            request.model_type,
            request.poly_degree,
            request.include_interactions
        )
        return {"status": "ok", "results": results}
    except ValueError as e:
        # ValueError is surfaced to the client as a bad request; chain the
        # cause so the original traceback is preserved.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Internal Analysis Error: {str(e)}"
        ) from e