""" Backtesting Module. This module provides functions to run backtesting on historical match data, comparing predictions with actual results to calculate accuracy metrics. """ import json import csv from datetime import datetime from typing import Dict, List, Any, Optional from io import StringIO from app.ml.prediction_calculator import calculate_prediction # Validation thresholds ACCURACY_VALIDATED_THRESHOLD = 60.0 # >= 60%: System validated ACCURACY_ALERT_THRESHOLD = 55.0 # < 55%: Revision required def validate_accuracy(accuracy: float) -> str: """ Validate the accuracy of the prediction system. Args: accuracy: Accuracy percentage (0.0 - 100.0) Returns: 'VALIDATED' if accuracy >= 60%, 'REVISION_REQUIRED' if accuracy < 55%, 'BELOW_TARGET' if 55% <= accuracy < 60% Examples: >>> validate_accuracy(70.0) 'VALIDATED' >>> validate_accuracy(50.0) 'REVISION_REQUIRED' >>> validate_accuracy(58.0) 'BELOW_TARGET' """ if accuracy >= ACCURACY_VALIDATED_THRESHOLD: return 'VALIDATED' elif accuracy < ACCURACY_ALERT_THRESHOLD: return 'REVISION_REQUIRED' else: return 'BELOW_TARGET' def compare_prediction(predicted_winner: str, actual_winner: str) -> bool: """ Compare predicted winner with actual match result. Args: predicted_winner: 'home', 'away', or 'draw' actual_winner: 'home', 'away', or 'draw' Returns: True if prediction was correct, False otherwise Examples: >>> compare_prediction('home', 'home') True >>> compare_prediction('home', 'away') False """ return predicted_winner.lower() == actual_winner.lower() def run_backtesting_single_match( match_id: int, home_team: str, away_team: str, home_energy: float, away_energy: float, actual_winner: str ) -> Dict[str, Any]: """ Run backtesting for a single historical match. Calculates prediction and compares it with the actual result. Args: match_id: Unique match identifier home_team: Name of the home team away_team: Name of the away team home_energy: Energy score of the home team away_energy: Energy score of the away team actual_winner: Actual result ('home', 'away', or 'draw') Returns: Dictionary containing match details, prediction, and comparison result Examples: >>> result = run_backtesting_single_match(1, 'PSG', 'OM', 65.0, 45.0, 'home') >>> result['correct'] True """ # Calculate prediction prediction = calculate_prediction(home_energy, away_energy) # Compare with actual result is_correct = compare_prediction(prediction['predicted_winner'], actual_winner) return { 'match_id': match_id, 'home_team': home_team, 'away_team': away_team, 'home_energy': home_energy, 'away_energy': away_energy, 'prediction': prediction, 'actual_winner': actual_winner, 'correct': is_correct } def run_backtesting_batch(matches: List[Dict[str, Any]]) -> Dict[str, Any]: """ Run backtesting on a batch of historical matches. Processes multiple matches, calculates predictions, compares with actual results, and generates accuracy metrics and detailed report. Args: matches: List of match dictionaries with keys: - match_id (int) - home_team (str) - away_team (str) - home_energy (float) - away_energy (float) - actual_winner (str) - league (str, optional) - date (datetime, optional) Returns: Dictionary containing: - total_matches: Number of matches processed - correct_predictions: Number of correct predictions - incorrect_predictions: Number of incorrect predictions - accuracy: Accuracy percentage - status: Validation status (VALIDATED, REVISION_REQUIRED, BELOW_TARGET) - results: List of individual match results - metrics_by_league: Accuracy breakdown by league - timestamp: When the backtesting was run Examples: >>> matches = [ ... {'match_id': 1, 'home_team': 'PSG', 'away_team': 'OM', ... 'home_energy': 65.0, 'away_energy': 45.0, 'actual_winner': 'home'}, ... ] >>> result = run_backtesting_batch(matches) >>> result['accuracy'] 100.0 """ results = [] correct_predictions = 0 incorrect_predictions = 0 # Track metrics by league league_metrics: Dict[str, Dict[str, Any]] = {} for match in matches: # Validate required fields required_fields = ['match_id', 'home_team', 'away_team', 'home_energy', 'away_energy', 'actual_winner'] if not all(field in match for field in required_fields): raise ValueError(f"Match missing required fields: {match}") # Extract league and date if available league = match.get('league', 'unknown') match_date = match.get('date') # Run backtesting for this match result = run_backtesting_single_match( match_id=match['match_id'], home_team=match['home_team'], away_team=match['away_team'], home_energy=match['home_energy'], away_energy=match['away_energy'], actual_winner=match['actual_winner'] ) # Add league and date to result result['league'] = league result['date'] = match_date.isoformat() if match_date else None # Track correctness if result['correct']: correct_predictions += 1 else: incorrect_predictions += 1 # Update league metrics if league not in league_metrics: league_metrics[league] = { 'total': 0, 'correct': 0, 'accuracy': 0.0 } league_metrics[league]['total'] += 1 if result['correct']: league_metrics[league]['correct'] += 1 results.append(result) # Calculate overall accuracy total_matches = len(matches) accuracy = (correct_predictions / total_matches * 100.0) if total_matches > 0 else 0.0 # Calculate accuracy per league for league, metrics in league_metrics.items(): if metrics['total'] > 0: metrics['accuracy'] = (metrics['correct'] / metrics['total'] * 100.0) # Get validation status status = validate_accuracy(accuracy) return { 'total_matches': total_matches, 'correct_predictions': correct_predictions, 'incorrect_predictions': incorrect_predictions, 'accuracy': round(accuracy, 2), 'status': status, 'results': results, 'metrics_by_league': league_metrics, 'timestamp': datetime.utcnow().isoformat(), 'validation_thresholds': { 'validated': ACCURACY_VALIDATED_THRESHOLD, 'alert': ACCURACY_ALERT_THRESHOLD } } def export_to_json(backtesting_result: Dict[str, Any]) -> str: """ Export backtesting results to JSON format. Args: backtesting_result: Result from run_backtesting_batch Returns: JSON formatted string Examples: >>> result = run_backtesting_batch(matches) >>> json_output = export_to_json(result) >>> isinstance(json_output, str) True """ return json.dumps(backtesting_result, indent=2, default=str) def export_to_csv(backtesting_result: Dict[str, Any]) -> str: """ Export backtesting results to CSV format. Args: backtesting_result: Result from run_backtesting_batch Returns: CSV formatted string Examples: >>> result = run_backtesting_batch(matches) >>> csv_output = export_to_csv(result) >>> isinstance(csv_output, str) True """ output = StringIO() fieldnames = [ 'match_id', 'league', 'date', 'home_team', 'away_team', 'home_energy', 'away_energy', 'predicted_winner', 'confidence', 'actual_winner', 'correct' ] writer = csv.DictWriter(output, fieldnames=fieldnames) writer.writeheader() for result in backtesting_result.get('results', []): row = { 'match_id': result['match_id'], 'league': result.get('league', ''), 'date': result.get('date', ''), 'home_team': result['home_team'], 'away_team': result['away_team'], 'home_energy': result['home_energy'], 'away_energy': result['away_energy'], 'predicted_winner': result['prediction']['predicted_winner'], 'confidence': result['prediction']['confidence'], 'actual_winner': result['actual_winner'], 'correct': result['correct'] } writer.writerow(row) return output.getvalue() def export_to_html(backtesting_result: Dict[str, Any]) -> str: """ Export backtesting results to HTML format for publication. Args: backtesting_result: Result from run_backtesting_batch Returns: HTML formatted string with styling and charts Examples: >>> result = run_backtesting_batch(matches) >>> html_output = export_to_html(result) >>> '' in html_output True """ status_colors = { 'VALIDATED': '#10B981', # Green 'BELOW_TARGET': '#F59E0B', # Orange 'REVISION_REQUIRED': '#EF4444' # Red } status = backtesting_result['status'] accuracy = backtesting_result['accuracy'] total_matches = backtesting_result['total_matches'] correct_predictions = backtesting_result['correct_predictions'] incorrect_predictions = backtesting_result['incorrect_predictions'] # Build HTML html = f"""
ChartBastan Prediction System Performance Analysis
Generated: {backtesting_result.get('timestamp', 'N/A')}
| League | Matches | Correct | Accuracy |
|---|---|---|---|
| {league} | {metrics['total']} | {metrics['correct']} | {metrics['accuracy']:.2f}% |
| Match ID | League | Home vs Away | Prediction | Confidence | Actual | Result |
|---|---|---|---|---|---|---|
| {result['match_id']} | {result.get('league', 'N/A')} | {result['home_team']} vs {result['away_team']} | {result['prediction']['predicted_winner']} | {result['prediction']['confidence']:.1f}% | {result['actual_winner']} | {'✓ Correct' if result['correct'] else '✗ Incorrect'} |