620 lines
18 KiB
Python
620 lines
18 KiB
Python
"""
|
|
Backtesting Module.
|
|
|
|
This module provides functions to run backtesting on historical match data,
|
|
comparing predictions with actual results to calculate accuracy metrics.
|
|
"""
|
|
|
|
import csv
import json
from datetime import datetime, timezone
from io import StringIO
from typing import Any, Dict, List, Optional

from app.ml.prediction_calculator import calculate_prediction
|
|
|
|
|
|
# Validation thresholds
ACCURACY_VALIDATED_THRESHOLD = 60.0  # >= 60%: System validated
ACCURACY_ALERT_THRESHOLD = 55.0  # < 55%: Revision required


def validate_accuracy(accuracy: float) -> str:
    """Classify a prediction-accuracy percentage against the system thresholds.

    Args:
        accuracy: Accuracy percentage (0.0 - 100.0)

    Returns:
        'VALIDATED' if accuracy >= 60%,
        'REVISION_REQUIRED' if accuracy < 55%,
        'BELOW_TARGET' for the 55% <= accuracy < 60% band in between

    Examples:
        >>> validate_accuracy(70.0)
        'VALIDATED'
        >>> validate_accuracy(50.0)
        'REVISION_REQUIRED'
        >>> validate_accuracy(58.0)
        'BELOW_TARGET'
    """
    # Guard-clause style: resolve the two hard thresholds first,
    # then fall through to the middle band.
    if accuracy >= ACCURACY_VALIDATED_THRESHOLD:
        return 'VALIDATED'
    if accuracy < ACCURACY_ALERT_THRESHOLD:
        return 'REVISION_REQUIRED'
    return 'BELOW_TARGET'
|
|
|
|
|
|
def compare_prediction(predicted_winner: str, actual_winner: str) -> bool:
    """Check whether a predicted outcome matches the actual match result.

    The comparison is case-insensitive.

    Args:
        predicted_winner: 'home', 'away', or 'draw'
        actual_winner: 'home', 'away', or 'draw'

    Returns:
        True if prediction was correct, False otherwise

    Examples:
        >>> compare_prediction('home', 'home')
        True
        >>> compare_prediction('home', 'away')
        False
    """
    predicted = predicted_winner.lower()
    actual = actual_winner.lower()
    return predicted == actual
|
|
|
|
|
|
def run_backtesting_single_match(
    match_id: int,
    home_team: str,
    away_team: str,
    home_energy: float,
    away_energy: float,
    actual_winner: str
) -> Dict[str, Any]:
    """Run backtesting for a single historical match.

    Computes the model's prediction from the two energy scores and records
    whether it agrees with the known final result.

    Args:
        match_id: Unique match identifier
        home_team: Name of the home team
        away_team: Name of the away team
        home_energy: Energy score of the home team
        away_energy: Energy score of the away team
        actual_winner: Actual result ('home', 'away', or 'draw')

    Returns:
        Dictionary containing match details, the prediction dict, and the
        comparison outcome under the 'correct' key

    Examples:
        >>> result = run_backtesting_single_match(1, 'PSG', 'OM', 65.0, 45.0, 'home')
        >>> result['correct']
        True
    """
    # Ask the model for its prediction, then grade it against reality.
    prediction = calculate_prediction(home_energy, away_energy)
    hit = compare_prediction(prediction['predicted_winner'], actual_winner)

    report: Dict[str, Any] = {
        'match_id': match_id,
        'home_team': home_team,
        'away_team': away_team,
        'home_energy': home_energy,
        'away_energy': away_energy,
        'prediction': prediction,
        'actual_winner': actual_winner,
        'correct': hit,
    }
    return report
|
|
|
|
|
|
def run_backtesting_batch(matches: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Run backtesting on a batch of historical matches.

    Processes multiple matches, calculates predictions, compares with actual
    results, and generates accuracy metrics and detailed report.

    Args:
        matches: List of match dictionaries with keys:
            - match_id (int)
            - home_team (str)
            - away_team (str)
            - home_energy (float)
            - away_energy (float)
            - actual_winner (str)
            - league (str, optional)
            - date (datetime or ISO string, optional)

    Returns:
        Dictionary containing:
            - total_matches: Number of matches processed
            - correct_predictions: Number of correct predictions
            - incorrect_predictions: Number of incorrect predictions
            - accuracy: Accuracy percentage (rounded to 2 decimals)
            - status: Validation status (VALIDATED, REVISION_REQUIRED, BELOW_TARGET)
            - results: List of individual match results
            - metrics_by_league: Accuracy breakdown by league
            - timestamp: When the backtesting was run (timezone-aware UTC, ISO 8601)
            - validation_thresholds: The thresholds used for the status

    Raises:
        ValueError: If any match dict is missing a required field.

    Examples:
        >>> matches = [
        ...     {'match_id': 1, 'home_team': 'PSG', 'away_team': 'OM',
        ...      'home_energy': 65.0, 'away_energy': 45.0, 'actual_winner': 'home'},
        ... ]
        >>> result = run_backtesting_batch(matches)
        >>> result['accuracy']
        100.0
    """
    results: List[Dict[str, Any]] = []
    correct_predictions = 0
    incorrect_predictions = 0

    # Track metrics by league (league name -> counters).
    league_metrics: Dict[str, Dict[str, Any]] = {}

    # Hoisted out of the loop: the required-field list is loop-invariant.
    required_fields = ('match_id', 'home_team', 'away_team',
                       'home_energy', 'away_energy', 'actual_winner')

    for match in matches:
        # Fail fast on malformed input rows.
        if not all(field in match for field in required_fields):
            raise ValueError(f"Match missing required fields: {match}")

        # Extract optional metadata.
        league = match.get('league', 'unknown')
        match_date = match.get('date')

        # Run backtesting for this match.
        result = run_backtesting_single_match(
            match_id=match['match_id'],
            home_team=match['home_team'],
            away_team=match['away_team'],
            home_energy=match['home_energy'],
            away_energy=match['away_energy'],
            actual_winner=match['actual_winner']
        )

        # Annotate the result with league and date. Robustness fix: a date
        # already supplied as a pre-formatted string is passed through
        # instead of crashing on a missing .isoformat().
        result['league'] = league
        if not match_date:
            result['date'] = None
        elif hasattr(match_date, 'isoformat'):
            result['date'] = match_date.isoformat()
        else:
            result['date'] = match_date

        # Track correctness.
        if result['correct']:
            correct_predictions += 1
        else:
            incorrect_predictions += 1

        # Update per-league counters (setdefault replaces the explicit
        # membership test + init branch).
        metrics = league_metrics.setdefault(
            league, {'total': 0, 'correct': 0, 'accuracy': 0.0}
        )
        metrics['total'] += 1
        if result['correct']:
            metrics['correct'] += 1

        results.append(result)

    # Calculate overall accuracy (guard against an empty batch).
    total_matches = len(matches)
    accuracy = (correct_predictions / total_matches * 100.0) if total_matches > 0 else 0.0

    # Calculate accuracy per league.
    for metrics in league_metrics.values():
        if metrics['total'] > 0:
            metrics['accuracy'] = metrics['correct'] / metrics['total'] * 100.0

    status = validate_accuracy(accuracy)

    return {
        'total_matches': total_matches,
        'correct_predictions': correct_predictions,
        'incorrect_predictions': incorrect_predictions,
        'accuracy': round(accuracy, 2),
        'status': status,
        'results': results,
        'metrics_by_league': league_metrics,
        # Fix: datetime.utcnow() is deprecated (Python 3.12) and returns a
        # naive timestamp; emit an explicit timezone-aware UTC timestamp.
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'validation_thresholds': {
            'validated': ACCURACY_VALIDATED_THRESHOLD,
            'alert': ACCURACY_ALERT_THRESHOLD
        }
    }
|
|
|
|
|
|
def export_to_json(backtesting_result: Dict[str, Any]) -> str:
    """Serialize backtesting results as a pretty-printed JSON string.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        JSON formatted string; values JSON cannot encode natively
        (e.g. datetimes) are stringified via ``default=str``

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> json_output = export_to_json(result)
        >>> isinstance(json_output, str)
        True
    """
    serialized = json.dumps(
        backtesting_result,
        indent=2,
        default=str,
    )
    return serialized
|
|
|
|
|
|
def export_to_csv(backtesting_result: Dict[str, Any]) -> str:
    """Render the per-match backtesting results as CSV text.

    One row is emitted per entry in ``backtesting_result['results']``,
    preceded by a header row.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        CSV formatted string

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> csv_output = export_to_csv(result)
        >>> isinstance(csv_output, str)
        True
    """
    columns = [
        'match_id', 'league', 'date', 'home_team', 'away_team',
        'home_energy', 'away_energy', 'predicted_winner',
        'confidence', 'actual_winner', 'correct'
    ]

    buffer = StringIO()
    writer = csv.DictWriter(buffer, fieldnames=columns)
    writer.writeheader()

    for entry in backtesting_result.get('results', []):
        prediction = entry['prediction']
        writer.writerow({
            'match_id': entry['match_id'],
            'league': entry.get('league', ''),
            'date': entry.get('date', ''),
            'home_team': entry['home_team'],
            'away_team': entry['away_team'],
            'home_energy': entry['home_energy'],
            'away_energy': entry['away_energy'],
            'predicted_winner': prediction['predicted_winner'],
            'confidence': prediction['confidence'],
            'actual_winner': entry['actual_winner'],
            'correct': entry['correct'],
        })

    return buffer.getvalue()
|
|
|
|
|
|
def export_to_html(backtesting_result: Dict[str, Any]) -> str:
    """
    Export backtesting results to HTML format for publication.

    Builds a self-contained HTML document (inline CSS, no external assets)
    with a summary card grid, a status badge colored by validation status,
    a per-league metrics table, and a detailed per-match results table.

    NOTE(review): field values are interpolated into the HTML without
    escaping — assumes team/league names contain no HTML metacharacters;
    confirm upstream if input is untrusted.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        HTML formatted string with styling and charts

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> html_output = export_to_html(result)
        >>> '<html>' in html_output
        True
    """
    # Badge background color per validation status; unknown statuses
    # fall back to grey (#666) at the lookup site below.
    status_colors = {
        'VALIDATED': '#10B981',  # Green
        'BELOW_TARGET': '#F59E0B',  # Orange
        'REVISION_REQUIRED': '#EF4444'  # Red
    }

    # Pull the summary figures out once for the f-string below.
    status = backtesting_result['status']
    accuracy = backtesting_result['accuracy']
    total_matches = backtesting_result['total_matches']
    correct_predictions = backtesting_result['correct_predictions']
    incorrect_predictions = backtesting_result['incorrect_predictions']

    # Build HTML
    html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Backtesting Report - ChartBastan</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
min-height: 100vh;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
padding: 40px;
}}
.header {{
text-align: center;
margin-bottom: 40px;
}}
.header h1 {{
font-size: 2.5em;
color: #667eea;
margin-bottom: 10px;
}}
.header p {{
color: #666;
font-size: 1.1em;
}}
.summary {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 40px;
}}
.card {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 15px;
text-align: center;
}}
.card h3 {{
font-size: 0.9em;
opacity: 0.9;
margin-bottom: 10px;
text-transform: uppercase;
letter-spacing: 1px;
}}
.card .value {{
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}}
.card .sub {{
font-size: 0.9em;
opacity: 0.9;
}}
.status-badge {{
display: inline-block;
padding: 10px 25px;
border-radius: 25px;
color: white;
font-weight: bold;
font-size: 1.2em;
margin: 20px 0;
}}
.section {{
margin-bottom: 40px;
}}
.section h2 {{
font-size: 1.8em;
color: #333;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 3px solid #667eea;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-top: 20px;
}}
th, td {{
padding: 15px;
text-align: left;
border-bottom: 1px solid #ddd;
}}
th {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-weight: 600;
text-transform: uppercase;
font-size: 0.85em;
letter-spacing: 0.5px;
}}
tr:hover {{
background: #f5f5f5;
}}
.correct {{
color: #10B981;
font-weight: bold;
}}
.incorrect {{
color: #EF4444;
font-weight: bold;
}}
.footer {{
text-align: center;
margin-top: 40px;
padding-top: 20px;
border-top: 2px solid #ddd;
color: #666;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>📊 Backtesting Report</h1>
<p>ChartBastan Prediction System Performance Analysis</p>
<p style="margin-top: 10px; font-size: 0.9em;">
Generated: {backtesting_result.get('timestamp', 'N/A')}
</p>
</div>

<div class="summary">
<div class="card">
<h3>Total Matches</h3>
<div class="value">{total_matches}</div>
<div class="sub">matches analyzed</div>
</div>
<div class="card">
<h3>Accuracy</h3>
<div class="value">{accuracy}%</div>
<div class="sub">prediction accuracy</div>
</div>
<div class="card">
<h3>Correct</h3>
<div class="value">{correct_predictions}</div>
<div class="sub">predictions</div>
</div>
<div class="card">
<h3>Incorrect</h3>
<div class="value">{incorrect_predictions}</div>
<div class="sub">predictions</div>
</div>
</div>

<div style="text-align: center;">
<div class="status-badge" style="background-color: {status_colors.get(status, '#666')};">
Status: {status}
</div>
</div>

<div class="section">
<h2>📈 Metrics by League</h2>
<table>
<thead>
<tr>
<th>League</th>
<th>Matches</th>
<th>Correct</th>
<th>Accuracy</th>
</tr>
</thead>
<tbody>
"""

    # Add league metrics: one table row per league.
    for league, metrics in backtesting_result.get('metrics_by_league', {}).items():
        html += f"""
<tr>
<td>{league}</td>
<td>{metrics['total']}</td>
<td>{metrics['correct']}</td>
<td>{metrics['accuracy']:.2f}%</td>
</tr>
"""

    # Close the league table and open the detailed-results section.
    html += """
</tbody>
</table>
</div>

<div class="section">
<h2>📋 Detailed Results</h2>
<table>
<thead>
<tr>
<th>Match ID</th>
<th>League</th>
<th>Home vs Away</th>
<th>Prediction</th>
<th>Confidence</th>
<th>Actual</th>
<th>Result</th>
</tr>
</thead>
<tbody>
"""

    # Add detailed results: one row per match; the CSS class colors the
    # verdict green (correct) or red (incorrect).
    for result in backtesting_result.get('results', []):
        result_class = 'correct' if result['correct'] else 'incorrect'
        html += f"""
<tr>
<td>{result['match_id']}</td>
<td>{result.get('league', 'N/A')}</td>
<td>{result['home_team']} vs {result['away_team']}</td>
<td>{result['prediction']['predicted_winner']}</td>
<td>{result['prediction']['confidence']:.1f}%</td>
<td>{result['actual_winner']}</td>
<td class="{result_class}">{'✓ Correct' if result['correct'] else '✗ Incorrect'}</td>
</tr>
"""

    # Close the results table and finish the document with the footer.
    html += """
</tbody>
</table>
</div>

<div class="footer">
<p>🎯 ChartBastan - Football Match Prediction System</p>
<p>© 2026 All rights reserved</p>
</div>
</div>
</body>
</html>
"""

    return html
|
|
|
|
|
|
def filter_matches_by_league(matches: List[Dict[str, Any]], leagues: List[str]) -> List[Dict[str, Any]]:
    """Keep only the matches whose 'league' value appears in *leagues*.

    An empty *leagues* list disables filtering and returns the input
    list unchanged.

    Args:
        matches: List of match dictionaries
        leagues: List of league names to include

    Returns:
        Filtered list of matches

    Examples:
        >>> matches = [{'league': 'Ligue 1', 'home_team': 'PSG', ...}]
        >>> filtered = filter_matches_by_league(matches, ['Ligue 1'])
        >>> len(filtered)
        1
    """
    if not leagues:
        return matches

    selected = []
    for candidate in matches:
        if candidate.get('league') in leagues:
            selected.append(candidate)
    return selected
|
|
|
|
|
|
def filter_matches_by_period(
    matches: List[Dict[str, Any]],
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None
) -> List[Dict[str, Any]]:
    """Keep only the matches whose 'date' falls inside the given window.

    When any bound is supplied, matches with no 'date' value are dropped;
    with neither bound the input list is returned unchanged.

    Args:
        matches: List of match dictionaries
        start_date: Start date (inclusive), or None for no lower bound
        end_date: End date (inclusive), or None for no upper bound

    Returns:
        Filtered list of matches

    Examples:
        >>> from datetime import datetime
        >>> matches = [{'date': datetime(2026, 1, 1), ...}]
        >>> filtered = filter_matches_by_period(matches, datetime(2025, 1, 1))
    """
    # No bounds: nothing to filter.
    if not start_date and not end_date:
        return matches

    def _in_window(entry: Dict[str, Any]) -> bool:
        # Entries with a missing/empty date never satisfy a bounded window.
        when = entry.get('date')
        if not when:
            return False
        if start_date and when < start_date:
            return False
        if end_date and when > end_date:
            return False
        return True

    return [m for m in matches if _in_window(m)]
|