chartbastan/backend/scripts/generate_historical_matches.py
2026-02-01 09:31:38 +01:00

214 lines
6.6 KiB
Python

"""
Generate Historical Match Data.
This script generates 100+ historical matches with energy scores
and actual winners for backtesting purposes.
"""
import sys
import os
import random
from datetime import datetime, timedelta
# Add parent directory to path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from sqlalchemy.orm import Session
from app.database import engine, SessionLocal
from app.models.match import Match
from app.models.energy_score import EnergyScore
# Competitions covered by the generated dataset. Every entry must have a
# matching key in TEAMS, because fixtures are drawn per league.
LEAGUES = [
    "Ligue 1",
    "Premier League",
    "La Liga",
    "Serie A",
    "Bundesliga",
    "Champions League",
]

# Candidate clubs per competition.
#
# Each club appears at most once per list and under one canonical name across
# lists. Fixtures are built by sampling two distinct *entries* from a list, so
# listing a club twice under different spellings (previously "OM" and
# "Marseille") could produce a club playing against itself, and using two
# spellings across leagues (previously "Man City" vs "Manchester City") would
# split one club into two identities in any per-team analysis.
TEAMS = {
    "Ligue 1": [
        "PSG", "OM", "Lyon", "Monaco", "Lille", "Nice", "Rennes", "Lens",
    ],
    "Premier League": [
        "Manchester City", "Liverpool", "Arsenal", "Chelsea", "Man United",
        "Tottenham", "Newcastle",
    ],
    "La Liga": [
        "Real Madrid", "Barcelona", "Atletico Madrid", "Sevilla", "Valencia",
        "Villarreal",
    ],
    "Serie A": [
        "Juventus", "Inter Milan", "AC Milan", "Napoli", "Roma", "Lazio",
    ],
    "Bundesliga": [
        "Bayern Munich", "Dortmund", "RB Leipzig", "Leverkusen", "Wolfsburg",
    ],
    "Champions League": [
        "Real Madrid", "PSG", "Bayern Munich", "Manchester City", "Barcelona",
        "Liverpool",
    ],
}
def generate_match_date(base_date: datetime, offset_days: int) -> datetime:
    """Return the kickoff datetime ``offset_days`` days before ``base_date``.

    The day is shifted back from ``base_date`` and the time of day is pinned
    to 20:00:00.000000, whatever time ``base_date`` itself carries.

    Args:
        base_date: Reference datetime the offset is counted back from.
        offset_days: Number of days to go back.

    Returns:
        The shifted datetime with its time set to 8 PM.
    """
    # Typical evening kickoff: 8 PM sharp.
    kickoff_time = {"hour": 20, "minute": 0, "second": 0, "microsecond": 0}
    match_day = base_date - timedelta(days=offset_days)
    return match_day.replace(**kickoff_time)
def generate_energy_score() -> float:
    """Draw a random energy score in [30.0, 90.0], rounded to two decimals."""
    lowest, highest = 30.0, 90.0
    raw_score = random.uniform(lowest, highest)
    return round(raw_score, 2)
def determine_actual_winner(home_team: str, away_team: str, home_energy: float, away_energy: float) -> str:
    """Pick a simulated match result, biased toward the higher-energy side.

    A single random draw in [0, 1) is compared against two cumulative
    thresholds that depend on the absolute energy gap:

    * gap above 15: stronger side 70%, weaker side 20%, draw 10%
    * gap above 5 (up to 15): stronger side 60%, weaker side 25%, draw 15%
    * otherwise (close match): home 35%, away 35%, draw 30%

    Args:
        home_team: Home team name (accepted for interface symmetry; not used
            in the computation).
        away_team: Away team name (likewise not used).
        home_energy: Energy score of the home side.
        away_energy: Energy score of the away side.

    Returns:
        One of ``"home"``, ``"away"`` or ``"draw"``.
    """
    gap = home_energy - away_energy
    roll = random.random()
    margin = abs(gap)

    # Cumulative cut-offs: below the first the favoured side wins, below the
    # second the other side wins, anything above is a draw.
    if margin > 15:
        favourite_cut, upset_cut = 0.7, 0.9
    elif margin > 5:
        favourite_cut, upset_cut = 0.6, 0.85
    else:
        favourite_cut, upset_cut = 0.35, 0.70

    # The away side is only favoured when it leads by a clear margin; in a
    # close match the home side takes the first probability slot.
    if margin > 5 and gap < 0:
        favoured, other = "away", "home"
    else:
        favoured, other = "home", "away"

    if roll < favourite_cut:
        return favoured
    if roll < upset_cut:
        return other
    return "draw"
def generate_historical_matches(num_matches: int = 120) -> list:
    """Generate synthetic, already-completed matches for backtesting.

    One match is produced per day going backwards from yesterday. For each
    match a league is picked at random, two distinct entries are sampled from
    that league's team list, both sides get a random energy score, and a
    winner is simulated from those scores.

    Args:
        num_matches: Number of matches to generate (default: 120). A value
            of zero or less yields an empty list.

    Returns:
        List of ``(match_data, energy_score_data)`` tuples, where
        ``match_data`` holds ``home_team``, ``away_team``, ``date``,
        ``league``, ``status`` (always ``'completed'``) and ``actual_winner``,
        and ``energy_score_data`` holds ``home_energy``, ``away_energy``,
        ``collective_energy`` (mean of the two) and ``timestamp`` (the match
        date).
    """
    matches_data = []
    base_date = datetime.now()

    for i in range(num_matches):
        league = random.choice(LEAGUES)
        home_team, away_team = random.sample(TEAMS[league], 2)

        home_energy = generate_energy_score()
        away_energy = generate_energy_score()

        # Offsets start at 1 (yesterday). Offset 0 would date the first match
        # today at 20:00, which is still in the future whenever the script
        # runs before 8 PM, yet the match is stored as 'completed' with a
        # known winner.
        match_date = generate_match_date(base_date, i + 1)
        actual_winner = determine_actual_winner(
            home_team, away_team, home_energy, away_energy
        )

        match_data = {
            'home_team': home_team,
            'away_team': away_team,
            'date': match_date,
            'league': league,
            'status': 'completed',
            'actual_winner': actual_winner,
        }
        energy_score_data = {
            'home_energy': home_energy,
            'away_energy': away_energy,
            'collective_energy': (home_energy + away_energy) / 2,
            'timestamp': match_date,
        }
        matches_data.append((match_data, energy_score_data))

    return matches_data
def insert_historical_matches(db: Session, matches_data: list):
    """Persist generated matches and their energy scores in one commit.

    Args:
        db: Database session used for all inserts.
        matches_data: List of ``(match_data, energy_score_data)`` tuples;
            each dict is expanded into keyword arguments for ``Match`` and
            ``EnergyScore`` respectively.
    """
    for match_fields, energy_fields in matches_data:
        match_row = Match(**match_fields)
        db.add(match_row)
        # Flush before reading match_row.id so the ID is available for the
        # energy score that references this match.
        db.flush()

        db.add(EnergyScore(match_id=match_row.id, **energy_fields))

    # Single commit at the end: all rows are written together.
    db.commit()
    inserted = len(matches_data)
    print(f"✅ Successfully inserted {inserted} historical matches")
def main():
    """Generate historical matches, store them, and print a summary.

    If matches with a recorded winner already exist, the user is asked to
    confirm before more are added. Any exception rolls the session back, is
    reported on stdout and re-raised; the session is always closed.
    """
    print("🚀 Generating historical match data for backtesting...\n")

    db = SessionLocal()
    try:
        # Matches that already carry a winner count as existing history.
        already_stored = (
            db.query(Match)
            .filter(Match.actual_winner.isnot(None))
            .count()
        )
        if already_stored > 0:
            print(f"⚠️ Warning: {already_stored} historical matches already exist in database")
            answer = input("Do you want to add more matches? (y/n): ")
            if answer.lower() != 'y':
                print("❌ Aborted")
                return

        num_matches = 120
        print(f"📊 Generating {num_matches} historical matches...")
        matches_data = generate_historical_matches(num_matches)

        print("💾 Inserting matches into database...")
        insert_historical_matches(db, matches_data)

        print("\n📈 Summary:")
        print(f" - Total matches: {num_matches}")

        # Tally per league, reported in alphabetical order of league name.
        per_league = {}
        for fields, _ in matches_data:
            per_league.setdefault(fields['league'], 0)
            per_league[fields['league']] += 1
        print(" - Matches by league:")
        for league_name in sorted(per_league):
            print(f"{league_name}: {per_league[league_name]}")

        # Tally per outcome; all three outcomes are reported even when zero.
        outcome_tally = dict.fromkeys(('home', 'away', 'draw'), 0)
        for fields, _ in matches_data:
            outcome_tally[fields['actual_winner']] += 1
        print(" - Results:")
        print(f" • Home wins: {outcome_tally['home']}")
        print(f" • Away wins: {outcome_tally['away']}")
        print(f" • Draws: {outcome_tally['draw']}")

        print("\n✅ Historical match data generation complete!")
        print("🎯 You can now run backtesting via: POST /api/v1/backtesting/run")
    except Exception as exc:
        # Undo anything pending, report, and let the caller see the failure.
        db.rollback()
        print(f"❌ Error: {exc!s}")
        raise
    finally:
        db.close()


if __name__ == "__main__":
    main()