"""
Generate Historical Match Data.

This script generates 100+ historical matches with energy scores
and actual winners for backtesting purposes.
"""

import sys
import os
import random
from collections import Counter
from datetime import datetime, timedelta

# Add parent directory to path for imports. This must run before the
# ``app.*`` imports below, which is why they are not at the very top.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from sqlalchemy.orm import Session
from app.database import engine, SessionLocal
from app.models.match import Match
from app.models.energy_score import EnergyScore


LEAGUES = [
    "Ligue 1",
    "Premier League",
    "La Liga",
    "Serie A",
    "Bundesliga",
    "Champions League",
]

# Team pool per league. Each club must appear under exactly one spelling
# within a league, otherwise random.sample() can pair a club with itself
# (the list previously contained both "OM" and "Marseille").
TEAMS = {
    "Ligue 1": ["PSG", "Lyon", "Monaco", "Lille", "Nice", "Marseille", "Lens"],
    "Premier League": [
        "Manchester City", "Liverpool", "Arsenal", "Chelsea",
        "Man United", "Tottenham", "Newcastle",
    ],
    "La Liga": [
        "Real Madrid", "Barcelona", "Atletico Madrid",
        "Sevilla", "Valencia", "Villarreal",
    ],
    "Serie A": ["Juventus", "Inter Milan", "AC Milan", "Napoli", "Roma", "Lazio"],
    "Bundesliga": ["Bayern Munich", "Dortmund", "RB Leipzig", "Leverkusen", "Wolfsburg"],
    # Same spelling as in the domestic leagues so a club keeps one identity.
    "Champions League": [
        "Real Madrid", "PSG", "Bayern Munich",
        "Manchester City", "Barcelona", "Liverpool",
    ],
}


def generate_match_date(base_date: datetime, offset_days: int) -> datetime:
    """Return ``base_date`` moved back ``offset_days`` days, at 20:00:00.

    Args:
        base_date: Reference date to count back from.
        offset_days: Number of days to subtract from ``base_date``.

    Returns:
        The shifted datetime with the time set to 8 PM sharp
        (a typical match kick-off time).
    """
    match_day = base_date - timedelta(days=offset_days)
    return match_day.replace(hour=20, minute=0, second=0, microsecond=0)


def generate_energy_score() -> float:
    """Generate a random energy score between 30 and 90 (2 decimals)."""
    return round(random.uniform(30.0, 90.0), 2)


def determine_actual_winner(home_team: str, away_team: str,
                            home_energy: float, away_energy: float) -> str:
    """Pick a match result biased towards the side with more energy.

    A single random draw is compared against cumulative thresholds that
    depend on the absolute energy gap:

    * gap > 15: favourite 70%, underdog 20%, draw 10%
    * gap > 5:  favourite 60%, underdog 25%, draw 15%
    * otherwise (close match): home 35%, away 35%, draw 30%

    Args:
        home_team: Home team name (not used in the computation; kept for
            interface compatibility).
        away_team: Away team name (not used in the computation; kept for
            interface compatibility).
        home_energy: Energy score of the home team.
        away_energy: Energy score of the away team.

    Returns:
        One of ``"home"``, ``"away"`` or ``"draw"``.
    """
    energy_diff = home_energy - away_energy
    rand = random.random()
    gap = abs(energy_diff)

    if gap > 15:
        favourite_cut, underdog_cut = 0.7, 0.9
    elif gap > 5:
        favourite_cut, underdog_cut = 0.6, 0.85
    else:
        # Close match - more random, no favourite.
        if rand < 0.35:
            return "home"
        return "away" if rand < 0.70 else "draw"

    # gap > 5 here, so energy_diff is non-zero and its sign names the favourite.
    if energy_diff > 0:
        favourite, underdog = "home", "away"
    else:
        favourite, underdog = "away", "home"

    if rand < favourite_cut:
        return favourite
    return underdog if rand < underdog_cut else "draw"


def generate_historical_matches(num_matches: int = 120) -> list:
    """Generate historical match data.

    One match is generated per day, going backwards from yesterday. The
    offset starts at 1 (not 0) because a match dated today at 20:00 would
    lie in the future whenever the script runs before 8 PM, which is
    inconsistent with a 'completed' match that already has a winner.

    Args:
        num_matches: Number of matches to generate (default: 120)

    Returns:
        List of tuples (match_data, energy_score_data), where both
        elements are dicts of keyword arguments for the ``Match`` and
        ``EnergyScore`` models respectively.
    """
    matches_data = []
    base_date = datetime.now()

    for i in range(num_matches):
        league = random.choice(LEAGUES)
        home_team, away_team = random.sample(TEAMS[league], 2)

        home_energy = generate_energy_score()
        away_energy = generate_energy_score()

        match_date = generate_match_date(base_date, i + 1)
        actual_winner = determine_actual_winner(
            home_team, away_team, home_energy, away_energy
        )

        match_data = {
            'home_team': home_team,
            'away_team': away_team,
            'date': match_date,
            'league': league,
            'status': 'completed',
            'actual_winner': actual_winner,
        }
        energy_score_data = {
            'home_energy': home_energy,
            'away_energy': away_energy,
            'collective_energy': (home_energy + away_energy) / 2,
            'timestamp': match_date,
        }
        matches_data.append((match_data, energy_score_data))

    return matches_data


def insert_historical_matches(db: Session, matches_data: list) -> None:
    """Insert historical matches into database.

    All rows are added within the session's current transaction and
    committed once at the end; this function does not roll back on
    failure, that is left to the caller.

    Args:
        db: Database session
        matches_data: List of tuples (match_data, energy_score_data)
    """
    for match_data, energy_data in matches_data:
        # Insert match
        match = Match(**match_data)
        db.add(match)
        db.flush()  # Get the match ID

        # Insert energy score linked to the freshly flushed match
        energy_score = EnergyScore(match_id=match.id, **energy_data)
        db.add(energy_score)

    db.commit()
    print(f"✅ Successfully inserted {len(matches_data)} historical matches")


def main() -> None:
    """Main function to generate and insert historical match data.

    Asks for confirmation when matches with a recorded winner already
    exist, then generates the matches, inserts them and prints a summary.
    On any error the session is rolled back and the exception re-raised;
    the session is always closed.
    """
    print("🚀 Generating historical match data for backtesting...\n")

    # Create database session
    db = SessionLocal()

    try:
        # Check if matches already exist
        existing_matches = db.query(Match).filter(
            Match.actual_winner.isnot(None)
        ).count()

        if existing_matches > 0:
            print(f"⚠️ Warning: {existing_matches} historical matches already exist in database")
            response = input("Do you want to add more matches? (y/n): ")
            # Tolerate surrounding whitespace such as "y " or " Y".
            if response.strip().lower() != 'y':
                print("❌ Aborted")
                return

        # Generate matches
        num_matches = 120
        print(f"📊 Generating {num_matches} historical matches...")
        matches_data = generate_historical_matches(num_matches)

        # Insert matches
        print("💾 Inserting matches into database...")
        insert_historical_matches(db, matches_data)

        # Display summary
        print("\n📈 Summary:")
        print(f" - Total matches: {num_matches}")

        # Count by league
        league_counts = Counter(match_data['league'] for match_data, _ in matches_data)
        print(" - Matches by league:")
        for league, count in sorted(league_counts.items()):
            print(f" • {league}: {count}")

        # Count by result (a Counter reports 0 for results that never occurred)
        winner_counts = Counter(
            match_data['actual_winner'] for match_data, _ in matches_data
        )
        print(" - Results:")
        print(f" • Home wins: {winner_counts['home']}")
        print(f" • Away wins: {winner_counts['away']}")
        print(f" • Draws: {winner_counts['draw']}")

        print("\n✅ Historical match data generation complete!")
        print("🎯 You can now run backtesting via: POST /api/v1/backtesting/run")

    except Exception as e:
        # Top-level boundary: undo partial inserts, report, and propagate.
        db.rollback()
        print(f"❌ Error: {e}")
        raise
    finally:
        db.close()


if __name__ == "__main__":
    main()