Initial commit
This commit is contained in:
213
backend/scripts/generate_historical_matches.py
Normal file
213
backend/scripts/generate_historical_matches.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""
|
||||
Generate Historical Match Data.

This script generates 100+ historical matches with energy scores
and actual winners for backtesting purposes.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import random
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
from app.database import engine, SessionLocal
|
||||
from app.models.match import Match
|
||||
from app.models.energy_score import EnergyScore
|
||||
|
||||
|
||||
# Competitions a generated match can belong to; every entry is used as a
# key into TEAMS when picking the two sides.
LEAGUES = [
    "Ligue 1", "Premier League", "La Liga",
    "Serie A", "Bundesliga", "Champions League",
]
|
||||
|
||||
# Candidate clubs per competition. Two distinct entries are sampled from
# the relevant list for every generated match, so each list must hold at
# least two *different* clubs and must not name the same club twice.
TEAMS = {
    # "OM" and "Marseille" are the same club; only one spelling is kept so
    # a club can never be drawn against itself.
    "Ligue 1": ["PSG", "OM", "Lyon", "Monaco", "Lille", "Nice", "Lens"],
    "Premier League": ["Manchester City", "Liverpool", "Arsenal", "Chelsea", "Man United", "Tottenham", "Newcastle"],
    "La Liga": ["Real Madrid", "Barcelona", "Atletico Madrid", "Sevilla", "Valencia", "Villarreal"],
    "Serie A": ["Juventus", "Inter Milan", "AC Milan", "Napoli", "Roma", "Lazio"],
    "Bundesliga": ["Bayern Munich", "Dortmund", "RB Leipzig", "Leverkusen", "Wolfsburg"],
    # Same spelling as in "Premier League" so the club has a single name
    # across competitions.
    "Champions League": ["Real Madrid", "PSG", "Bayern Munich", "Manchester City", "Barcelona", "Liverpool"],
}
|
||||
|
||||
|
||||
def generate_match_date(base_date: datetime, offset_days: int) -> datetime:
    """Return the kick-off datetime ``offset_days`` days before ``base_date``.

    The calendar day is shifted back by ``offset_days`` and the time of day
    is then pinned to 20:00:00.000000 (a typical evening kick-off).
    """
    days_back = timedelta(days=offset_days)
    match_day = base_date - days_back
    return match_day.replace(hour=20, minute=0, second=0, microsecond=0)
|
||||
|
||||
|
||||
def generate_energy_score() -> float:
    """Draw a random energy score in the range 30-90, rounded to 2 decimals."""
    raw_score = random.uniform(30.0, 90.0)
    return round(raw_score, 2)
|
||||
|
||||
|
||||
def determine_actual_winner(home_team: str, away_team: str, home_energy: float, away_energy: float) -> str:
    """
    Simulate a match result from the two energy scores.

    The side with the higher energy is favoured; the size of the gap
    selects how strongly:

    * gap above 15 points: favourite 70%, underdog 20%, draw 10%
    * gap above 5 points:  favourite 60%, underdog 25%, draw 15%
    * otherwise (close):   home 35%, away 35%, draw 30%

    Args:
        home_team: Home side name (not used by the simulation).
        away_team: Away side name (not used by the simulation).
        home_energy: Energy score of the home side.
        away_energy: Energy score of the away side.

    Returns:
        One of "home", "away" or "draw".
    """
    gap = home_energy - away_energy
    roll = random.random()  # exactly one draw per call

    # Cumulative cut-offs: below the first the favourite wins, below the
    # second the underdog wins, anything above is a draw.
    if abs(gap) > 15:
        favourite_cut, underdog_cut = 0.7, 0.9
    elif abs(gap) > 5:
        favourite_cut, underdog_cut = 0.6, 0.85
    else:
        favourite_cut, underdog_cut = 0.35, 0.70

    # The away side is only treated as favourite when it leads by more than
    # 5 points; in a close match the home side takes the first slot.
    if gap < -5:
        favourite, underdog = "away", "home"
    else:
        favourite, underdog = "home", "away"

    if roll < favourite_cut:
        return favourite
    if roll < underdog_cut:
        return underdog
    return "draw"
|
||||
|
||||
|
||||
def generate_historical_matches(num_matches: int = 120) -> list:
    """
    Generate synthetic, already-completed matches for backtesting.

    For every match a league is drawn from LEAGUES, two distinct clubs are
    sampled from that league's TEAMS entry, each side gets a random energy
    score and a result is simulated from those scores. Matches are dated
    one per day at 20:00, counting backwards from yesterday.

    Args:
        num_matches: Number of matches to generate (default: 120). Zero or
            a negative value yields an empty list.

    Returns:
        List of tuples (match_data, energy_score_data), where both items
        are dicts of keyword arguments for the Match and EnergyScore
        models respectively.
    """
    matches_data = []
    base_date = datetime.now()

    for i in range(num_matches):
        league = random.choice(LEAGUES)
        home_team, away_team = random.sample(TEAMS[league], 2)

        home_energy = generate_energy_score()
        away_energy = generate_energy_score()

        # Offsets start at 1 (yesterday): an offset of 0 would date the
        # match today at 20:00, which is either still in the future or
        # barely kicked off, yet it would be stored as 'completed' with a
        # known winner.
        match_date = generate_match_date(base_date, i + 1)

        actual_winner = determine_actual_winner(home_team, away_team, home_energy, away_energy)

        match_data = {
            'home_team': home_team,
            'away_team': away_team,
            'date': match_date,
            'league': league,
            'status': 'completed',
            'actual_winner': actual_winner,
        }

        energy_score_data = {
            'home_energy': home_energy,
            'away_energy': away_energy,
            'collective_energy': (home_energy + away_energy) / 2,
            # The score is stamped with the kick-off time of its match.
            'timestamp': match_date,
        }

        matches_data.append((match_data, energy_score_data))

    return matches_data
|
||||
|
||||
|
||||
def insert_historical_matches(db: Session, matches_data: list):
    """
    Persist generated matches and their energy scores in one transaction.

    Args:
        db: Database session used for all inserts; committed once at the end.
        matches_data: List of tuples (match_data, energy_score_data) holding
            the keyword arguments for Match and EnergyScore.
    """
    for match_fields, energy_fields in matches_data:
        match_row = Match(**match_fields)
        db.add(match_row)
        # Flush before building the score so match_row.id is available
        # for the match_id reference below.
        db.flush()

        score_row = EnergyScore(match_id=match_row.id, **energy_fields)
        db.add(score_row)

    # Single commit for the whole batch.
    db.commit()
    print(f"✅ Successfully inserted {len(matches_data)} historical matches")
|
||||
|
||||
|
||||
def main():
    """
    Script entry point: generate 120 matches, store them, print a summary.

    If matches with a recorded winner already exist, the user is asked on
    stdin whether to add more; any answer other than 'y' aborts. On any
    error the session is rolled back and the exception is re-raised; the
    session is always closed.
    """
    print("🚀 Generating historical match data for backtesting...\n")

    db = SessionLocal()

    try:
        # Matches that already carry a result count as existing history.
        with_result = db.query(Match).filter(Match.actual_winner.isnot(None))
        existing_matches = with_result.count()

        if existing_matches > 0:
            print(f"⚠️ Warning: {existing_matches} historical matches already exist in database")
            response = input("Do you want to add more matches? (y/n): ")
            if response.lower() != 'y':
                print("❌ Aborted")
                return

        num_matches = 120
        print(f"📊 Generating {num_matches} historical matches...")
        matches_data = generate_historical_matches(num_matches)

        print("💾 Inserting matches into database...")
        insert_historical_matches(db, matches_data)

        print("\n📈 Summary:")
        print(f" - Total matches: {num_matches}")

        # Tally generated matches per league.
        league_counts = {}
        for fields, _energy in matches_data:
            league_counts.setdefault(fields['league'], 0)
            league_counts[fields['league']] += 1

        print(f" - Matches by league:")
        for league, count in sorted(league_counts.items()):
            print(f" • {league}: {count}")

        # Tally generated matches per outcome.
        winner_counts = dict.fromkeys(('home', 'away', 'draw'), 0)
        for fields, _energy in matches_data:
            winner_counts[fields['actual_winner']] += 1

        print(f" - Results:")
        print(f" • Home wins: {winner_counts['home']}")
        print(f" • Away wins: {winner_counts['away']}")
        print(f" • Draws: {winner_counts['draw']}")

        print("\n✅ Historical match data generation complete!")
        print("🎯 You can now run backtesting via: POST /api/v1/backtesting/run")

    except Exception as e:
        # Undo any partial inserts, report, and let the caller see the failure.
        db.rollback()
        print(f"❌ Error: {str(e)}")
        raise
    finally:
        db.close()
|
||||
|
||||
|
||||
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user