# chartbastan/backend/app/services/backtesting_service.py

"""
Backtesting Service.

This module provides the service layer for backtesting operations,
integrating with the database to run backtesting on historical matches.
"""

import logging
from datetime import datetime
from typing import Dict, List, Any, Optional

from sqlalchemy.orm import Session

from app.models.match import Match
from app.models.energy_score import EnergyScore
from app.ml.backtesting import (
    run_backtesting_batch,
    export_to_json,
    export_to_csv,
    export_to_html,
    filter_matches_by_league,
    filter_matches_by_period
)

# Module-level logger named after this module, shared by every method below.
logger = logging.getLogger(__name__)


class BacktestingService:
    """Service for running backtesting on historical match data."""

    def __init__(self, db: Session):
        """
        Initialize backtesting service.

        Args:
            db: SQLAlchemy database session
        """
        self.db = db

    def get_historical_matches(
        self,
        leagues: Optional[List[str]] = None,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> List[Dict[str, Any]]:
        """
        Retrieve historical matches with energy scores and actual results.

        Args:
            leagues: Optional list of leagues to filter by
            start_date: Optional start date for filtering
            end_date: Optional end date for filtering

        Returns:
            List of match dictionaries with energy scores and actual winners

        Raises:
            ValueError: If no historical matches found
        """
        logger.info("Fetching historical matches from database")

        # Query matches that have actual_winner set (completed matches)
        query = self.db.query(Match).filter(
            Match.actual_winner.isnot(None)
        )

        # Apply filters
        if leagues:
            query = query.filter(Match.league.in_(leagues))

        if start_date:
            query = query.filter(Match.date >= start_date)

        if end_date:
            query = query.filter(Match.date <= end_date)

        matches = query.all()

        if not matches:
            raise ValueError(
                "No historical matches found. Please populate database with "
                "historical match data and actual winners before running backtesting."
            )

        logger.info(f"Found {len(matches)} historical matches")

        # Convert to list of dictionaries with energy scores
        match_data = []
        for match in matches:
            # Get energy scores for this match
            home_energy_score = self.db.query(EnergyScore).filter(
                EnergyScore.match_id == match.id
            ).first()

            if not home_energy_score:
                logger.warning(f"No energy score found for match {match.id}, skipping")
                continue

            match_dict = {
                'match_id': match.id,
                'home_team': match.home_team,
                'away_team': match.away_team,
                'date': match.date,
                'league': match.league,
                'home_energy': home_energy_score.home_energy,
                'away_energy': home_energy_score.away_energy,
                'actual_winner': match.actual_winner
            }

            match_data.append(match_dict)

        logger.info(f"Processed {len(match_data)} matches with energy scores")

        return match_data

    def run_backtesting(
        self,
        leagues: Optional[List[str]] = None,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> Dict[str, Any]:
        """
        Run backtesting on historical matches with optional filters.

        Args:
            leagues: Optional list of leagues to filter by
            start_date: Optional start date for filtering
            end_date: Optional end date for filtering

        Returns:
            Dictionary containing backtesting results

        Raises:
            ValueError: If no matches found or matches lack required data
        """
        logger.info("Starting backtesting process")

        # Get historical matches
        matches = self.get_historical_matches(
            leagues=leagues,
            start_date=start_date,
            end_date=end_date
        )

        # Apply filters
        if leagues:
            matches = filter_matches_by_league(matches, leagues)

        if start_date or end_date:
            matches = filter_matches_by_period(matches, start_date, end_date)

        if not matches:
            raise ValueError("No matches match the specified filters")

        logger.info(f"Running backtesting on {len(matches)} matches")

        # Run backtesting
        result = run_backtesting_batch(matches)

        # Log results
        logger.info(
            f"Backtesting complete: {result['total_matches']} matches, "
            f"{result['correct_predictions']} correct, "
            f"{result['accuracy']:.2f}% accuracy, "
            f"status: {result['status']}"
        )

        return result

    def export_results(
        self,
        backtesting_result: Dict[str, Any],
        format: str = 'json'
    ) -> str:
        """
        Export backtesting results in specified format.

        Args:
            backtesting_result: Result from run_backtesting
            format: Export format ('json', 'csv', or 'html')

        Returns:
            Formatted string in specified format

        Raises:
            ValueError: If format is not supported
        """
        logger.info(f"Exporting backtesting results as {format}")

        if format == 'json':
            return export_to_json(backtesting_result)
        elif format == 'csv':
            return export_to_csv(backtesting_result)
        elif format == 'html':
            return export_to_html(backtesting_result)
        else:
            raise ValueError(
                f"Unsupported export format: {format}. "
                "Supported formats are: json, csv, html"
            )