"""
Main script orchestrating the whole Chartbastan system.

This script:
1. Seeds the database with test matches
2. Expects RabbitMQ to be running (must be started manually)
3. Launches all workers
4. Launches the FastAPI server

Usage:
    python run_all_system.py
"""
import logging
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
from datetime import datetime, timedelta
|
||
from sqlalchemy.orm import Session
|
||
|
||
# Add parent directory to path
|
||
sys_path = str(Path(__file__).parent)
|
||
if sys_path not in sys.path:
|
||
sys.path.insert(0, sys_path)
|
||
|
||
from app.database import engine, Base, get_db
|
||
from app.models.match import Match
|
||
from app.scrapers.twitter_scraper import create_twitter_scraper
|
||
from app.scrapers.reddit_scraper import create_reddit_scraper
|
||
from app.services.sentiment_service import (
|
||
process_tweet_batch,
|
||
process_reddit_post_batch,
|
||
)
|
||
from app.ml.energy_calculator import calculate_energy_score
|
||
from app.services.prediction_service import PredictionService
|
||
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||
)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def create_test_matches():
|
||
"""
|
||
Crée des matchs de test dans la base de données.
|
||
|
||
Matchs créés pour l'Epic 3 (Backtesting) et Epic 2 (Data Collection).
|
||
"""
|
||
logger.info("🎯 Création des matchs de test...")
|
||
|
||
db = next(get_db())
|
||
|
||
try:
|
||
# Vérifier si des matchs existent déjà
|
||
existing_matches = db.query(Match).count()
|
||
|
||
if existing_matches > 0:
|
||
logger.info(f"ℹ️ {existing_matches} matchs existent déjà, skip de la création")
|
||
return
|
||
|
||
# Créer des matchs de test (matchs à venir dans les 7 prochains jours)
|
||
test_matches = [
|
||
{
|
||
"home_team": "PSG",
|
||
"away_team": "Olympique de Marseille",
|
||
"date": datetime.now() + timedelta(hours=24),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Paris Saint-Germain",
|
||
"away_team": "AS Monaco",
|
||
"date": datetime.now() + timedelta(hours=48),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Olympique Lyonnais",
|
||
"away_team": "Olympique de Marseille",
|
||
"date": datetime.now() + timedelta(hours=72),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Real Madrid",
|
||
"away_team": "Barcelona",
|
||
"date": datetime.now() + timedelta(hours=96),
|
||
"league": "La Liga",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Manchester United",
|
||
"away_team": "Liverpool",
|
||
"date": datetime.now() + timedelta(hours=120),
|
||
"league": "Premier League",
|
||
"status": "scheduled"
|
||
}
|
||
]
|
||
|
||
# Insérer les matchs
|
||
for match_data in test_matches:
|
||
match = Match(**match_data)
|
||
db.add(match)
|
||
|
||
db.commit()
|
||
logger.info(f"✅ {len(test_matches)} matchs de test créés avec succès")
|
||
|
||
# Afficher les matchs créés
|
||
print("\n" + "="*70)
|
||
print("📊 MATCHS DE TEST CRÉÉS")
|
||
print("="*70)
|
||
for i, match in enumerate(test_matches, 1):
|
||
print(f"\n{i}. {match['home_team']} vs {match['away_team']}")
|
||
print(f" 📅 {match['date'].strftime('%Y-%m-%d %H:%M')}")
|
||
print(f" 🏆 {match['league']}")
|
||
print(f" 📊 Status: {match['status']}")
|
||
print("="*70 + "\n")
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur lors de la création des matchs: {e}")
|
||
db.rollback()
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
def check_rabbitmq_status():
|
||
"""
|
||
Vérifie si RabbitMQ est accessible.
|
||
"""
|
||
logger.info("🔗 Vérification de la connexion RabbitMQ...")
|
||
|
||
try:
|
||
import os
|
||
rabbitmq_url = os.getenv('RABBITMQ_URL', 'amqp://guest:guest@localhost:5672')
|
||
|
||
from app.queues.rabbitmq_client import create_rabbitmq_client
|
||
client = create_rabbitmq_client(rabbitmq_url=rabbitmq_url)
|
||
client.connect()
|
||
client.close()
|
||
|
||
logger.info("✅ RabbitMQ est accessible sur localhost:5672")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ RabbitMQ n'est pas accessible: {e}")
|
||
logger.warning("⚠️ Veuillez démarrer RabbitMQ avec Docker:")
|
||
logger.warning(" docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management")
|
||
return False
|
||
|
||
|
||
def simulate_full_pipeline():
|
||
"""
|
||
Simule le pipeline complet de génération de prédictions.
|
||
|
||
Ce pipeline correspond à Epic 2 (Data Collection) et Epic 3 (Prediction System).
|
||
"""
|
||
logger.info("🔄 Lancement du pipeline complet de simulation...")
|
||
|
||
db = next(get_db())
|
||
|
||
try:
|
||
# Récupérer les matchs
|
||
matches = db.query(Match).all()
|
||
|
||
if not matches:
|
||
logger.error("❌ Aucun match trouvé. Veuillez d'abord créer des matchs de test.")
|
||
return
|
||
|
||
logger.info(f"📊 {len(matches)} matchs trouvés")
|
||
|
||
# Pour chaque match, générer des données simulées
|
||
for match in matches:
|
||
logger.info(f"\n{'='*50}")
|
||
logger.info(f"📋 Match: {match.home_team} vs {match.away_team}")
|
||
|
||
# Simuler des tweets et posts Reddit (mock data)
|
||
mock_tweets = [
|
||
{
|
||
"tweet_id": f"tweet_{match.id}_{i}",
|
||
"text": f"Allez {match.home_team} ! On va gagner ce match ! #football #{match.home_team.replace(' ', '')}",
|
||
"author": f"fan_{match.home_team.lower()}",
|
||
"created_at": datetime.now(),
|
||
"retweet_count": 10 + i,
|
||
"like_count": 20 + i,
|
||
"reply_count": 5 + i
|
||
}
|
||
for i in range(10)
|
||
]
|
||
|
||
mock_reddit_posts = [
|
||
{
|
||
"post_id": f"reddit_{match.id}_{i}",
|
||
"title": f"{match.home_team} vs {match.away_team} - Match Preview",
|
||
"text": f"Prediction: {match.home_team} will win 2-1",
|
||
"author": f"u/redditor_{i}",
|
||
"created_at": datetime.now(),
|
||
"upvote_count": 15 + i,
|
||
"downvote_count": 2
|
||
}
|
||
for i in range(5)
|
||
]
|
||
|
||
# Analyser le sentiment
|
||
logger.info("🔍 Analyse de sentiment VADER...")
|
||
from app.models.tweet import Tweet
|
||
from app.models.reddit_post import RedditPost
|
||
|
||
# Créer et analyser les tweets (simulation)
|
||
tweet_sentiments = []
|
||
for tweet_data in mock_tweets:
|
||
tweet = Tweet(
|
||
tweet_id=tweet_data["tweet_id"],
|
||
text=tweet_data["text"],
|
||
author=tweet_data["author"],
|
||
created_at=tweet_data["created_at"],
|
||
retweet_count=tweet_data["retweet_count"],
|
||
like_count=tweet_data["like_count"],
|
||
reply_count=tweet_data["reply_count"],
|
||
match_id=match.id
|
||
)
|
||
db.add(tweet)
|
||
|
||
# Analyse de sentiment simple
|
||
positive_words = ["gagner", "win", "victoire", "champion", "excellent", "super"]
|
||
negative_words = ["perdre", "perdu", "mauvais", "nul", "faible"]
|
||
|
||
text_lower = tweet_data["text"].lower()
|
||
pos_count = sum(1 for word in positive_words if word in text_lower)
|
||
neg_count = sum(1 for word in negative_words if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
tweet_sentiments.append({
|
||
"tweet_id": tweet_data["tweet_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.5,
|
||
"negative_score": neg_count * 0.5,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.5,
|
||
"compound_score": (pos_count - neg_count) * 0.5
|
||
})
|
||
|
||
logger.info(f" ✅ {len(tweet_sentiments)} tweets analysés")
|
||
|
||
# Créer les posts Reddit
|
||
reddit_sentiments = []
|
||
for post_data in mock_reddit_posts:
|
||
post = RedditPost(
|
||
post_id=post_data["post_id"],
|
||
title=post_data["title"],
|
||
text=post_data["text"],
|
||
author=post_data["author"],
|
||
created_at=post_data["created_at"],
|
||
upvote_count=post_data["upvote_count"],
|
||
downvote_count=post_data["downvote_count"],
|
||
match_id=match.id
|
||
)
|
||
db.add(post)
|
||
|
||
# Analyse de sentiment simple
|
||
text_lower = post_data["text"].lower()
|
||
pos_count = sum(1 for word in ["win", "gagner"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["lose", "perdre"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
reddit_sentiments.append({
|
||
"post_id": post_data["post_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.3,
|
||
"negative_score": neg_count * 0.3,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.3,
|
||
"compound_score": (pos_count - neg_count) * 0.3
|
||
})
|
||
|
||
logger.info(f" ✅ {len(reddit_sentiments)} posts Reddit analysés")
|
||
|
||
db.commit()
|
||
|
||
# Calculer l'énergie collective
|
||
logger.info("⚡ Calcul de l'énergie collective...")
|
||
logger.info(" 📊 Formule: (Positif - Négatif) × Volume × Viralité")
|
||
logger.info(" 🎯 Pondération: Twitter 60%, Reddit 25%, RSS 15%")
|
||
|
||
# Données d'énergie
|
||
energy_data = {
|
||
"match_id": match.id,
|
||
"team_id": 0, # home team
|
||
"twitter_sentiments": tweet_sentiments,
|
||
"reddit_sentiments": reddit_sentiments,
|
||
"rss_sentiments": [],
|
||
"tweets_with_timestamps": [
|
||
{"tweet_id": t["tweet_id"], "created_at": datetime.now()}
|
||
for t in tweet_sentiments[:5] # 5 tweets les plus récents
|
||
]
|
||
}
|
||
|
||
# Calculer le score d'énergie
|
||
energy_result = calculate_energy_score(
|
||
match_id=match.id,
|
||
team_id=0,
|
||
twitter_sentiments=tweet_sentiments,
|
||
reddit_sentiments=reddit_sentiments,
|
||
rss_sentiments=[],
|
||
tweets_with_timestamps=energy_data["tweets_with_timestamps"]
|
||
)
|
||
|
||
logger.info(f" ⚡ Score d'énergie équipe domicile: {energy_result['score']:.2f}")
|
||
logger.info(f" 📊 Confiance: {energy_result['confidence']:.1f}%")
|
||
|
||
# Répéter pour l'équipe visiteur
|
||
energy_data_away = energy_data.copy()
|
||
energy_data_away["team_id"] = 1
|
||
|
||
mock_tweets_away = [
|
||
{
|
||
"tweet_id": f"tweet_away_{match.id}_{i}",
|
||
"text": f"Allez {match.away_team} ! On va gagner ce match ! #football #{match.away_team.replace(' ', '')}",
|
||
"author": f"fan_{match.away_team.lower()}",
|
||
"created_at": datetime.now(),
|
||
"retweet_count": 8 + i,
|
||
"like_count": 15 + i,
|
||
"reply_count": 4 + i
|
||
}
|
||
for i in range(8)
|
||
]
|
||
|
||
tweet_sentiments_away = []
|
||
for tweet_data in mock_tweets_away:
|
||
text_lower = tweet_data["text"].lower()
|
||
pos_count = sum(1 for word in ["gagner", "win", "victoire"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["perdre", "perdu", "mauvais"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
tweet_sentiments_away.append({
|
||
"tweet_id": tweet_data["tweet_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.5,
|
||
"negative_score": neg_count * 0.5,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.5,
|
||
"compound_score": (pos_count - neg_count) * 0.5
|
||
})
|
||
|
||
energy_data_away["twitter_sentiments"] = tweet_sentiments_away
|
||
|
||
mock_reddit_posts_away = [
|
||
{
|
||
"post_id": f"reddit_away_{match.id}_{i}",
|
||
"title": f"{match.away_team} vs {match.home_team} - Match Preview",
|
||
"text": f"Prediction: {match.away_team} will win 3-2",
|
||
"author": f"u/redditor_away_{i}",
|
||
"created_at": datetime.now(),
|
||
"upvote_count": 12 + i,
|
||
"downvote_count": 3
|
||
}
|
||
for i in range(6)
|
||
]
|
||
|
||
reddit_sentiments_away = []
|
||
for post_data in mock_reddit_posts_away:
|
||
text_lower = post_data["text"].lower()
|
||
pos_count = sum(1 for word in ["win", "gagner"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["lose", "perdre"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
reddit_sentiments_away.append({
|
||
"post_id": post_data["post_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.3,
|
||
"negative_score": neg_count * 0.3,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.3,
|
||
"compound_score": (pos_count - neg_count) * 0.3
|
||
})
|
||
|
||
energy_data_away["reddit_sentiments"] = reddit_sentiments_away
|
||
energy_data_away["tweets_with_timestamps"] = [
|
||
{"tweet_id": t["tweet_id"], "created_at": datetime.now()}
|
||
for t in tweet_sentiments_away[:5]
|
||
]
|
||
|
||
energy_result_away = calculate_energy_score(
|
||
match_id=match.id,
|
||
team_id=1,
|
||
twitter_sentiments=tweet_sentiments_away,
|
||
reddit_sentiments=reddit_sentiments_away,
|
||
rss_sentiments=[],
|
||
tweets_with_timestamps=energy_data_away["tweets_with_timestamps"]
|
||
)
|
||
|
||
logger.info(f" ⚡ Score d'énergie équipe visiteur: {energy_result_away['score']:.2f}")
|
||
logger.info(f" 📊 Confiance: {energy_result_away['confidence']:.1f}%")
|
||
|
||
# Générer une prédiction
|
||
logger.info("🎯 Génération de la prédiction...")
|
||
|
||
home_energy = energy_result['score']
|
||
away_energy = energy_result_away['score']
|
||
|
||
# Service de prédiction
|
||
prediction_service = PredictionService(db)
|
||
prediction = prediction_service.create_prediction_for_match(
|
||
match_id=match.id,
|
||
home_energy=home_energy,
|
||
away_energy=away_energy,
|
||
energy_score_label="high" if home_energy > 50 else "medium"
|
||
)
|
||
|
||
logger.info(f" 🏆 Vainqueur prédit: {prediction.predicted_winner}")
|
||
logger.info(f" 📊 Confidence: {prediction.confidence}")
|
||
logger.info(f" ✅ Prédiction créée avec succès !")
|
||
|
||
logger.info("="*50)
|
||
|
||
logger.info(f"\n✅ Pipeline complet terminé : {len(matches)} prédictions générées")
|
||
|
||
# Résumé
|
||
print("\n" + "="*70)
|
||
print("🎊 RÉSUMÉ DU PIPELINE")
|
||
print("="*70)
|
||
print(f"📊 Matchs créés: {len(matches)}")
|
||
print(f"🐦 Tweets analysés: {len(matches) * 18}") # ~10 tweets home + ~8 tweets away
|
||
print(f"📝 Posts Reddit analysés: {len(matches) * 11}") # ~5 posts home + ~6 posts away
|
||
print(f"⚡ Scores d'énergie calculés: {len(matches) * 2}") # 2 équipes par match
|
||
print(f"🎯 Prédictions générées: {len(matches)}")
|
||
print("="*70 + "\n")
|
||
print("🌐 API disponible sur: http://127.0.0.1:8000/docs")
|
||
print("📊 Dashboard disponible sur: http://localhost:3000")
|
||
print("\n📝 Prochaine étape:")
|
||
print(" 1. Accédez à http://localhost:3000/dashboard")
|
||
print(" 2. Connectez-vous avec votre compte utilisateur")
|
||
print(" 3. Voyez les prédictions dans le dashboard !")
|
||
print("\n")
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur lors du pipeline: {e}")
|
||
db.rollback()
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
def print_menu():
|
||
"""Affiche le menu principal."""
|
||
print("\n" + "="*70)
|
||
print("🚀 SYSTÈME CHARTBASTAN - MENU PRINCIPAL")
|
||
print("="*70)
|
||
print("\n📋 OPTIONS DISPONIBLES:")
|
||
print("\n1️⃣ Initialiser la base de données avec des matchs de test")
|
||
print("2️⃣ Simuler le pipeline complet (scraping → analyse → énergie → prédictions)")
|
||
print("3️⃣ Vérifier la connexion RabbitMQ")
|
||
print("4️⃣ Lancer le serveur FastAPI uniquement")
|
||
print("0️⃣ Tout lancer (matchs + pipeline + API)")
|
||
print("\n📝 CONFIGURATION:")
|
||
print(" - Variables d'environnement dans backend/.env")
|
||
print(" - RabbitMQ doit être lancé avec Docker avant d'utiliser les queues")
|
||
print(" - API externe (Twitter/Reddit) optionnelle pour le scraping réel")
|
||
print("\n🚀 DOCKER RABBITMQ (si nécessaire):")
|
||
print(" docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management")
|
||
print("\n" + "="*70 + "\n")
|
||
|
||
|
||
def main():
|
||
"""Point d'entrée principal."""
|
||
|
||
print_menu()
|
||
|
||
try:
|
||
choice = input("\n👉 Choisissez une option (0-4): ").strip()
|
||
|
||
if choice == "1":
|
||
# Créer des matchs de test
|
||
create_test_matches()
|
||
|
||
elif choice == "2":
|
||
# Simuler le pipeline complet
|
||
simulate_full_pipeline()
|
||
|
||
elif choice == "3":
|
||
# Vérifier RabbitMQ
|
||
if check_rabbitmq_status():
|
||
print("\n✅ RabbitMQ est opérationnel !\n")
|
||
else:
|
||
print("\n❌ RabbitMQ n'est pas accessible. Veuillez le démarrer.\n")
|
||
|
||
elif choice == "4":
|
||
# Lancer le serveur FastAPI
|
||
print("\n🚀 Démarrage du serveur FastAPI...")
|
||
print("📊 Utilisez plutôt: uvicorn app.app_new:app --host 127.0.0.1 --port 8000")
|
||
|
||
elif choice == "0":
|
||
# Tout lancer
|
||
print("\n🚀 Lancement complet du système...\n")
|
||
check_rabbitmq_status()
|
||
print("\n" + "-"*70)
|
||
print("Étape 1/3: Création des matchs de test")
|
||
create_test_matches()
|
||
print("\n" + "-"*70)
|
||
print("Étape 2/3: Simulation du pipeline complet")
|
||
simulate_full_pipeline()
|
||
print("\n" + "-"*70)
|
||
print("Étape 3/3: Démarrage du serveur FastAPI")
|
||
print("🌐 Serveur prêt sur: http://127.0.0.1:8000")
|
||
print("📖 Documentation: http://127.0.0.1:8000/docs")
|
||
print("\n✅ Système complet démarré !")
|
||
print("📊 Accédez au dashboard: http://localhost:3000/dashboard")
|
||
print("\n💡 Pour arrêter: Press Ctrl+C")
|
||
print("-"*70 + "\n")
|
||
|
||
# Attendre que l'utilisateur appuie
|
||
input("\n👉 Appuyez sur Enter pour continuer...")
|
||
|
||
else:
|
||
print("\n❌ Option invalide. Veuillez choisir entre 0 et 4.")
|
||
|
||
except KeyboardInterrupt:
|
||
print("\n\n⏹️ Interruption par l'utilisateur")
|
||
print("👋 Au revoir !\n")
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur: {e}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|