chartbastan/backend/run_all_system.py
2026-02-01 09:31:38 +01:00

545 lines
21 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Script principal pour orchestrer tout le système Chartbastan.
Ce script :
1. Initialise la base de données avec des matchs de test
2. Lance RabbitMQ (doit être démarré manuellement)
3. Lance tous les workers
4. Lance le serveur FastAPI
Usage:
python run_all_system.py
"""
import logging
import sys
import time
from pathlib import Path
from datetime import datetime, timedelta
from sqlalchemy.orm import Session
# Add parent directory to path
sys_path = str(Path(__file__).parent)
if sys_path not in sys.path:
sys.path.insert(0, sys_path)
from app.database import engine, Base, get_db
from app.models.match import Match
from app.scrapers.twitter_scraper import create_twitter_scraper
from app.scrapers.reddit_scraper import create_reddit_scraper
from app.services.sentiment_service import (
process_tweet_batch,
process_reddit_post_batch,
)
from app.ml.energy_calculator import calculate_energy_score
from app.services.prediction_service import PredictionService
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def create_test_matches():
    """
    Seed the database with test matches.

    Matches support Epic 3 (Backtesting) and Epic 2 (Data Collection).
    Seeding is skipped entirely when any match already exists.
    """
    logger.info("🎯 Création des matchs de test...")
    db = next(get_db())
    try:
        # Idempotency guard: never duplicate fixtures on a re-run.
        existing_matches = db.query(Match).count()
        if existing_matches > 0:
            logger.info(f" {existing_matches} matchs existent déjà, skip de la création")
            return

        # (home, away, kickoff offset in hours, league) — all upcoming within 7 days.
        fixtures = [
            ("PSG", "Olympique de Marseille", 24, "Ligue 1"),
            ("Paris Saint-Germain", "AS Monaco", 48, "Ligue 1"),
            ("Olympique Lyonnais", "Olympique de Marseille", 72, "Ligue 1"),
            ("Real Madrid", "Barcelona", 96, "La Liga"),
            ("Manchester United", "Liverpool", 120, "Premier League"),
        ]
        test_matches = [
            {
                "home_team": home,
                "away_team": away,
                "date": datetime.now() + timedelta(hours=offset),
                "league": league,
                "status": "scheduled",
            }
            for home, away, offset, league in fixtures
        ]

        for match_data in test_matches:
            db.add(Match(**match_data))
        db.commit()
        logger.info(f"{len(test_matches)} matchs de test créés avec succès")

        # Human-readable recap of what was just inserted.
        print("\n" + "=" * 70)
        print("📊 MATCHS DE TEST CRÉÉS")
        print("=" * 70)
        for i, match in enumerate(test_matches, 1):
            print(f"\n{i}. {match['home_team']} vs {match['away_team']}")
            print(f" 📅 {match['date'].strftime('%Y-%m-%d %H:%M')}")
            print(f" 🏆 {match['league']}")
            print(f" 📊 Status: {match['status']}")
        print("=" * 70 + "\n")
    except Exception as e:
        # Best-effort seeding: log, roll back, and let the caller continue.
        logger.error(f"❌ Erreur lors de la création des matchs: {e}")
        db.rollback()
    finally:
        db.close()
def check_rabbitmq_status():
    """
    Check whether RabbitMQ is reachable.

    Opens a probe connection using the ``RABBITMQ_URL`` environment variable
    (defaulting to the local guest broker) and closes it immediately.

    Returns:
        bool: True if a connection could be established, False otherwise.
    """
    logger.info("🔗 Vérification de la connexion RabbitMQ...")
    try:
        import os
        rabbitmq_url = os.getenv('RABBITMQ_URL', 'amqp://guest:guest@localhost:5672')
        from app.queues.rabbitmq_client import create_rabbitmq_client
        client = create_rabbitmq_client(rabbitmq_url=rabbitmq_url)
        client.connect()
        try:
            # The broker answered, so it is reachable.  Closing is done in a
            # finally block so the probe connection is never leaked, and a
            # failure while closing must not flip the verdict to "unreachable".
            # NOTE(review): message assumes the default URL; RABBITMQ_URL may
            # point elsewhere — confirm whether the actual host should be shown.
            logger.info("✅ RabbitMQ est accessible sur localhost:5672")
            return True
        finally:
            try:
                client.close()
            except Exception:
                logger.debug("probe connection close failed", exc_info=True)
    except Exception as e:
        logger.error(f"❌ RabbitMQ n'est pas accessible: {e}")
        logger.warning("⚠️ Veuillez démarrer RabbitMQ avec Docker:")
        logger.warning(" docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management")
        return False
# Keyword lists used by the mock sentiment analysis.  The home and away passes
# historically used different vocabularies; they are kept distinct so the
# simulated scores remain identical to the previous inline version.
_HOME_TWEET_POSITIVE = ["gagner", "win", "victoire", "champion", "excellent", "super"]
_HOME_TWEET_NEGATIVE = ["perdre", "perdu", "mauvais", "nul", "faible"]
_AWAY_TWEET_POSITIVE = ["gagner", "win", "victoire"]
_AWAY_TWEET_NEGATIVE = ["perdre", "perdu", "mauvais"]
_REDDIT_POSITIVE = ["win", "gagner"]
_REDDIT_NEGATIVE = ["lose", "perdre"]


def _score_text(text, positive_words, negative_words, weight):
    """Return a VADER-like score dict from naive keyword counting.

    Counts substring occurrences of *positive_words* / *negative_words* in the
    lowercased *text*; the winning side determines the label, and *weight*
    scales all numeric scores.
    """
    text_lower = text.lower()
    pos_count = sum(1 for word in positive_words if word in text_lower)
    neg_count = sum(1 for word in negative_words if word in text_lower)
    if pos_count > neg_count:
        sentiment = "positive"
    elif neg_count > pos_count:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    return {
        "sentiment": sentiment,
        "positive_score": pos_count * weight,
        "negative_score": neg_count * weight,
        "neutral_score": 1 - (pos_count + neg_count) * weight,
        "compound_score": (pos_count - neg_count) * weight,
    }


def _mock_tweets(match_id, team_name, id_prefix, count, base_retweets, base_likes, base_replies):
    """Build *count* fake supporter tweets for *team_name* with rising engagement."""
    return [
        {
            "tweet_id": f"{id_prefix}{match_id}_{i}",
            "text": f"Allez {team_name} ! On va gagner ce match ! #football #{team_name.replace(' ', '')}",
            "author": f"fan_{team_name.lower()}",
            "created_at": datetime.now(),
            "retweet_count": base_retweets + i,
            "like_count": base_likes + i,
            "reply_count": base_replies + i,
        }
        for i in range(count)
    ]


def _mock_reddit_posts(match_id, team, opponent, id_prefix, author_prefix, scoreline, count, base_upvotes, downvotes):
    """Build *count* fake Reddit preview posts predicting a win for *team*."""
    return [
        {
            "post_id": f"{id_prefix}{match_id}_{i}",
            "title": f"{team} vs {opponent} - Match Preview",
            "text": f"Prediction: {team} will win {scoreline}",
            "author": f"{author_prefix}{i}",
            "created_at": datetime.now(),
            "upvote_count": base_upvotes + i,
            "downvote_count": downvotes,
        }
        for i in range(count)
    ]


def _recent_timestamps(sentiments):
    """Pair the first five sentiment entries with a fresh timestamp (virality input)."""
    return [
        {"tweet_id": s["tweet_id"], "created_at": datetime.now()}
        for s in sentiments[:5]
    ]


def simulate_full_pipeline():
    """
    Simulate the full prediction-generation pipeline.

    Corresponds to Epic 2 (Data Collection) and Epic 3 (Prediction System):
    mock scraping -> naive keyword sentiment analysis -> collective-energy
    scoring -> prediction creation.  Home-side tweets and Reddit posts are
    persisted to the database; away-side data is only scored in memory.
    """
    logger.info("🔄 Lancement du pipeline complet de simulation...")
    db = next(get_db())
    try:
        matches = db.query(Match).all()
        if not matches:
            logger.error("❌ Aucun match trouvé. Veuillez d'abord créer des matchs de test.")
            return
        logger.info(f"📊 {len(matches)} matchs trouvés")

        # Hoisted out of the loop: importing per iteration was redundant.
        from app.models.tweet import Tweet
        from app.models.reddit_post import RedditPost

        for match in matches:
            logger.info(f"\n{'='*50}")
            logger.info(f"📋 Match: {match.home_team} vs {match.away_team}")

            # --- Home side: mock data, persisted and scored -----------------
            mock_tweets = _mock_tweets(match.id, match.home_team, "tweet_", 10, 10, 20, 5)
            mock_reddit_posts = _mock_reddit_posts(
                match.id, match.home_team, match.away_team,
                "reddit_", "u/redditor_", "2-1", 5, 15, 2,
            )

            logger.info("🔍 Analyse de sentiment VADER...")
            tweet_sentiments = []
            for tweet_data in mock_tweets:
                db.add(Tweet(
                    tweet_id=tweet_data["tweet_id"],
                    text=tweet_data["text"],
                    author=tweet_data["author"],
                    created_at=tweet_data["created_at"],
                    retweet_count=tweet_data["retweet_count"],
                    like_count=tweet_data["like_count"],
                    reply_count=tweet_data["reply_count"],
                    match_id=match.id,
                ))
                scores = _score_text(
                    tweet_data["text"], _HOME_TWEET_POSITIVE, _HOME_TWEET_NEGATIVE, 0.5
                )
                tweet_sentiments.append({"tweet_id": tweet_data["tweet_id"], **scores})
            logger.info(f"{len(tweet_sentiments)} tweets analysés")

            reddit_sentiments = []
            for post_data in mock_reddit_posts:
                db.add(RedditPost(
                    post_id=post_data["post_id"],
                    title=post_data["title"],
                    text=post_data["text"],
                    author=post_data["author"],
                    created_at=post_data["created_at"],
                    upvote_count=post_data["upvote_count"],
                    downvote_count=post_data["downvote_count"],
                    match_id=match.id,
                ))
                scores = _score_text(
                    post_data["text"], _REDDIT_POSITIVE, _REDDIT_NEGATIVE, 0.3
                )
                reddit_sentiments.append({"post_id": post_data["post_id"], **scores})
            logger.info(f"{len(reddit_sentiments)} posts Reddit analysés")
            db.commit()

            # --- Collective energy ------------------------------------------
            logger.info("⚡ Calcul de l'énergie collective...")
            logger.info(" 📊 Formule: (Positif - Négatif) × Volume × Viralité")
            logger.info(" 🎯 Pondération: Twitter 60%, Reddit 25%, RSS 15%")
            energy_result = calculate_energy_score(
                match_id=match.id,
                team_id=0,  # home team
                twitter_sentiments=tweet_sentiments,
                reddit_sentiments=reddit_sentiments,
                rss_sentiments=[],
                tweets_with_timestamps=_recent_timestamps(tweet_sentiments),
            )
            logger.info(f" ⚡ Score d'énergie équipe domicile: {energy_result['score']:.2f}")
            logger.info(f" 📊 Confiance: {energy_result['confidence']:.1f}%")

            # --- Away side: scored in memory only, not persisted ------------
            mock_tweets_away = _mock_tweets(
                match.id, match.away_team, "tweet_away_", 8, 8, 15, 4
            )
            tweet_sentiments_away = [
                {"tweet_id": t["tweet_id"],
                 **_score_text(t["text"], _AWAY_TWEET_POSITIVE, _AWAY_TWEET_NEGATIVE, 0.5)}
                for t in mock_tweets_away
            ]
            mock_reddit_posts_away = _mock_reddit_posts(
                match.id, match.away_team, match.home_team,
                "reddit_away_", "u/redditor_away_", "3-2", 6, 12, 3,
            )
            reddit_sentiments_away = [
                {"post_id": p["post_id"],
                 **_score_text(p["text"], _REDDIT_POSITIVE, _REDDIT_NEGATIVE, 0.3)}
                for p in mock_reddit_posts_away
            ]
            energy_result_away = calculate_energy_score(
                match_id=match.id,
                team_id=1,  # away team
                twitter_sentiments=tweet_sentiments_away,
                reddit_sentiments=reddit_sentiments_away,
                rss_sentiments=[],
                tweets_with_timestamps=_recent_timestamps(tweet_sentiments_away),
            )
            logger.info(f" ⚡ Score d'énergie équipe visiteur: {energy_result_away['score']:.2f}")
            logger.info(f" 📊 Confiance: {energy_result_away['confidence']:.1f}%")

            # --- Prediction -------------------------------------------------
            logger.info("🎯 Génération de la prédiction...")
            home_energy = energy_result['score']
            away_energy = energy_result_away['score']
            prediction_service = PredictionService(db)
            prediction = prediction_service.create_prediction_for_match(
                match_id=match.id,
                home_energy=home_energy,
                away_energy=away_energy,
                # NOTE(review): label depends only on the home score — confirm intended.
                energy_score_label="high" if home_energy > 50 else "medium",
            )
            logger.info(f" 🏆 Vainqueur prédit: {prediction.predicted_winner}")
            logger.info(f" 📊 Confidence: {prediction.confidence}")
            logger.info(f" ✅ Prédiction créée avec succès !")
            logger.info("="*50)

        logger.info(f"\n✅ Pipeline complet terminé : {len(matches)} prédictions générées")

        # Final console summary for the operator.
        print("\n" + "="*70)
        print("🎊 RÉSUMÉ DU PIPELINE")
        print("="*70)
        print(f"📊 Matchs créés: {len(matches)}")
        print(f"🐦 Tweets analysés: {len(matches) * 18}")  # 10 home + 8 away tweets
        print(f"📝 Posts Reddit analysés: {len(matches) * 11}")  # 5 home + 6 away posts
        print(f"⚡ Scores d'énergie calculés: {len(matches) * 2}")  # two teams per match
        print(f"🎯 Prédictions générées: {len(matches)}")
        print("="*70 + "\n")
        print("🌐 API disponible sur: http://127.0.0.1:8000/docs")
        print("📊 Dashboard disponible sur: http://localhost:3000")
        print("\n📝 Prochaine étape:")
        print(" 1. Accédez à http://localhost:3000/dashboard")
        print(" 2. Connectez-vous avec votre compte utilisateur")
        print(" 3. Voyez les prédictions dans le dashboard !")
        print("\n")
    except Exception as e:
        logger.error(f"❌ Erreur lors du pipeline: {e}")
        db.rollback()
    finally:
        db.close()
def print_menu():
    """Display the interactive main menu on stdout."""
    separator = "=" * 70
    menu_lines = (
        "\n" + separator,
        "🚀 SYSTÈME CHARTBASTAN - MENU PRINCIPAL",
        separator,
        "\n📋 OPTIONS DISPONIBLES:",
        "\n1⃣ Initialiser la base de données avec des matchs de test",
        "2⃣ Simuler le pipeline complet (scraping → analyse → énergie → prédictions)",
        "3⃣ Vérifier la connexion RabbitMQ",
        "4⃣ Lancer le serveur FastAPI uniquement",
        "0⃣ Tout lancer (matchs + pipeline + API)",
        "\n📝 CONFIGURATION:",
        " - Variables d'environnement dans backend/.env",
        " - RabbitMQ doit être lancé avec Docker avant d'utiliser les queues",
        " - API externe (Twitter/Reddit) optionnelle pour le scraping réel",
        "\n🚀 DOCKER RABBITMQ (si nécessaire):",
        " docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management",
        "\n" + separator + "\n",
    )
    for line in menu_lines:
        print(line)
def _run_full_launch():
    """Run every startup step in sequence: broker check, seeding, pipeline."""
    print("\n🚀 Lancement complet du système...\n")
    check_rabbitmq_status()
    print("\n" + "-"*70)
    print("Étape 1/3: Création des matchs de test")
    create_test_matches()
    print("\n" + "-"*70)
    print("Étape 2/3: Simulation du pipeline complet")
    simulate_full_pipeline()
    print("\n" + "-"*70)
    print("Étape 3/3: Démarrage du serveur FastAPI")
    print("🌐 Serveur prêt sur: http://127.0.0.1:8000")
    print("📖 Documentation: http://127.0.0.1:8000/docs")
    print("\n✅ Système complet démarré !")
    print("📊 Accédez au dashboard: http://localhost:3000/dashboard")
    print("\n💡 Pour arrêter: Press Ctrl+C")
    print("-"*70 + "\n")
    # Keep the console open until the operator acknowledges.
    input("\n👉 Appuyez sur Enter pour continuer...")


def main():
    """Entry point: show the menu and dispatch on the selected option."""
    print_menu()
    try:
        selection = input("\n👉 Choisissez une option (0-4): ").strip()
        if selection == "1":
            create_test_matches()
        elif selection == "2":
            simulate_full_pipeline()
        elif selection == "3":
            outcome = (
                "\n✅ RabbitMQ est opérationnel !\n"
                if check_rabbitmq_status()
                else "\n❌ RabbitMQ n'est pas accessible. Veuillez le démarrer.\n"
            )
            print(outcome)
        elif selection == "4":
            print("\n🚀 Démarrage du serveur FastAPI...")
            print("📊 Utilisez plutôt: uvicorn app.app_new:app --host 127.0.0.1 --port 8000")
        elif selection == "0":
            _run_full_launch()
        else:
            print("\n❌ Option invalide. Veuillez choisir entre 0 et 4.")
    except KeyboardInterrupt:
        print("\n\n⏹️ Interruption par l'utilisateur")
        print("👋 Au revoir !\n")
    except Exception as e:
        logger.error(f"❌ Erreur: {e}")


if __name__ == "__main__":
    main()