"""
Main script orchestrating the whole Chartbastan system.

This script:
1. Seeds the database with test matches
2. Expects RabbitMQ to be running (must be started manually)
3. Launches all workers
4. Launches the FastAPI server

Usage:
    python run_all_system.py
"""
import logging
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
from datetime import datetime, timedelta
|
||
from sqlalchemy.orm import Session
|
||
|
||
# Add parent directory to path
|
||
sys_path = str(Path(__file__).parent)
|
||
if sys_path not in sys.path:
|
||
sys.path.insert(0, sys_path)
|
||
|
||
from app.database import engine, Base, get_db
|
||
from app.models.match import Match
|
||
from app.scrapers.twitter_scraper import create_twitter_scraper
|
||
from app.scrapers.reddit_scraper import create_reddit_scraper
|
||
from app.services.sentiment_service import (
|
||
process_tweet_batch,
|
||
process_reddit_post_batch,
|
||
)
|
||
from app.ml.energy_calculator import calculate_energy_score
|
||
from app.services.prediction_service import PredictionService
|
||
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||
)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def create_test_matches():
|
||
"""
|
||
Crée des matchs de test dans la base de données.
|
||
|
||
Matchs créés pour l'Epic 3 (Backtesting) et Epic 2 (Data Collection).
|
||
"""
|
||
logger.info("🎯 Création des matchs de test...")
|
||
|
||
db = next(get_db())
|
||
|
||
try:
|
||
# Vérifier si des matchs existent déjà
|
||
existing_matches = db.query(Match).count()
|
||
|
||
if existing_matches > 0:
|
||
logger.info(f"ℹ️ {existing_matches} matchs existent déjà, skip de la création")
|
||
return
|
||
|
||
# Créer des matchs de test (matchs à venir dans les 7 prochains jours)
|
||
test_matches = [
|
||
{
|
||
"home_team": "PSG",
|
||
"away_team": "Olympique de Marseille",
|
||
"date": datetime.now() + timedelta(hours=24),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Paris Saint-Germain",
|
||
"away_team": "AS Monaco",
|
||
"date": datetime.now() + timedelta(hours=48),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Olympique Lyonnais",
|
||
"away_team": "Olympique de Marseille",
|
||
"date": datetime.now() + timedelta(hours=72),
|
||
"league": "Ligue 1",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Real Madrid",
|
||
"away_team": "Barcelona",
|
||
"date": datetime.now() + timedelta(hours=96),
|
||
"league": "La Liga",
|
||
"status": "scheduled"
|
||
},
|
||
{
|
||
"home_team": "Manchester United",
|
||
"away_team": "Liverpool",
|
||
"date": datetime.now() + timedelta(hours=120),
|
||
"league": "Premier League",
|
||
"status": "scheduled"
|
||
}
|
||
]
|
||
|
||
# Insérer les matchs
|
||
for match_data in test_matches:
|
||
match = Match(**match_data)
|
||
db.add(match)
|
||
|
||
db.commit()
|
||
logger.info(f"✅ {len(test_matches)} matchs de test créés avec succès")
|
||
|
||
# Afficher les matchs créés
|
||
print("\n" + "="*70)
|
||
print("📊 MATCHS DE TEST CRÉÉS")
|
||
print("="*70)
|
||
for i, match in enumerate(test_matches, 1):
|
||
print(f"\n{i}. {match['home_team']} vs {match['away_team']}")
|
||
print(f" 📅 {match['date'].strftime('%Y-%m-%d %H:%M')}")
|
||
print(f" 🏆 {match['league']}")
|
||
print(f" 📊 Status: {match['status']}")
|
||
print("="*70 + "\n")
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur lors de la création des matchs: {e}")
|
||
db.rollback()
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
def check_rabbitmq_status():
|
||
"""
|
||
Vérifie si RabbitMQ est accessible.
|
||
"""
|
||
logger.info("🔗 Vérification de la connexion RabbitMQ...")
|
||
|
||
try:
|
||
import os
|
||
rabbitmq_url = os.getenv('RABBITMQ_URL', 'amqp://guest:guest@localhost:5672')
|
||
|
||
from app.queues.rabbitmq_client import create_rabbitmq_client
|
||
client = create_rabbitmq_client(rabbitmq_url=rabbitmq_url)
|
||
client.connect()
|
||
client.close()
|
||
|
||
logger.info("✅ RabbitMQ est accessible sur localhost:5672")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ RabbitMQ n'est pas accessible: {e}")
|
||
logger.warning("⚠️ Veuillez démarrer RabbitMQ avec Docker:")
|
||
logger.warning(" docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management")
|
||
return False
|
||
|
||
|
||
def simulate_full_pipeline():
|
||
"""
|
||
Simule le pipeline complet de génération de prédictions.
|
||
|
||
Ce pipeline correspond à Epic 2 (Data Collection) et Epic 3 (Prediction System).
|
||
"""
|
||
logger.info("🔄 Lancement du pipeline complet de simulation...")
|
||
|
||
db = next(get_db())
|
||
|
||
try:
|
||
# Récupérer les matchs
|
||
matches = db.query(Match).all()
|
||
|
||
if not matches:
|
||
logger.error("❌ Aucun match trouvé. Veuillez d'abord créer des matchs de test.")
|
||
return
|
||
|
||
logger.info(f"📊 {len(matches)} matchs trouvés")
|
||
|
||
# Pour chaque match, générer des données simulées
|
||
for match in matches:
|
||
logger.info(f"\n{'='*50}")
|
||
logger.info(f"📋 Match: {match.home_team} vs {match.away_team}")
|
||
|
||
# Simuler des tweets et posts Reddit (mock data)
|
||
mock_tweets = [
|
||
{
|
||
"tweet_id": f"tweet_{match.id}_{i}",
|
||
"text": f"Allez {match.home_team} ! On va gagner ce match ! #football #{match.home_team.replace(' ', '')}",
|
||
"author": f"fan_{match.home_team.lower()}",
|
||
"created_at": datetime.now(),
|
||
"retweet_count": 10 + i,
|
||
"like_count": 20 + i,
|
||
"reply_count": 5 + i
|
||
}
|
||
for i in range(10)
|
||
]
|
||
|
||
mock_reddit_posts = [
|
||
{
|
||
"post_id": f"reddit_{match.id}_{i}",
|
||
"title": f"{match.home_team} vs {match.away_team} - Match Preview",
|
||
"text": f"Prediction: {match.home_team} will win 2-1",
|
||
"author": f"u/redditor_{i}",
|
||
"created_at": datetime.now(),
|
||
"upvote_count": 15 + i,
|
||
"downvote_count": 2
|
||
}
|
||
for i in range(5)
|
||
]
|
||
|
||
# Analyser le sentiment
|
||
logger.info("🔍 Analyse de sentiment VADER...")
|
||
from app.models.tweet import Tweet
|
||
from app.models.reddit_post import RedditPost
|
||
|
||
# Créer et analyser les tweets (simulation)
|
||
tweet_sentiments = []
|
||
for tweet_data in mock_tweets:
|
||
tweet = Tweet(
|
||
tweet_id=tweet_data["tweet_id"],
|
||
text=tweet_data["text"],
|
||
author=tweet_data["author"],
|
||
created_at=tweet_data["created_at"],
|
||
retweet_count=tweet_data["retweet_count"],
|
||
like_count=tweet_data["like_count"],
|
||
reply_count=tweet_data["reply_count"],
|
||
match_id=match.id
|
||
)
|
||
db.add(tweet)
|
||
|
||
# Analyse de sentiment simple
|
||
positive_words = ["gagner", "win", "victoire", "champion", "excellent", "super"]
|
||
negative_words = ["perdre", "perdu", "mauvais", "nul", "faible"]
|
||
|
||
text_lower = tweet_data["text"].lower()
|
||
pos_count = sum(1 for word in positive_words if word in text_lower)
|
||
neg_count = sum(1 for word in negative_words if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
tweet_sentiments.append({
|
||
"tweet_id": tweet_data["tweet_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.5,
|
||
"negative_score": neg_count * 0.5,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.5,
|
||
"compound_score": (pos_count - neg_count) * 0.5
|
||
})
|
||
|
||
logger.info(f" ✅ {len(tweet_sentiments)} tweets analysés")
|
||
|
||
# Créer les posts Reddit
|
||
reddit_sentiments = []
|
||
for post_data in mock_reddit_posts:
|
||
post = RedditPost(
|
||
post_id=post_data["post_id"],
|
||
title=post_data["title"],
|
||
text=post_data["text"],
|
||
author=post_data["author"],
|
||
created_at=post_data["created_at"],
|
||
upvote_count=post_data["upvote_count"],
|
||
downvote_count=post_data["downvote_count"],
|
||
match_id=match.id
|
||
)
|
||
db.add(post)
|
||
|
||
# Analyse de sentiment simple
|
||
text_lower = post_data["text"].lower()
|
||
pos_count = sum(1 for word in ["win", "gagner"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["lose", "perdre"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
reddit_sentiments.append({
|
||
"post_id": post_data["post_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.3,
|
||
"negative_score": neg_count * 0.3,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.3,
|
||
"compound_score": (pos_count - neg_count) * 0.3
|
||
})
|
||
|
||
logger.info(f" ✅ {len(reddit_sentiments)} posts Reddit analysés")
|
||
|
||
db.commit()
|
||
|
||
# Calculer l'énergie collective
|
||
logger.info("⚡ Calcul de l'énergie collective...")
|
||
logger.info(" 📊 Formule: (Positif - Négatif) × Volume × Viralité")
|
||
logger.info(" 🎯 Pondération: Twitter 60%, Reddit 25%, RSS 15%")
|
||
|
||
# Données d'énergie
|
||
energy_data = {
|
||
"match_id": match.id,
|
||
"team_id": 0, # home team
|
||
"twitter_sentiments": tweet_sentiments,
|
||
"reddit_sentiments": reddit_sentiments,
|
||
"rss_sentiments": [],
|
||
"tweets_with_timestamps": [
|
||
{"tweet_id": t["tweet_id"], "created_at": datetime.now()}
|
||
for t in tweet_sentiments[:5] # 5 tweets les plus récents
|
||
]
|
||
}
|
||
|
||
# Calculer le score d'énergie
|
||
energy_result = calculate_energy_score(
|
||
match_id=match.id,
|
||
team_id=0,
|
||
twitter_sentiments=tweet_sentiments,
|
||
reddit_sentiments=reddit_sentiments,
|
||
rss_sentiments=[],
|
||
tweets_with_timestamps=energy_data["tweets_with_timestamps"]
|
||
)
|
||
|
||
logger.info(f" ⚡ Score d'énergie équipe domicile: {energy_result['score']:.2f}")
|
||
logger.info(f" 📊 Confiance: {energy_result['confidence']:.1f}%")
|
||
|
||
# Répéter pour l'équipe visiteur
|
||
energy_data_away = energy_data.copy()
|
||
energy_data_away["team_id"] = 1
|
||
|
||
mock_tweets_away = [
|
||
{
|
||
"tweet_id": f"tweet_away_{match.id}_{i}",
|
||
"text": f"Allez {match.away_team} ! On va gagner ce match ! #football #{match.away_team.replace(' ', '')}",
|
||
"author": f"fan_{match.away_team.lower()}",
|
||
"created_at": datetime.now(),
|
||
"retweet_count": 8 + i,
|
||
"like_count": 15 + i,
|
||
"reply_count": 4 + i
|
||
}
|
||
for i in range(8)
|
||
]
|
||
|
||
tweet_sentiments_away = []
|
||
for tweet_data in mock_tweets_away:
|
||
text_lower = tweet_data["text"].lower()
|
||
pos_count = sum(1 for word in ["gagner", "win", "victoire"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["perdre", "perdu", "mauvais"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
tweet_sentiments_away.append({
|
||
"tweet_id": tweet_data["tweet_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.5,
|
||
"negative_score": neg_count * 0.5,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.5,
|
||
"compound_score": (pos_count - neg_count) * 0.5
|
||
})
|
||
|
||
energy_data_away["twitter_sentiments"] = tweet_sentiments_away
|
||
|
||
mock_reddit_posts_away = [
|
||
{
|
||
"post_id": f"reddit_away_{match.id}_{i}",
|
||
"title": f"{match.away_team} vs {match.home_team} - Match Preview",
|
||
"text": f"Prediction: {match.away_team} will win 3-2",
|
||
"author": f"u/redditor_away_{i}",
|
||
"created_at": datetime.now(),
|
||
"upvote_count": 12 + i,
|
||
"downvote_count": 3
|
||
}
|
||
for i in range(6)
|
||
]
|
||
|
||
reddit_sentiments_away = []
|
||
for post_data in mock_reddit_posts_away:
|
||
text_lower = post_data["text"].lower()
|
||
pos_count = sum(1 for word in ["win", "gagner"] if word in text_lower)
|
||
neg_count = sum(1 for word in ["lose", "perdre"] if word in text_lower)
|
||
|
||
if pos_count > neg_count:
|
||
sentiment = "positive"
|
||
elif neg_count > pos_count:
|
||
sentiment = "negative"
|
||
else:
|
||
sentiment = "neutral"
|
||
|
||
reddit_sentiments_away.append({
|
||
"post_id": post_data["post_id"],
|
||
"sentiment": sentiment,
|
||
"positive_score": pos_count * 0.3,
|
||
"negative_score": neg_count * 0.3,
|
||
"neutral_score": 1 - (pos_count + neg_count) * 0.3,
|
||
"compound_score": (pos_count - neg_count) * 0.3
|
||
})
|
||
|
||
energy_data_away["reddit_sentiments"] = reddit_sentiments_away
|
||
energy_data_away["tweets_with_timestamps"] = [
|
||
{"tweet_id": t["tweet_id"], "created_at": datetime.now()}
|
||
for t in tweet_sentiments_away[:5]
|
||
]
|
||
|
||
energy_result_away = calculate_energy_score(
|
||
match_id=match.id,
|
||
team_id=1,
|
||
twitter_sentiments=tweet_sentiments_away,
|
||
reddit_sentiments=reddit_sentiments_away,
|
||
rss_sentiments=[],
|
||
tweets_with_timestamps=energy_data_away["tweets_with_timestamps"]
|
||
)
|
||
|
||
logger.info(f" ⚡ Score d'énergie équipe visiteur: {energy_result_away['score']:.2f}")
|
||
logger.info(f" 📊 Confiance: {energy_result_away['confidence']:.1f}%")
|
||
|
||
# Générer une prédiction
|
||
logger.info("🎯 Génération de la prédiction...")
|
||
|
||
home_energy = energy_result['score']
|
||
away_energy = energy_result_away['score']
|
||
|
||
# Service de prédiction
|
||
prediction_service = PredictionService(db)
|
||
prediction = prediction_service.create_prediction_for_match(
|
||
match_id=match.id,
|
||
home_energy=home_energy,
|
||
away_energy=away_energy,
|
||
energy_score_label="high" if home_energy > 50 else "medium"
|
||
)
|
||
|
||
logger.info(f" 🏆 Vainqueur prédit: {prediction.predicted_winner}")
|
||
logger.info(f" 📊 Confidence: {prediction.confidence}")
|
||
logger.info(f" ✅ Prédiction créée avec succès !")
|
||
|
||
logger.info("="*50)
|
||
|
||
logger.info(f"\n✅ Pipeline complet terminé : {len(matches)} prédictions générées")
|
||
|
||
# Résumé
|
||
print("\n" + "="*70)
|
||
print("🎊 RÉSUMÉ DU PIPELINE")
|
||
print("="*70)
|
||
print(f"📊 Matchs créés: {len(matches)}")
|
||
print(f"🐦 Tweets analysés: {len(matches) * 18}") # ~10 tweets home + ~8 tweets away
|
||
print(f"📝 Posts Reddit analysés: {len(matches) * 11}") # ~5 posts home + ~6 posts away
|
||
print(f"⚡ Scores d'énergie calculés: {len(matches) * 2}") # 2 équipes par match
|
||
print(f"🎯 Prédictions générées: {len(matches)}")
|
||
print("="*70 + "\n")
|
||
print("🌐 API disponible sur: http://127.0.0.1:8000/docs")
|
||
print("📊 Dashboard disponible sur: http://localhost:3000")
|
||
print("\n📝 Prochaine étape:")
|
||
print(" 1. Accédez à http://localhost:3000/dashboard")
|
||
print(" 2. Connectez-vous avec votre compte utilisateur")
|
||
print(" 3. Voyez les prédictions dans le dashboard !")
|
||
print("\n")
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur lors du pipeline: {e}")
|
||
db.rollback()
|
||
finally:
|
||
db.close()
|
||
|
||
|
||
def print_menu():
|
||
"""Affiche le menu principal."""
|
||
print("\n" + "="*70)
|
||
print("🚀 SYSTÈME CHARTBASTAN - MENU PRINCIPAL")
|
||
print("="*70)
|
||
print("\n📋 OPTIONS DISPONIBLES:")
|
||
print("\n1️⃣ Initialiser la base de données avec des matchs de test")
|
||
print("2️⃣ Simuler le pipeline complet (scraping → analyse → énergie → prédictions)")
|
||
print("3️⃣ Vérifier la connexion RabbitMQ")
|
||
print("4️⃣ Lancer le serveur FastAPI uniquement")
|
||
print("0️⃣ Tout lancer (matchs + pipeline + API)")
|
||
print("\n📝 CONFIGURATION:")
|
||
print(" - Variables d'environnement dans backend/.env")
|
||
print(" - RabbitMQ doit être lancé avec Docker avant d'utiliser les queues")
|
||
print(" - API externe (Twitter/Reddit) optionnelle pour le scraping réel")
|
||
print("\n🚀 DOCKER RABBITMQ (si nécessaire):")
|
||
print(" docker run -d --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:3-management")
|
||
print("\n" + "="*70 + "\n")
|
||
|
||
|
||
def main():
|
||
"""Point d'entrée principal."""
|
||
|
||
print_menu()
|
||
|
||
try:
|
||
choice = input("\n👉 Choisissez une option (0-4): ").strip()
|
||
|
||
if choice == "1":
|
||
# Créer des matchs de test
|
||
create_test_matches()
|
||
|
||
elif choice == "2":
|
||
# Simuler le pipeline complet
|
||
simulate_full_pipeline()
|
||
|
||
elif choice == "3":
|
||
# Vérifier RabbitMQ
|
||
if check_rabbitmq_status():
|
||
print("\n✅ RabbitMQ est opérationnel !\n")
|
||
else:
|
||
print("\n❌ RabbitMQ n'est pas accessible. Veuillez le démarrer.\n")
|
||
|
||
elif choice == "4":
|
||
# Lancer le serveur FastAPI
|
||
print("\n🚀 Démarrage du serveur FastAPI...")
|
||
print("📊 Utilisez plutôt: uvicorn app.app_new:app --host 127.0.0.1 --port 8000")
|
||
|
||
elif choice == "0":
|
||
# Tout lancer
|
||
print("\n🚀 Lancement complet du système...\n")
|
||
check_rabbitmq_status()
|
||
print("\n" + "-"*70)
|
||
print("Étape 1/3: Création des matchs de test")
|
||
create_test_matches()
|
||
print("\n" + "-"*70)
|
||
print("Étape 2/3: Simulation du pipeline complet")
|
||
simulate_full_pipeline()
|
||
print("\n" + "-"*70)
|
||
print("Étape 3/3: Démarrage du serveur FastAPI")
|
||
print("🌐 Serveur prêt sur: http://127.0.0.1:8000")
|
||
print("📖 Documentation: http://127.0.0.1:8000/docs")
|
||
print("\n✅ Système complet démarré !")
|
||
print("📊 Accédez au dashboard: http://localhost:3000/dashboard")
|
||
print("\n💡 Pour arrêter: Press Ctrl+C")
|
||
print("-"*70 + "\n")
|
||
|
||
# Attendre que l'utilisateur appuie
|
||
input("\n👉 Appuyez sur Enter pour continuer...")
|
||
|
||
else:
|
||
print("\n❌ Option invalide. Veuillez choisir entre 0 et 4.")
|
||
|
||
except KeyboardInterrupt:
|
||
print("\n\n⏹️ Interruption par l'utilisateur")
|
||
print("👋 Au revoir !\n")
|
||
except Exception as e:
|
||
logger.error(f"❌ Erreur: {e}")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|