Initial commit

This commit is contained in:
2026-02-01 09:31:38 +01:00
commit e02db93960
4396 changed files with 1511612 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
"""
Queue management module.
This module provides functionality for managing RabbitMQ queues
and asynchronous task processing.
"""
from app.queues.rabbitmq_client import (
RabbitMQClient,
create_rabbitmq_client
)
from app.queues.producers import (
publish_scraping_task,
publish_sentiment_analysis_task,
publish_energy_calculation_task,
publish_result
)
from app.queues.consumers import (
consume_scraping_tasks,
consume_sentiment_analysis_tasks,
consume_energy_calculation_tasks,
consume_results
)
# Public API of the queues package: the client class/factory plus the
# task/result producers and consumers re-exported from the submodules above.
__all__ = [
    'RabbitMQClient',
    'create_rabbitmq_client',
    'publish_scraping_task',
    'publish_sentiment_analysis_task',
    'publish_energy_calculation_task',
    'publish_result',
    'consume_scraping_tasks',
    'consume_sentiment_analysis_tasks',
    'consume_energy_calculation_tasks',
    'consume_results'
]

View File

@@ -0,0 +1,246 @@
"""
Message consumers module.
This module provides functions to consume and process tasks from RabbitMQ queues.
"""
import json
import logging
from typing import Callable, Dict
from sqlalchemy.orm import Session
from app.queues.rabbitmq_client import RabbitMQClient
logger = logging.getLogger(__name__)
def consume_scraping_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume scraping tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message body is decoded
    as JSON (payload expected under the 'data' key of the event envelope),
    handed to ``callback`` together with a fresh DB session, and the result
    is published back via ``publish_scraping_result``. The message is acked
    only after the result was published; any failure rejects it with
    ``requeue=False`` so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process scraping tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'collected_count' and 'metadata' keys feed the result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received scraping task: match_id={task_data.get('match_id')}, "
                f"source={task_data.get('source')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_scraping_result
                publish_scraping_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    source=task_data.get('source'),
                    collected_count=result.get('collected_count', 0),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(f"✅ Completed scraping task for match {task_data.get('match_id')}")
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing scraping task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing scraping task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(queue_name='scraping_tasks', callback=on_message)
def consume_sentiment_analysis_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume sentiment analysis tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message is JSON-decoded
    (payload under the envelope's 'data' key), processed via
    ``callback(task_data, db)`` with a fresh DB session, and the outcome is
    published back through ``publish_sentiment_analysis_result``. Messages
    are acked only on success; failures are rejected with ``requeue=False``
    so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process sentiment analysis tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'analyzed_count', 'metrics' and 'metadata' keys feed
            the published result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received sentiment analysis task: "
                f"match_id={task_data.get('match_id')}, "
                f"source={task_data.get('source')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_sentiment_analysis_result
                publish_sentiment_analysis_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    source=task_data.get('source'),
                    analyzed_count=result.get('analyzed_count', 0),
                    metrics=result.get('metrics', {}),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(
                    f"✅ Completed sentiment analysis task for "
                    f"match {task_data.get('match_id')}"
                )
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing sentiment analysis task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing sentiment analysis task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(
        queue_name='sentiment_analysis_tasks',
        callback=on_message
    )
def consume_energy_calculation_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume energy calculation tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message is JSON-decoded
    (payload under the envelope's 'data' key), processed via
    ``callback(task_data, db)`` with a fresh DB session, and the outcome is
    published back through ``publish_energy_calculation_result``. Messages
    are acked only on success; failures are rejected with ``requeue=False``
    so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process energy calculation tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'energy_score', 'confidence', 'sources_used' and
            'metadata' keys feed the published result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received energy calculation task: "
                f"match_id={task_data.get('match_id')}, "
                f"team_id={task_data.get('team_id')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_energy_calculation_result
                publish_energy_calculation_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    team_id=task_data.get('team_id'),
                    energy_score=result.get('energy_score', 0.0),
                    confidence=result.get('confidence', 0.0),
                    sources_used=result.get('sources_used', []),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(
                    f"✅ Completed energy calculation task for "
                    f"match {task_data.get('match_id')}, "
                    f"team {task_data.get('team_id')}"
                )
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing energy calculation task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing energy calculation task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(
        queue_name='energy_calculation_tasks',
        callback=on_message
    )
def consume_results(
    client: RabbitMQClient,
    callback: Callable[[Dict], None]
) -> None:
    """
    Consume results from results queue.

    Each message body is decoded as JSON, the payload under its 'data' key
    is handed to ``callback``, and the message is acked on success. Any
    failure (bad JSON, callback error) rejects the message without
    requeueing so an unprocessable payload cannot cycle forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process results
    """
    def _handle_result(ch, method, properties, body):
        tag = method.delivery_tag
        try:
            envelope = json.loads(body)
            result_data = envelope.get('data', {})
            logger.info(
                f"📥 Received result: type={result_data.get('result_type')}, "
                f"match_id={result_data.get('data', {}).get('match_id')}"
            )
            # Delegate to the application-supplied handler, then ack.
            callback(result_data)
            ch.basic_ack(delivery_tag=tag)
            logger.info(f"✅ Processed {result_data.get('result_type')} result")
        except Exception as e:
            logger.error(f"❌ Error processing result: {e}")
            ch.basic_reject(delivery_tag=tag, requeue=False)

    # Blocks the current thread until the consumer is stopped.
    client.consume_messages(queue_name='results', callback=_handle_result)

View File

@@ -0,0 +1,268 @@
"""
Message producers module.
This module provides functions to publish tasks and results to RabbitMQ queues.
"""
import logging
from typing import Dict, List, Optional
from datetime import datetime
from app.queues.rabbitmq_client import RabbitMQClient
logger = logging.getLogger(__name__)
def publish_scraping_task(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    keywords: Optional[List[str]] = None,
    priority: str = "normal"
) -> None:
    """
    Publish a scraping task to the queue.

    Builds the task payload and sends it to the 'scraping_tasks' queue
    using the standard event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source to scrape ('twitter', 'reddit', 'rss')
        keywords: Optional list of keywords for filtering
        priority: Task priority ('low', 'normal', 'high', 'vip')
    """
    payload = dict(
        task_type="scraping",
        match_id=match_id,
        source=source,
        keywords=keywords or [],
        priority=priority,
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='scraping_tasks',
        data=payload,
        event_type="scraping.task.created",
    )
    logger.info(
        f"📤 Published scraping task for match {match_id} "
        f"(source: {source}, priority: {priority})"
    )
def publish_sentiment_analysis_task(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    entity_ids: List[str],
    texts: Optional[List[str]] = None
) -> None:
    """
    Publish a sentiment analysis task to the queue.

    Builds the task payload and sends it to the 'sentiment_analysis_tasks'
    queue using the standard event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source type ('twitter', 'reddit')
        entity_ids: List of entity IDs to analyze
        texts: Optional list of texts (if not fetched from DB)
    """
    payload = dict(
        task_type="sentiment_analysis",
        match_id=match_id,
        source=source,
        entity_ids=entity_ids,
        texts=texts or [],
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='sentiment_analysis_tasks',
        data=payload,
        event_type="sentiment_analysis.task.created",
    )
    logger.info(
        f"📤 Published sentiment analysis task for match {match_id} "
        f"(source: {source}, entities: {len(entity_ids)})"
    )
def publish_energy_calculation_task(
    client: RabbitMQClient,
    match_id: int,
    team_id: int,
    twitter_sentiments: Optional[List[Dict]] = None,
    reddit_sentiments: Optional[List[Dict]] = None,
    rss_sentiments: Optional[List[Dict]] = None,
    tweets_with_timestamps: Optional[List[Dict]] = None
) -> None:
    """
    Publish an energy calculation task to the queue.

    Builds the task payload (absent sentiment lists become empty lists) and
    sends it to the 'energy_calculation_tasks' queue using the standard
    event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        team_id: Team identifier
        twitter_sentiments: Optional list of Twitter sentiment scores
        reddit_sentiments: Optional list of Reddit sentiment scores
        rss_sentiments: Optional list of RSS sentiment scores
        tweets_with_timestamps: Optional list of tweets with timestamps
    """
    payload = dict(
        task_type="energy_calculation",
        match_id=match_id,
        team_id=team_id,
        twitter_sentiments=twitter_sentiments or [],
        reddit_sentiments=reddit_sentiments or [],
        rss_sentiments=rss_sentiments or [],
        tweets_with_timestamps=tweets_with_timestamps or [],
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='energy_calculation_tasks',
        data=payload,
        event_type="energy_calculation.task.created",
    )
    logger.info(
        f"📤 Published energy calculation task for match {match_id}, "
        f"team {team_id}"
    )
def publish_result(
    client: RabbitMQClient,
    result_type: str,
    data: Dict
) -> None:
    """
    Publish a result to the results queue.

    Wraps the result data with its type and a creation timestamp before
    sending it to the shared 'results' queue.

    Args:
        client: RabbitMQ client instance
        result_type: Type of result ('scraping', 'sentiment', 'energy')
        data: Result data
    """
    envelope = {
        "result_type": result_type,
        "data": data,
        "created_at": datetime.utcnow().isoformat(),
    }
    client.publish_message(
        queue_name='results',
        data=envelope,
        event_type="result.published",
    )
    logger.info(f"📤 Published {result_type} result to results queue")
def publish_scraping_result(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    collected_count: int,
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish a scraping result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'scraping'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source scraped ('twitter', 'reddit', 'rss')
        collected_count: Number of items collected
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="scraping",
        data={
            "match_id": match_id,
            "source": source,
            "collected_count": collected_count,
            "status": "success",
            "metadata": metadata or {},
        },
    )
def publish_sentiment_analysis_result(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    analyzed_count: int,
    metrics: Dict,
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish a sentiment analysis result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'sentiment'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source analyzed ('twitter', 'reddit')
        analyzed_count: Number of items analyzed
        metrics: Aggregated sentiment metrics
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="sentiment",
        data={
            "match_id": match_id,
            "source": source,
            "analyzed_count": analyzed_count,
            "metrics": metrics,
            "status": "success",
            "metadata": metadata or {},
        },
    )
def publish_energy_calculation_result(
    client: RabbitMQClient,
    match_id: int,
    team_id: int,
    energy_score: float,
    confidence: float,
    sources_used: List[str],
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish an energy calculation result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'energy'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        team_id: Team identifier
        energy_score: Calculated energy score
        confidence: Confidence level
        sources_used: List of sources used in calculation
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="energy",
        data={
            "match_id": match_id,
            "team_id": team_id,
            "energy_score": energy_score,
            "confidence": confidence,
            "sources_used": sources_used,
            "status": "success",
            "metadata": metadata or {},
        },
    )

View File

@@ -0,0 +1,238 @@
"""
RabbitMQ client module.
This module provides a RabbitMQ client with connection management
and queue declaration functionality.
"""
import json
import logging
from datetime import datetime
from typing import Dict, Optional, Callable
import pika
from pika.exceptions import AMQPConnectionError, AMQPChannelError
logger = logging.getLogger(__name__)
class RabbitMQClient:
    """
    RabbitMQ client with connection management and queue declaration.

    Features:
    - Connection management with reconnection logic
    - Queue declaration with durability
    - Message publishing with standard event format
    - Task consumption with error handling
    - Automatic acknowledgment management
    """

    def __init__(
        self,
        rabbitmq_url: str = "amqp://guest:guest@localhost:5672",
        prefetch_count: int = 1
    ):
        """
        Initialize RabbitMQ client.

        Args:
            rabbitmq_url: RabbitMQ connection URL
            prefetch_count: Number of unacknowledged messages to prefetch
                (1 gives fair dispatch between competing consumers)
        """
        self.rabbitmq_url = rabbitmq_url
        self.prefetch_count = prefetch_count
        # Both stay None until connect() succeeds; close() resets them.
        self.connection: Optional[pika.BlockingConnection] = None
        self.channel: Optional[pika.adapters.blocking_connection.BlockingChannel] = None
        # Logical name -> physical queue name. Identical today, but kept as
        # a mapping so physical names can change without touching callers.
        self.queues = {
            'scraping_tasks': 'scraping_tasks',
            'sentiment_analysis_tasks': 'sentiment_analysis_tasks',
            'energy_calculation_tasks': 'energy_calculation_tasks',
            'results': 'results'
        }

    def connect(self) -> None:
        """
        Establish connection to RabbitMQ server.

        Opens a blocking connection and channel, applies the prefetch
        limit, and declares all known queues as durable.

        Raises:
            AMQPConnectionError: If connection fails
            AMQPChannelError: If the channel cannot be created
        """
        try:
            logger.info(f"🔗 Connecting to RabbitMQ at {self.rabbitmq_url}")
            # Create connection
            self.connection = pika.BlockingConnection(
                pika.URLParameters(self.rabbitmq_url)
            )
            self.channel = self.connection.channel()
            # Set prefetch count for fair dispatch
            self.channel.basic_qos(prefetch_count=self.prefetch_count)
            # Declare queues so publish/consume never hit a missing queue
            self._declare_queues()
            logger.info("✅ Connected to RabbitMQ successfully")
        except AMQPConnectionError as e:
            logger.error(f"❌ Failed to connect to RabbitMQ: {e}")
            raise
        except AMQPChannelError as e:
            logger.error(f"❌ Failed to create RabbitMQ channel: {e}")
            raise

    def _declare_queues(self) -> None:
        """
        Declare all required queues with durability.

        durable=True makes the queues survive a broker restart (messages
        additionally need delivery_mode=2, which publish_message sets).
        """
        for queue_name in self.queues.values():
            self.channel.queue_declare(
                queue=queue_name,
                durable=True
            )
            logger.debug(f"✅ Declared queue: {queue_name}")

    def close(self) -> None:
        """
        Close connection to RabbitMQ server.

        Best-effort and idempotent: a failure while closing the channel no
        longer leaks the underlying connection (the previous implementation
        aborted before connection.close()), already-closed handles are
        skipped, and both references are reset to None so a later
        connect() starts from a clean state.
        """
        if self.channel is not None and self.channel.is_open:
            try:
                self.channel.close()
            except Exception as e:
                # Keep going: the connection below must still be closed.
                logger.warning(f"⚠️ Error closing RabbitMQ channel: {e}")
        self.channel = None
        if self.connection is not None and self.connection.is_open:
            try:
                self.connection.close()
            except Exception as e:
                logger.warning(f"⚠️ Error closing RabbitMQ connection: {e}")
        self.connection = None
        logger.info("🔌 Closed RabbitMQ connection")

    def publish_message(
        self,
        queue_name: str,
        data: Dict,
        event_type: str = "task.created",
        version: str = "1.0"
    ) -> None:
        """
        Publish a message to a queue with standard event format.

        Args:
            queue_name: Target queue name
            data: Message payload (will be JSON serialized)
            event_type: Event type for message header
            version: Event version

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized. Call connect() first.")
        # Create standard event message format
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12
        # and produces a naive timestamp; consider datetime.now(timezone.utc)
        # once downstream consumers tolerate the "+00:00" suffix.
        message = {
            "event": event_type,
            "version": version,
            "timestamp": datetime.utcnow().isoformat(),
            "data": data,
            "metadata": {
                "source": "api",
                "queue": queue_name
            }
        }
        try:
            self.channel.basic_publish(
                exchange='',  # default exchange: routing_key == queue name
                routing_key=queue_name,
                # default=str stringifies anything json can't serialize
                body=json.dumps(message, ensure_ascii=False, default=str),
                properties=pika.BasicProperties(
                    delivery_mode=2,  # Make message persistent
                    content_type='application/json'
                )
            )
            logger.debug(f"📤 Published message to {queue_name}: {event_type}")
        except Exception as e:
            logger.error(f"❌ Failed to publish message to {queue_name}: {e}")
            raise

    def consume_messages(
        self,
        queue_name: str,
        callback: Callable
    ) -> None:
        """
        Start consuming messages from a queue.

        Blocks in start_consuming() until interrupted; Ctrl-C stops the
        consumer cleanly. auto_ack is disabled, so the callback is
        responsible for ack/reject of every delivery.

        Args:
            queue_name: Queue to consume from
            callback: Callback function to process messages
                (pika signature: (channel, method, properties, body))

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized. Call connect() first.")
        try:
            logger.info(f"👂 Starting to consume from queue: {queue_name}")
            self.channel.basic_consume(
                queue=queue_name,
                on_message_callback=callback,
                auto_ack=False
            )
            self.channel.start_consuming()
        except KeyboardInterrupt:
            logger.info("⏹️ Stopping consumer...")
            self.channel.stop_consuming()
        except Exception as e:
            logger.error(f"❌ Error consuming from {queue_name}: {e}")
            raise

    def ack_message(self, delivery_tag: int) -> None:
        """
        Acknowledge a message as processed.

        Args:
            delivery_tag: Message delivery tag

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized.")
        self.channel.basic_ack(delivery_tag=delivery_tag)

    def reject_message(
        self,
        delivery_tag: int,
        requeue: bool = False
    ) -> None:
        """
        Reject a message (e.g., on processing failure).

        Args:
            delivery_tag: Message delivery tag
            requeue: Whether to requeue the message (False drops it, or
                routes to a dead-letter exchange if one is configured)

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized.")
        self.channel.basic_reject(delivery_tag=delivery_tag, requeue=requeue)
def create_rabbitmq_client(
    rabbitmq_url: Optional[str] = None,
    prefetch_count: int = 1
) -> RabbitMQClient:
    """
    Factory function to create a RabbitMQ client.

    Args:
        rabbitmq_url: Optional RabbitMQ connection URL (defaults to localhost)
        prefetch_count: Number of unacknowledged messages to prefetch

    Returns:
        Configured RabbitMQClient instance
    """
    # TODO: Load from environment variables or config file
    url = "amqp://guest:guest@localhost:5672" if rabbitmq_url is None else rabbitmq_url
    return RabbitMQClient(rabbitmq_url=url, prefetch_count=prefetch_count)