Initial commit

This commit is contained in:
2026-02-01 09:31:38 +01:00
commit e02db93960
4396 changed files with 1511612 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
"""
Queue management module.
This module provides functionality for managing RabbitMQ queues
and asynchronous task processing.
"""
from app.queues.rabbitmq_client import (
RabbitMQClient,
create_rabbitmq_client
)
from app.queues.producers import (
publish_scraping_task,
publish_sentiment_analysis_task,
publish_energy_calculation_task,
publish_result
)
from app.queues.consumers import (
consume_scraping_tasks,
consume_sentiment_analysis_tasks,
consume_energy_calculation_tasks,
consume_results
)
# Public API of the queues package: the client class/factory plus the
# task/result producers and consumers re-exported from the submodules above.
__all__ = [
    'RabbitMQClient',
    'create_rabbitmq_client',
    'publish_scraping_task',
    'publish_sentiment_analysis_task',
    'publish_energy_calculation_task',
    'publish_result',
    'consume_scraping_tasks',
    'consume_sentiment_analysis_tasks',
    'consume_energy_calculation_tasks',
    'consume_results'
]

View File

@@ -0,0 +1,246 @@
"""
Message consumers module.
This module provides functions to consume and process tasks from RabbitMQ queues.
"""
import json
import logging
from typing import Callable, Dict
from sqlalchemy.orm import Session
from app.queues.rabbitmq_client import RabbitMQClient
logger = logging.getLogger(__name__)
def consume_scraping_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume scraping tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message body is decoded
    as JSON (payload expected under the 'data' key of the event envelope),
    handed to ``callback`` together with a fresh DB session, and the result
    is published back via ``publish_scraping_result``. The message is acked
    only after the result was published; any failure rejects it with
    ``requeue=False`` so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process scraping tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'collected_count' and 'metadata' keys feed the result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received scraping task: match_id={task_data.get('match_id')}, "
                f"source={task_data.get('source')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_scraping_result
                publish_scraping_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    source=task_data.get('source'),
                    collected_count=result.get('collected_count', 0),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(f"✅ Completed scraping task for match {task_data.get('match_id')}")
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing scraping task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing scraping task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(queue_name='scraping_tasks', callback=on_message)
def consume_sentiment_analysis_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume sentiment analysis tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message is JSON-decoded
    (payload under the envelope's 'data' key), processed via
    ``callback(task_data, db)`` with a fresh DB session, and the outcome is
    published back through ``publish_sentiment_analysis_result``. Messages
    are acked only on success; failures are rejected with ``requeue=False``
    so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process sentiment analysis tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'analyzed_count', 'metrics' and 'metadata' keys feed
            the published result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received sentiment analysis task: "
                f"match_id={task_data.get('match_id')}, "
                f"source={task_data.get('source')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_sentiment_analysis_result
                publish_sentiment_analysis_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    source=task_data.get('source'),
                    analyzed_count=result.get('analyzed_count', 0),
                    metrics=result.get('metrics', {}),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(
                    f"✅ Completed sentiment analysis task for "
                    f"match {task_data.get('match_id')}"
                )
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing sentiment analysis task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing sentiment analysis task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(
        queue_name='sentiment_analysis_tasks',
        callback=on_message
    )
def consume_energy_calculation_tasks(
    client: RabbitMQClient,
    callback: Callable[[Dict, Session], Dict],
    db_session_factory: Callable[[], Session]
) -> None:
    """
    Consume energy calculation tasks from queue and process them.

    Blocks inside ``client.consume_messages``. Each message is JSON-decoded
    (payload under the envelope's 'data' key), processed via
    ``callback(task_data, db)`` with a fresh DB session, and the outcome is
    published back through ``publish_energy_calculation_result``. Messages
    are acked only on success; failures are rejected with ``requeue=False``
    so a poison message cannot redeliver forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process energy calculation tasks; invoked as
            ``callback(task_data, db)`` and expected to return a dict whose
            optional 'energy_score', 'confidence', 'sources_used' and
            'metadata' keys feed the published result.
        db_session_factory: Factory function to create DB sessions
            (one session per message, closed in the ``finally`` below)
    """
    def on_message(ch, method, properties, body):
        try:
            # Parse message: standard event envelope with payload in 'data'
            message = json.loads(body)
            task_data = message.get('data', {})
            logger.info(
                f"📥 Received energy calculation task: "
                f"match_id={task_data.get('match_id')}, "
                f"team_id={task_data.get('team_id')}"
            )
            # Create database session
            db = db_session_factory()
            try:
                # Process task
                result = callback(task_data, db)
                # Publish result (function-level import — presumably to
                # avoid a circular import with producers; TODO confirm)
                from app.queues.producers import publish_energy_calculation_result
                publish_energy_calculation_result(
                    client=client,
                    match_id=task_data.get('match_id'),
                    team_id=task_data.get('team_id'),
                    energy_score=result.get('energy_score', 0.0),
                    confidence=result.get('confidence', 0.0),
                    sources_used=result.get('sources_used', []),
                    metadata=result.get('metadata', {})
                )
                # Acknowledge message only after the result was published
                ch.basic_ack(delivery_tag=method.delivery_tag)
                logger.info(
                    f"✅ Completed energy calculation task for "
                    f"match {task_data.get('match_id')}, "
                    f"team {task_data.get('team_id')}"
                )
            except Exception as e:
                # Processing failed: drop the message (requeue=False avoids
                # an endless redelivery loop for unprocessable tasks)
                logger.error(f"❌ Error processing energy calculation task: {e}")
                ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
            finally:
                db.close()
        except Exception as e:
            # Failure before processing started (e.g. invalid JSON body or
            # the session factory raised): message is unusable, drop it.
            logger.error(f"❌ Error parsing energy calculation task message: {e}")
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=False)
    # Start consuming (blocks the current thread until stopped)
    client.consume_messages(
        queue_name='energy_calculation_tasks',
        callback=on_message
    )
def consume_results(
    client: RabbitMQClient,
    callback: Callable[[Dict], None]
) -> None:
    """
    Consume results from results queue.

    Each message body is decoded as JSON, the payload under its 'data' key
    is handed to ``callback``, and the message is acked on success. Any
    failure (bad JSON, callback error) rejects the message without
    requeueing so an unprocessable payload cannot cycle forever.

    Args:
        client: RabbitMQ client instance
        callback: Function to process results
    """
    def _handle_result(ch, method, properties, body):
        tag = method.delivery_tag
        try:
            envelope = json.loads(body)
            result_data = envelope.get('data', {})
            logger.info(
                f"📥 Received result: type={result_data.get('result_type')}, "
                f"match_id={result_data.get('data', {}).get('match_id')}"
            )
            # Delegate to the application-supplied handler, then ack.
            callback(result_data)
            ch.basic_ack(delivery_tag=tag)
            logger.info(f"✅ Processed {result_data.get('result_type')} result")
        except Exception as e:
            logger.error(f"❌ Error processing result: {e}")
            ch.basic_reject(delivery_tag=tag, requeue=False)

    # Blocks the current thread until the consumer is stopped.
    client.consume_messages(queue_name='results', callback=_handle_result)

View File

@@ -0,0 +1,268 @@
"""
Message producers module.
This module provides functions to publish tasks and results to RabbitMQ queues.
"""
import logging
from typing import Dict, List, Optional
from datetime import datetime
from app.queues.rabbitmq_client import RabbitMQClient
logger = logging.getLogger(__name__)
def publish_scraping_task(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    keywords: Optional[List[str]] = None,
    priority: str = "normal"
) -> None:
    """
    Publish a scraping task to the queue.

    Builds the task payload and sends it to the 'scraping_tasks' queue
    using the standard event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source to scrape ('twitter', 'reddit', 'rss')
        keywords: Optional list of keywords for filtering
        priority: Task priority ('low', 'normal', 'high', 'vip')
    """
    payload = dict(
        task_type="scraping",
        match_id=match_id,
        source=source,
        keywords=keywords or [],
        priority=priority,
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='scraping_tasks',
        data=payload,
        event_type="scraping.task.created",
    )
    logger.info(
        f"📤 Published scraping task for match {match_id} "
        f"(source: {source}, priority: {priority})"
    )
def publish_sentiment_analysis_task(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    entity_ids: List[str],
    texts: Optional[List[str]] = None
) -> None:
    """
    Publish a sentiment analysis task to the queue.

    Builds the task payload and sends it to the 'sentiment_analysis_tasks'
    queue using the standard event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source type ('twitter', 'reddit')
        entity_ids: List of entity IDs to analyze
        texts: Optional list of texts (if not fetched from DB)
    """
    payload = dict(
        task_type="sentiment_analysis",
        match_id=match_id,
        source=source,
        entity_ids=entity_ids,
        texts=texts or [],
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='sentiment_analysis_tasks',
        data=payload,
        event_type="sentiment_analysis.task.created",
    )
    logger.info(
        f"📤 Published sentiment analysis task for match {match_id} "
        f"(source: {source}, entities: {len(entity_ids)})"
    )
def publish_energy_calculation_task(
    client: RabbitMQClient,
    match_id: int,
    team_id: int,
    twitter_sentiments: Optional[List[Dict]] = None,
    reddit_sentiments: Optional[List[Dict]] = None,
    rss_sentiments: Optional[List[Dict]] = None,
    tweets_with_timestamps: Optional[List[Dict]] = None
) -> None:
    """
    Publish an energy calculation task to the queue.

    Builds the task payload (absent sentiment lists become empty lists) and
    sends it to the 'energy_calculation_tasks' queue using the standard
    event envelope.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        team_id: Team identifier
        twitter_sentiments: Optional list of Twitter sentiment scores
        reddit_sentiments: Optional list of Reddit sentiment scores
        rss_sentiments: Optional list of RSS sentiment scores
        tweets_with_timestamps: Optional list of tweets with timestamps
    """
    payload = dict(
        task_type="energy_calculation",
        match_id=match_id,
        team_id=team_id,
        twitter_sentiments=twitter_sentiments or [],
        reddit_sentiments=reddit_sentiments or [],
        rss_sentiments=rss_sentiments or [],
        tweets_with_timestamps=tweets_with_timestamps or [],
        created_at=datetime.utcnow().isoformat(),
    )
    client.publish_message(
        queue_name='energy_calculation_tasks',
        data=payload,
        event_type="energy_calculation.task.created",
    )
    logger.info(
        f"📤 Published energy calculation task for match {match_id}, "
        f"team {team_id}"
    )
def publish_result(
    client: RabbitMQClient,
    result_type: str,
    data: Dict
) -> None:
    """
    Publish a result to the results queue.

    Wraps the result data with its type and a creation timestamp before
    sending it to the shared 'results' queue.

    Args:
        client: RabbitMQ client instance
        result_type: Type of result ('scraping', 'sentiment', 'energy')
        data: Result data
    """
    envelope = {
        "result_type": result_type,
        "data": data,
        "created_at": datetime.utcnow().isoformat(),
    }
    client.publish_message(
        queue_name='results',
        data=envelope,
        event_type="result.published",
    )
    logger.info(f"📤 Published {result_type} result to results queue")
def publish_scraping_result(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    collected_count: int,
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish a scraping result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'scraping'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source scraped ('twitter', 'reddit', 'rss')
        collected_count: Number of items collected
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="scraping",
        data={
            "match_id": match_id,
            "source": source,
            "collected_count": collected_count,
            "status": "success",
            "metadata": metadata or {},
        },
    )
def publish_sentiment_analysis_result(
    client: RabbitMQClient,
    match_id: int,
    source: str,
    analyzed_count: int,
    metrics: Dict,
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish a sentiment analysis result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'sentiment'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        source: Source analyzed ('twitter', 'reddit')
        analyzed_count: Number of items analyzed
        metrics: Aggregated sentiment metrics
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="sentiment",
        data={
            "match_id": match_id,
            "source": source,
            "analyzed_count": analyzed_count,
            "metrics": metrics,
            "status": "success",
            "metadata": metadata or {},
        },
    )
def publish_energy_calculation_result(
    client: RabbitMQClient,
    match_id: int,
    team_id: int,
    energy_score: float,
    confidence: float,
    sources_used: List[str],
    metadata: Optional[Dict] = None
) -> None:
    """
    Publish an energy calculation result.

    Assembles the result payload (status is always 'success') and forwards
    it through ``publish_result`` with result_type 'energy'.

    Args:
        client: RabbitMQ client instance
        match_id: Match identifier
        team_id: Team identifier
        energy_score: Calculated energy score
        confidence: Confidence level
        sources_used: List of sources used in calculation
        metadata: Optional additional metadata
    """
    publish_result(
        client=client,
        result_type="energy",
        data={
            "match_id": match_id,
            "team_id": team_id,
            "energy_score": energy_score,
            "confidence": confidence,
            "sources_used": sources_used,
            "status": "success",
            "metadata": metadata or {},
        },
    )

View File

@@ -0,0 +1,238 @@
"""
RabbitMQ client module.
This module provides a RabbitMQ client with connection management
and queue declaration functionality.
"""
import json
import logging
from datetime import datetime
from typing import Dict, Optional, Callable
import pika
from pika.exceptions import AMQPConnectionError, AMQPChannelError
logger = logging.getLogger(__name__)
class RabbitMQClient:
    """
    RabbitMQ client with connection management and queue declaration.

    Features:
    - Connection management with reconnection logic
    - Queue declaration with durability
    - Message publishing with standard event format
    - Task consumption with error handling
    - Automatic acknowledgment management
    """

    def __init__(
        self,
        rabbitmq_url: str = "amqp://guest:guest@localhost:5672",
        prefetch_count: int = 1
    ):
        """
        Initialize RabbitMQ client.

        Args:
            rabbitmq_url: RabbitMQ connection URL
            prefetch_count: Number of unacknowledged messages to prefetch
                (1 gives fair dispatch between competing consumers)
        """
        self.rabbitmq_url = rabbitmq_url
        self.prefetch_count = prefetch_count
        # Both stay None until connect() succeeds; close() resets them.
        self.connection: Optional[pika.BlockingConnection] = None
        self.channel: Optional[pika.adapters.blocking_connection.BlockingChannel] = None
        # Logical name -> physical queue name. Identical today, but kept as
        # a mapping so physical names can change without touching callers.
        self.queues = {
            'scraping_tasks': 'scraping_tasks',
            'sentiment_analysis_tasks': 'sentiment_analysis_tasks',
            'energy_calculation_tasks': 'energy_calculation_tasks',
            'results': 'results'
        }

    def connect(self) -> None:
        """
        Establish connection to RabbitMQ server.

        Opens a blocking connection and channel, applies the prefetch
        limit, and declares all known queues as durable.

        Raises:
            AMQPConnectionError: If connection fails
            AMQPChannelError: If the channel cannot be created
        """
        try:
            logger.info(f"🔗 Connecting to RabbitMQ at {self.rabbitmq_url}")
            # Create connection
            self.connection = pika.BlockingConnection(
                pika.URLParameters(self.rabbitmq_url)
            )
            self.channel = self.connection.channel()
            # Set prefetch count for fair dispatch
            self.channel.basic_qos(prefetch_count=self.prefetch_count)
            # Declare queues so publish/consume never hit a missing queue
            self._declare_queues()
            logger.info("✅ Connected to RabbitMQ successfully")
        except AMQPConnectionError as e:
            logger.error(f"❌ Failed to connect to RabbitMQ: {e}")
            raise
        except AMQPChannelError as e:
            logger.error(f"❌ Failed to create RabbitMQ channel: {e}")
            raise

    def _declare_queues(self) -> None:
        """
        Declare all required queues with durability.

        durable=True makes the queues survive a broker restart (messages
        additionally need delivery_mode=2, which publish_message sets).
        """
        for queue_name in self.queues.values():
            self.channel.queue_declare(
                queue=queue_name,
                durable=True
            )
            logger.debug(f"✅ Declared queue: {queue_name}")

    def close(self) -> None:
        """
        Close connection to RabbitMQ server.

        Best-effort and idempotent: a failure while closing the channel no
        longer leaks the underlying connection (the previous implementation
        aborted before connection.close()), already-closed handles are
        skipped, and both references are reset to None so a later
        connect() starts from a clean state.
        """
        if self.channel is not None and self.channel.is_open:
            try:
                self.channel.close()
            except Exception as e:
                # Keep going: the connection below must still be closed.
                logger.warning(f"⚠️ Error closing RabbitMQ channel: {e}")
        self.channel = None
        if self.connection is not None and self.connection.is_open:
            try:
                self.connection.close()
            except Exception as e:
                logger.warning(f"⚠️ Error closing RabbitMQ connection: {e}")
        self.connection = None
        logger.info("🔌 Closed RabbitMQ connection")

    def publish_message(
        self,
        queue_name: str,
        data: Dict,
        event_type: str = "task.created",
        version: str = "1.0"
    ) -> None:
        """
        Publish a message to a queue with standard event format.

        Args:
            queue_name: Target queue name
            data: Message payload (will be JSON serialized)
            event_type: Event type for message header
            version: Event version

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized. Call connect() first.")
        # Create standard event message format
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12
        # and produces a naive timestamp; consider datetime.now(timezone.utc)
        # once downstream consumers tolerate the "+00:00" suffix.
        message = {
            "event": event_type,
            "version": version,
            "timestamp": datetime.utcnow().isoformat(),
            "data": data,
            "metadata": {
                "source": "api",
                "queue": queue_name
            }
        }
        try:
            self.channel.basic_publish(
                exchange='',  # default exchange: routing_key == queue name
                routing_key=queue_name,
                # default=str stringifies anything json can't serialize
                body=json.dumps(message, ensure_ascii=False, default=str),
                properties=pika.BasicProperties(
                    delivery_mode=2,  # Make message persistent
                    content_type='application/json'
                )
            )
            logger.debug(f"📤 Published message to {queue_name}: {event_type}")
        except Exception as e:
            logger.error(f"❌ Failed to publish message to {queue_name}: {e}")
            raise

    def consume_messages(
        self,
        queue_name: str,
        callback: Callable
    ) -> None:
        """
        Start consuming messages from a queue.

        Blocks in start_consuming() until interrupted; Ctrl-C stops the
        consumer cleanly. auto_ack is disabled, so the callback is
        responsible for ack/reject of every delivery.

        Args:
            queue_name: Queue to consume from
            callback: Callback function to process messages
                (pika signature: (channel, method, properties, body))

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized. Call connect() first.")
        try:
            logger.info(f"👂 Starting to consume from queue: {queue_name}")
            self.channel.basic_consume(
                queue=queue_name,
                on_message_callback=callback,
                auto_ack=False
            )
            self.channel.start_consuming()
        except KeyboardInterrupt:
            logger.info("⏹️ Stopping consumer...")
            self.channel.stop_consuming()
        except Exception as e:
            logger.error(f"❌ Error consuming from {queue_name}: {e}")
            raise

    def ack_message(self, delivery_tag: int) -> None:
        """
        Acknowledge a message as processed.

        Args:
            delivery_tag: Message delivery tag

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized.")
        self.channel.basic_ack(delivery_tag=delivery_tag)

    def reject_message(
        self,
        delivery_tag: int,
        requeue: bool = False
    ) -> None:
        """
        Reject a message (e.g., on processing failure).

        Args:
            delivery_tag: Message delivery tag
            requeue: Whether to requeue the message (False drops it, or
                routes to a dead-letter exchange if one is configured)

        Raises:
            RuntimeError: If connect() has not been called yet
        """
        if not self.channel:
            raise RuntimeError("RabbitMQ channel not initialized.")
        self.channel.basic_reject(delivery_tag=delivery_tag, requeue=requeue)
def create_rabbitmq_client(
    rabbitmq_url: Optional[str] = None,
    prefetch_count: int = 1
) -> RabbitMQClient:
    """
    Factory function to create a RabbitMQ client.

    Args:
        rabbitmq_url: Optional RabbitMQ connection URL (defaults to localhost)
        prefetch_count: Number of unacknowledged messages to prefetch

    Returns:
        Configured RabbitMQClient instance
    """
    # TODO: Load from environment variables or config file
    url = "amqp://guest:guest@localhost:5672" if rabbitmq_url is None else rabbitmq_url
    return RabbitMQClient(rabbitmq_url=url, prefetch_count=prefetch_count)