chartbastan/backend/app/workers/sentiment_worker.py
2026-02-01 09:31:38 +01:00

303 lines
9.2 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Sentiment analysis worker module.
This module provides a worker that consumes sentiment analysis tasks
from RabbitMQ and executes sentiment analysis operations.
"""
import logging
from typing import Dict, List
from sqlalchemy.orm import Session
from app.services.sentiment_service import (
process_tweet_batch,
process_reddit_post_batch,
get_sentiment_by_entity
)
from app.models.tweet import Tweet
from app.models.reddit_post import RedditPost
logger = logging.getLogger(__name__)
class SentimentWorker:
    """
    Worker for processing sentiment analysis tasks.

    Features:
    - Consumes tasks from sentiment_analysis_tasks queue
    - Executes VADER sentiment analysis
    - Processes batches of tweets and Reddit posts
    - Publishes results to results queue
    - Handles errors with retries
    - Structured logging
    """

    def __init__(self):
        """Initialize sentiment analysis worker.

        No per-instance state is required: the VADER analyzer is
        initialized in sentiment_service and shared by all workers.
        """

    @staticmethod
    def _empty_metrics() -> Dict:
        """Return a fresh zeroed metrics dict for the no-entities case."""
        return {
            'total_count': 0,
            'positive_count': 0,
            'negative_count': 0,
            'neutral_count': 0,
            'average_compound': 0.0
        }

    def execute_sentiment_analysis_task(
        self,
        task: Dict,
        db: Session
    ) -> Dict:
        """
        Execute a sentiment analysis task.

        Dispatches to the Twitter or Reddit handler based on the task's
        'source' field.

        Args:
            task: Sentiment analysis task data; expected keys are
                'source', 'match_id' and 'entity_ids'.
            db: Database session.

        Returns:
            Dictionary with 'analyzed_count', 'status' and either
            'metrics' (on success) or 'error' (on failure).
        """
        source = task.get('source')
        match_id = task.get('match_id')
        entity_ids = task.get('entity_ids', [])
        logger.info(
            "🔧 Executing sentiment analysis task: "
            "match_id=%s, source=%s, entities=%d",
            match_id, source, len(entity_ids)
        )
        try:
            if source == 'twitter':
                return self._execute_twitter_sentiment_analysis(
                    match_id, entity_ids, db
                )
            if source == 'reddit':
                return self._execute_reddit_sentiment_analysis(
                    match_id, entity_ids, db
                )
            logger.error("❌ Unknown sentiment source: %s", source)
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': f'Unknown source: {source}'
            }
        except Exception as e:
            # Never let a single task crash the worker loop; report the
            # failure in the result payload instead. logger.exception
            # preserves the traceback that logger.error(f"...") lost.
            logger.exception("❌ Sentiment analysis task failed: %s", e)
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    def _execute_twitter_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Twitter data.

        Args:
            match_id: Match identifier.
            entity_ids: List of tweet IDs.
            db: Database session.

        Returns:
            Dictionary with sentiment analysis results.
        """
        return self._run_source_analysis(
            match_id=match_id,
            entity_ids=entity_ids,
            db=db,
            model=Tweet,
            id_attr='tweet_id',
            entity_type='tweet',
            batch_fn=process_tweet_batch,
            source_label='Twitter',
            noun='tweets',
            unit='tweets',
        )

    def _execute_reddit_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Reddit data.

        Args:
            match_id: Match identifier.
            entity_ids: List of Reddit post IDs.
            db: Database session.

        Returns:
            Dictionary with sentiment analysis results.
        """
        return self._run_source_analysis(
            match_id=match_id,
            entity_ids=entity_ids,
            db=db,
            model=RedditPost,
            id_attr='post_id',
            entity_type='reddit_post',
            batch_fn=process_reddit_post_batch,
            source_label='Reddit',
            noun='Reddit posts',
            unit='posts',
        )

    def _run_source_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session,
        model,
        id_attr: str,
        entity_type: str,
        batch_fn,
        source_label: str,
        noun: str,
        unit: str,
    ) -> Dict:
        """
        Shared analysis pipeline for one data source.

        The Twitter and Reddit flows were line-for-line duplicates
        differing only in model/ID/labels; this helper is that common
        flow: fetch entities by ID, skip those that already have a
        sentiment score, analyze the rest as a batch, then aggregate
        match-level metrics.

        Args:
            match_id: Match identifier.
            entity_ids: Entity IDs to analyze.
            db: Database session.
            model: ORM model class to query (Tweet or RedditPost).
            id_attr: Name of the model's ID attribute ('tweet_id'/'post_id').
            entity_type: Sentiment entity type ('tweet' or 'reddit_post').
            batch_fn: Batch-analysis function from sentiment_service.
            source_label: Capitalized source name for log messages.
            noun: Plural entity noun for warning/info log messages.
            unit: Short plural noun for the completion log message.

        Returns:
            Dictionary with 'analyzed_count', 'status' and 'metrics'
            on success, or 'analyzed_count', 'status', 'error' on failure.
        """
        try:
            id_column = getattr(model, id_attr)
            entities = db.query(model).filter(
                id_column.in_(entity_ids)
            ).all()
            if not entities:
                logger.warning(
                    "⚠️ No %s found for entities: %s", noun, entity_ids
                )
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': self._empty_metrics()
                }
            # Skip entities that already carry a sentiment score so
            # re-delivered tasks stay idempotent.
            pending = [
                entity for entity in entities
                if not get_sentiment_by_entity(
                    db, getattr(entity, id_attr), entity_type
                )
            ]
            if not pending:
                logger.info(
                    "All %d %s already analyzed", len(entities), noun
                )
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': self._calculate_metrics_from_existing(
                        db, match_id
                    )
                }
            sentiment_scores = batch_fn(db, pending)
            metrics = self._calculate_sentiment_metrics(db, match_id)
            logger.info(
                "✅ %s sentiment analysis completed: %d %s analyzed",
                source_label, len(sentiment_scores), unit
            )
            return {
                'analyzed_count': len(sentiment_scores),
                'status': 'success',
                'metrics': metrics
            }
        except Exception as e:
            logger.exception(
                "❌ %s sentiment analysis failed: %s", source_label, e
            )
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    def _calculate_sentiment_metrics(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate aggregated sentiment metrics for a match.

        Args:
            db: Database session.
            match_id: Match identifier.

        Returns:
            Dictionary with aggregated metrics.
        """
        # Local import kept as in the original — presumably avoids a
        # circular import with sentiment_service; confirm before hoisting.
        from app.services.sentiment_service import calculate_match_sentiment_metrics
        return calculate_match_sentiment_metrics(db, match_id)

    def _calculate_metrics_from_existing(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate metrics from existing sentiment scores.

        Currently identical to _calculate_sentiment_metrics; kept as a
        separate entry point for readability at its call sites.

        Args:
            db: Database session.
            match_id: Match identifier.

        Returns:
            Dictionary with aggregated metrics.
        """
        return self._calculate_sentiment_metrics(db, match_id)
def create_sentiment_worker() -> SentimentWorker:
    """
    Factory function to create a sentiment analysis worker.

    Returns:
        Configured SentimentWorker instance
    """
    worker = SentimentWorker()
    return worker