"""
Sentiment analysis worker module.

This module provides a worker that consumes sentiment analysis tasks
from RabbitMQ and executes sentiment analysis operations.
"""

import logging
from typing import Dict, List

from sqlalchemy.orm import Session

from app.services.sentiment_service import (
    process_tweet_batch,
    process_reddit_post_batch,
    get_sentiment_by_entity
)
from app.models.tweet import Tweet
from app.models.reddit_post import RedditPost

logger = logging.getLogger(__name__)

class SentimentWorker:
    """
    Worker for processing sentiment analysis tasks.

    Features:
    - Consumes tasks from sentiment_analysis_tasks queue
    - Executes VADER sentiment analysis
    - Processes batches of tweets and Reddit posts
    - Publishes results to results queue
    - Handles errors with retries
    - Structured logging
    """

    def __init__(self):
        """Initialize sentiment analysis worker.

        No worker-local state is needed: the VADER analyzer is
        initialized inside sentiment_service.
        """

    def execute_sentiment_analysis_task(
        self,
        task: Dict,
        db: Session
    ) -> Dict:
        """
        Execute a sentiment analysis task.

        Args:
            task: Sentiment analysis task data; expected keys are
                'source' ('twitter' or 'reddit'), 'match_id' and
                'entity_ids'.
            db: Database session

        Returns:
            Dictionary with 'analyzed_count', 'status' and either
            'metrics' (on success) or 'error' (on failure).
        """
        source = task.get('source')
        match_id = task.get('match_id')
        entity_ids = task.get('entity_ids', [])

        logger.info(
            f"🔧 Executing sentiment analysis task: "
            f"match_id={match_id}, source={source}, "
            f"entities={len(entity_ids)}"
        )

        try:
            if source == 'twitter':
                return self._execute_twitter_sentiment_analysis(
                    match_id, entity_ids, db
                )
            elif source == 'reddit':
                return self._execute_reddit_sentiment_analysis(
                    match_id, entity_ids, db
                )
            else:
                logger.error(f"❌ Unknown sentiment source: {source}")
                return {
                    'analyzed_count': 0,
                    'status': 'error',
                    'error': f'Unknown source: {source}'
                }

        except Exception as e:
            # Catch-all boundary: a failed task must yield an error
            # result instead of crashing the consumer loop.
            logger.error(f"❌ Sentiment analysis task failed: {e}")
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    def _execute_twitter_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Twitter data.

        Args:
            match_id: Match identifier
            entity_ids: List of tweet IDs
            db: Database session

        Returns:
            Dictionary with sentiment analysis results
        """
        return self._run_batch_analysis(
            db=db,
            match_id=match_id,
            entity_ids=entity_ids,
            model=Tweet,
            id_column=Tweet.tweet_id,
            id_attr='tweet_id',
            entity_type='tweet',
            process_batch=process_tweet_batch,
            source_label='Twitter',
            entity_label='tweets',
            analyzed_label='tweets',
        )

    def _execute_reddit_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Reddit data.

        Args:
            match_id: Match identifier
            entity_ids: List of Reddit post IDs
            db: Database session

        Returns:
            Dictionary with sentiment analysis results
        """
        return self._run_batch_analysis(
            db=db,
            match_id=match_id,
            entity_ids=entity_ids,
            model=RedditPost,
            id_column=RedditPost.post_id,
            id_attr='post_id',
            entity_type='reddit_post',
            process_batch=process_reddit_post_batch,
            source_label='Reddit',
            entity_label='Reddit posts',
            analyzed_label='posts',
        )

    def _run_batch_analysis(
        self,
        db: Session,
        match_id: int,
        entity_ids: List[str],
        model,
        id_column,
        id_attr: str,
        entity_type: str,
        process_batch,
        source_label: str,
        entity_label: str,
        analyzed_label: str,
    ) -> Dict:
        """
        Shared pipeline for one sentiment batch (Twitter or Reddit).

        Fetches the requested entities, skips ones that already have a
        sentiment record, runs the batch analyzer on the rest, and
        aggregates match-level metrics.

        Args:
            db: Database session
            match_id: Match identifier
            entity_ids: External IDs of the entities to analyze
            model: ORM model class to query (Tweet or RedditPost)
            id_column: ORM column used for the IN filter
            id_attr: Attribute name holding the entity's external ID
            entity_type: Type tag passed to get_sentiment_by_entity
            process_batch: Batch analyzer callable, (db, entities) -> scores
            source_label: Source name used in log messages
            entity_label: Plural noun for not-found/already-analyzed logs
            analyzed_label: Plural noun for the completion log

        Returns:
            Dictionary with sentiment analysis results
        """
        try:
            # Fetch entities from database
            entities = db.query(model).filter(
                id_column.in_(entity_ids)
            ).all()

            if not entities:
                logger.warning(
                    f"⚠️ No {entity_label} found for entities: {entity_ids}"
                )
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': self._empty_metrics()
                }

            # Only analyze entities without an existing sentiment record.
            pending = [
                entity for entity in entities
                if not get_sentiment_by_entity(
                    db, getattr(entity, id_attr), entity_type
                )
            ]

            if not pending:
                logger.info(
                    f"ℹ️ All {len(entities)} {entity_label} already analyzed"
                )
                # Metrics can still be aggregated from existing records.
                metrics = self._calculate_metrics_from_existing(db, match_id)
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': metrics
                }

            # Analyze batch
            sentiment_scores = process_batch(db, pending)

            # Calculate metrics
            metrics = self._calculate_sentiment_metrics(db, match_id)

            logger.info(
                f"✅ {source_label} sentiment analysis completed: "
                f"{len(sentiment_scores)} {analyzed_label} analyzed"
            )

            return {
                'analyzed_count': len(sentiment_scores),
                'status': 'success',
                'metrics': metrics
            }

        except Exception as e:
            logger.error(f"❌ {source_label} sentiment analysis failed: {e}")
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    @staticmethod
    def _empty_metrics() -> Dict:
        """Return a fresh zeroed metrics dictionary for empty batches."""
        return {
            'total_count': 0,
            'positive_count': 0,
            'negative_count': 0,
            'neutral_count': 0,
            'average_compound': 0.0
        }

    def _calculate_sentiment_metrics(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate aggregated sentiment metrics for a match.

        Args:
            db: Database session
            match_id: Match identifier

        Returns:
            Dictionary with aggregated metrics
        """
        # NOTE(review): local import, presumably to avoid a circular
        # import with sentiment_service — confirm before hoisting.
        from app.services.sentiment_service import calculate_match_sentiment_metrics

        return calculate_match_sentiment_metrics(db, match_id)

    def _calculate_metrics_from_existing(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate metrics from existing sentiment scores.

        Currently identical to _calculate_sentiment_metrics; kept as a
        separate name to preserve the original interface.

        Args:
            db: Database session
            match_id: Match identifier

        Returns:
            Dictionary with aggregated metrics
        """
        return self._calculate_sentiment_metrics(db, match_id)
|
||
|
||
|
||
def create_sentiment_worker() -> SentimentWorker:
    """
    Build and return a sentiment analysis worker.

    Returns:
        Configured SentimentWorker instance
    """
    worker = SentimentWorker()
    return worker
|