""" Sentiment analysis worker module. This module provides a worker that consumes sentiment analysis tasks from RabbitMQ and executes sentiment analysis operations. """ import logging from typing import Dict, List from sqlalchemy.orm import Session from app.services.sentiment_service import ( process_tweet_batch, process_reddit_post_batch, get_sentiment_by_entity ) from app.models.tweet import Tweet from app.models.reddit_post import RedditPost logger = logging.getLogger(__name__) class SentimentWorker: """ Worker for processing sentiment analysis tasks. Features: - Consumes tasks from sentiment_analysis_tasks queue - Executes VADER sentiment analysis - Processes batches of tweets and Reddit posts - Publishes results to results queue - Handles errors with retries - Structured logging """ def __init__(self): """Initialize sentiment analysis worker.""" # No initialization needed for sentiment worker # VADER analyzer is initialized in sentiment_service pass def execute_sentiment_analysis_task( self, task: Dict, db: Session ) -> Dict: """ Execute a sentiment analysis task. Args: task: Sentiment analysis task data db: Database session Returns: Dictionary with sentiment analysis results """ source = task.get('source') match_id = task.get('match_id') entity_ids = task.get('entity_ids', []) logger.info( f"🔧 Executing sentiment analysis task: " f"match_id={match_id}, source={source}, " f"entities={len(entity_ids)}" ) try: if source == 'twitter': return self._execute_twitter_sentiment_analysis( match_id, entity_ids, db ) elif source == 'reddit': return self._execute_reddit_sentiment_analysis( match_id, entity_ids, db ) else: logger.error(f"❌ Unknown sentiment source: {source}") return { 'analyzed_count': 0, 'status': 'error', 'error': f'Unknown source: {source}' } except Exception as e: logger.error(f"❌ Sentiment analysis task failed: {e}") return { 'analyzed_count': 0, 'status': 'error', 'error': str(e) } def _execute_twitter_sentiment_analysis( self, match_id: int, entity_ids: List[str], db: Session ) -> Dict: """ Execute sentiment analysis for Twitter data. Args: match_id: Match identifier entity_ids: List of tweet IDs db: Database session Returns: Dictionary with sentiment analysis results """ try: # Fetch tweets from database tweets = db.query(Tweet).filter( Tweet.tweet_id.in_(entity_ids) ).all() if not tweets: logger.warning(f"âš ī¸ No tweets found for entities: {entity_ids}") return { 'analyzed_count': 0, 'status': 'success', 'metrics': { 'total_count': 0, 'positive_count': 0, 'negative_count': 0, 'neutral_count': 0, 'average_compound': 0.0 } } # Check if already analyzed unanalyzed_tweets = [] for tweet in tweets: existing_sentiment = get_sentiment_by_entity( db, tweet.tweet_id, 'tweet' ) if not existing_sentiment: unanalyzed_tweets.append(tweet) if not unanalyzed_tweets: logger.info( f"â„šī¸ All {len(tweets)} tweets already analyzed" ) # Get metrics from existing sentiments metrics = self._calculate_metrics_from_existing(db, match_id) return { 'analyzed_count': 0, 'status': 'success', 'metrics': metrics } # Analyze batch sentiment_scores = process_tweet_batch(db, unanalyzed_tweets) # Calculate metrics metrics = self._calculate_sentiment_metrics(db, match_id) logger.info( f"✅ Twitter sentiment analysis completed: " f"{len(sentiment_scores)} tweets analyzed" ) return { 'analyzed_count': len(sentiment_scores), 'status': 'success', 'metrics': metrics } except Exception as e: logger.error(f"❌ Twitter sentiment analysis failed: {e}") return { 'analyzed_count': 0, 'status': 'error', 'error': str(e) } def _execute_reddit_sentiment_analysis( self, match_id: int, entity_ids: List[str], db: Session ) -> Dict: """ Execute sentiment analysis for Reddit data. Args: match_id: Match identifier entity_ids: List of Reddit post IDs db: Database session Returns: Dictionary with sentiment analysis results """ try: # Fetch Reddit posts from database posts = db.query(RedditPost).filter( RedditPost.post_id.in_(entity_ids) ).all() if not posts: logger.warning(f"âš ī¸ No Reddit posts found for entities: {entity_ids}") return { 'analyzed_count': 0, 'status': 'success', 'metrics': { 'total_count': 0, 'positive_count': 0, 'negative_count': 0, 'neutral_count': 0, 'average_compound': 0.0 } } # Check if already analyzed unanalyzed_posts = [] for post in posts: existing_sentiment = get_sentiment_by_entity( db, post.post_id, 'reddit_post' ) if not existing_sentiment: unanalyzed_posts.append(post) if not unanalyzed_posts: logger.info( f"â„šī¸ All {len(posts)} Reddit posts already analyzed" ) # Get metrics from existing sentiments metrics = self._calculate_metrics_from_existing(db, match_id) return { 'analyzed_count': 0, 'status': 'success', 'metrics': metrics } # Analyze batch sentiment_scores = process_reddit_post_batch(db, unanalyzed_posts) # Calculate metrics metrics = self._calculate_sentiment_metrics(db, match_id) logger.info( f"✅ Reddit sentiment analysis completed: " f"{len(sentiment_scores)} posts analyzed" ) return { 'analyzed_count': len(sentiment_scores), 'status': 'success', 'metrics': metrics } except Exception as e: logger.error(f"❌ Reddit sentiment analysis failed: {e}") return { 'analyzed_count': 0, 'status': 'error', 'error': str(e) } def _calculate_sentiment_metrics( self, db: Session, match_id: int ) -> Dict: """ Calculate aggregated sentiment metrics for a match. Args: db: Database session match_id: Match identifier Returns: Dictionary with aggregated metrics """ from app.services.sentiment_service import calculate_match_sentiment_metrics return calculate_match_sentiment_metrics(db, match_id) def _calculate_metrics_from_existing( self, db: Session, match_id: int ) -> Dict: """ Calculate metrics from existing sentiment scores. Args: db: Database session match_id: Match identifier Returns: Dictionary with aggregated metrics """ return self._calculate_sentiment_metrics(db, match_id) def create_sentiment_worker() -> SentimentWorker: """ Factory function to create a sentiment analysis worker. Returns: Configured SentimentWorker instance """ return SentimentWorker()