"""
Sentiment analysis worker module.

This module provides a worker that consumes sentiment analysis tasks
from RabbitMQ and executes sentiment analysis operations.
"""

import logging
from typing import Dict, List

from sqlalchemy.orm import Session

from app.services.sentiment_service import (
    process_tweet_batch,
    process_reddit_post_batch,
    get_sentiment_by_entity
)
from app.models.tweet import Tweet
from app.models.reddit_post import RedditPost

logger = logging.getLogger(__name__)

class SentimentWorker:
    """
    Worker for processing sentiment analysis tasks.

    Features:
    - Consumes tasks from sentiment_analysis_tasks queue
    - Executes VADER sentiment analysis
    - Processes batches of tweets and Reddit posts
    - Publishes results to results queue
    - Handles errors with retries
    - Structured logging
    """

    def __init__(self):
        """Initialize sentiment analysis worker.

        No worker-local state is needed: the VADER analyzer is
        initialized inside sentiment_service.
        """

    def execute_sentiment_analysis_task(
        self,
        task: Dict,
        db: Session
    ) -> Dict:
        """
        Execute a sentiment analysis task.

        Args:
            task: Sentiment analysis task data; expected keys are
                'source' ('twitter' or 'reddit'), 'match_id' and
                'entity_ids'.
            db: Database session

        Returns:
            Dictionary with 'analyzed_count', 'status' and either
            'metrics' (on success) or 'error' (on failure).
        """
        source = task.get('source')
        match_id = task.get('match_id')
        entity_ids = task.get('entity_ids', [])

        logger.info(
            f"🔧 Executing sentiment analysis task: "
            f"match_id={match_id}, source={source}, "
            f"entities={len(entity_ids)}"
        )

        try:
            if source == 'twitter':
                return self._execute_twitter_sentiment_analysis(
                    match_id, entity_ids, db
                )
            elif source == 'reddit':
                return self._execute_reddit_sentiment_analysis(
                    match_id, entity_ids, db
                )
            else:
                logger.error(f"❌ Unknown sentiment source: {source}")
                return {
                    'analyzed_count': 0,
                    'status': 'error',
                    'error': f'Unknown source: {source}'
                }

        except Exception as e:
            # Catch-all boundary: a failed task must yield an error
            # result instead of crashing the consumer loop.
            logger.error(f"❌ Sentiment analysis task failed: {e}")
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    def _execute_twitter_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Twitter data.

        Args:
            match_id: Match identifier
            entity_ids: List of tweet IDs
            db: Database session

        Returns:
            Dictionary with sentiment analysis results
        """
        return self._run_batch_analysis(
            db=db,
            match_id=match_id,
            entity_ids=entity_ids,
            model=Tweet,
            id_column=Tweet.tweet_id,
            id_attr='tweet_id',
            entity_type='tweet',
            process_batch=process_tweet_batch,
            source_label='Twitter',
            entity_label='tweets',
            analyzed_label='tweets',
        )

    def _execute_reddit_sentiment_analysis(
        self,
        match_id: int,
        entity_ids: List[str],
        db: Session
    ) -> Dict:
        """
        Execute sentiment analysis for Reddit data.

        Args:
            match_id: Match identifier
            entity_ids: List of Reddit post IDs
            db: Database session

        Returns:
            Dictionary with sentiment analysis results
        """
        return self._run_batch_analysis(
            db=db,
            match_id=match_id,
            entity_ids=entity_ids,
            model=RedditPost,
            id_column=RedditPost.post_id,
            id_attr='post_id',
            entity_type='reddit_post',
            process_batch=process_reddit_post_batch,
            source_label='Reddit',
            entity_label='Reddit posts',
            analyzed_label='posts',
        )

    def _run_batch_analysis(
        self,
        db: Session,
        match_id: int,
        entity_ids: List[str],
        model,
        id_column,
        id_attr: str,
        entity_type: str,
        process_batch,
        source_label: str,
        entity_label: str,
        analyzed_label: str,
    ) -> Dict:
        """
        Shared pipeline for one sentiment batch (Twitter or Reddit).

        Fetches the requested entities, skips ones that already have a
        sentiment record, runs the batch analyzer on the rest, and
        aggregates match-level metrics.

        Args:
            db: Database session
            match_id: Match identifier
            entity_ids: External IDs of the entities to analyze
            model: ORM model class to query (Tweet or RedditPost)
            id_column: ORM column used for the IN filter
            id_attr: Attribute name holding the entity's external ID
            entity_type: Type tag passed to get_sentiment_by_entity
            process_batch: Batch analyzer callable, (db, entities) -> scores
            source_label: Source name used in log messages
            entity_label: Plural noun for not-found/already-analyzed logs
            analyzed_label: Plural noun for the completion log

        Returns:
            Dictionary with sentiment analysis results
        """
        try:
            # Fetch entities from database
            entities = db.query(model).filter(
                id_column.in_(entity_ids)
            ).all()

            if not entities:
                logger.warning(
                    f"⚠️ No {entity_label} found for entities: {entity_ids}"
                )
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': self._empty_metrics()
                }

            # Only analyze entities without an existing sentiment record.
            pending = [
                entity for entity in entities
                if not get_sentiment_by_entity(
                    db, getattr(entity, id_attr), entity_type
                )
            ]

            if not pending:
                logger.info(
                    f"ℹ️ All {len(entities)} {entity_label} already analyzed"
                )
                # Metrics can still be aggregated from existing records.
                metrics = self._calculate_metrics_from_existing(db, match_id)
                return {
                    'analyzed_count': 0,
                    'status': 'success',
                    'metrics': metrics
                }

            # Analyze batch
            sentiment_scores = process_batch(db, pending)

            # Calculate metrics
            metrics = self._calculate_sentiment_metrics(db, match_id)

            logger.info(
                f"✅ {source_label} sentiment analysis completed: "
                f"{len(sentiment_scores)} {analyzed_label} analyzed"
            )

            return {
                'analyzed_count': len(sentiment_scores),
                'status': 'success',
                'metrics': metrics
            }

        except Exception as e:
            logger.error(f"❌ {source_label} sentiment analysis failed: {e}")
            return {
                'analyzed_count': 0,
                'status': 'error',
                'error': str(e)
            }

    @staticmethod
    def _empty_metrics() -> Dict:
        """Return a fresh zeroed metrics dictionary for empty batches."""
        return {
            'total_count': 0,
            'positive_count': 0,
            'negative_count': 0,
            'neutral_count': 0,
            'average_compound': 0.0
        }

    def _calculate_sentiment_metrics(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate aggregated sentiment metrics for a match.

        Args:
            db: Database session
            match_id: Match identifier

        Returns:
            Dictionary with aggregated metrics
        """
        # NOTE(review): local import, presumably to avoid a circular
        # import with sentiment_service — confirm before hoisting.
        from app.services.sentiment_service import calculate_match_sentiment_metrics

        return calculate_match_sentiment_metrics(db, match_id)

    def _calculate_metrics_from_existing(
        self,
        db: Session,
        match_id: int
    ) -> Dict:
        """
        Calculate metrics from existing sentiment scores.

        Currently identical to _calculate_sentiment_metrics; kept as a
        separate name to preserve the original interface.

        Args:
            db: Database session
            match_id: Match identifier

        Returns:
            Dictionary with aggregated metrics
        """
        return self._calculate_sentiment_metrics(db, match_id)
|
||
|
||
|
||
def create_sentiment_worker() -> SentimentWorker:
    """
    Build and return a sentiment analysis worker.

    Returns:
        Configured SentimentWorker instance
    """
    worker = SentimentWorker()
    return worker
|