# chartbastan/backend/app/services/sentiment_service.py

"""
Sentiment Analysis Service

This module provides services for batch processing of tweets and posts,
storing sentiment scores in the database, and calculating aggregated metrics.
"""

from typing import List, Dict, Optional
from sqlalchemy.orm import Session

from app.ml.sentiment_analyzer import (
    analyze_sentiment,
    analyze_sentiment_batch,
    calculate_aggregated_metrics
)
from app.models.sentiment_score import SentimentScore
from app.models.tweet import Tweet
from app.models.reddit_post import RedditPost


def process_tweet_sentiment(
    db: Session,
    tweet_id: str,
    text: str
) -> SentimentScore:
    """
    Analyze sentiment for a single tweet and store it in the database.

    The text is scored with ``analyze_sentiment`` and the result is saved as
    a ``SentimentScore`` row with ``entity_type='tweet'``. The session is
    committed before returning. If persisting fails, the session is rolled
    back and the original exception is re-raised.

    Args:
        db: Database session
        tweet_id: Tweet identifier
        text: Tweet text to analyze

    Returns:
        Created SentimentScore record, refreshed from the database

    Raises:
        Exception: Whatever the session raises while committing or
            refreshing; the session has been rolled back by then.
    """
    result = analyze_sentiment(text)

    sentiment_score = SentimentScore(
        entity_id=tweet_id,
        entity_type='tweet',
        score=result['compound'],
        sentiment_type=result['sentiment'],
        positive=result['positive'],
        negative=result['negative'],
        neutral=result['neutral']
    )

    try:
        db.add(sentiment_score)
        db.commit()
        # Reload so database-generated fields are populated on the record.
        db.refresh(sentiment_score)
    except Exception:
        # A failed flush/commit leaves the session in a failed transaction
        # until it is rolled back; reset it so the caller can keep using db.
        db.rollback()
        raise

    return sentiment_score


def process_tweet_batch(
    db: Session,
    tweets: List[Tweet]
) -> List[SentimentScore]:
    """
    Analyze sentiment for a batch of tweets and store the scores.

    All tweet texts are scored in one ``analyze_sentiment_batch`` call and
    each result is saved as a ``SentimentScore`` row with
    ``entity_type='tweet'``. All rows are committed together. If persisting
    fails, the session is rolled back and the exception is re-raised.

    Args:
        db: Database session
        tweets: List of Tweet models to analyze

    Returns:
        List of created SentimentScore records, in the same order as
        ``tweets``. Empty list when ``tweets`` is empty.

    Raises:
        ValueError: If the analyzer returns a different number of results
            than there are tweets.
        Exception: Whatever the session raises while committing or
            refreshing; the session has been rolled back by then.
    """
    if not tweets:
        return []

    # Materialize so the result count can be checked before pairing.
    results = list(analyze_sentiment_batch([tweet.text for tweet in tweets]))

    # zip() would silently truncate on a mismatch, dropping scores or
    # attaching them to the wrong tweets; fail loudly instead.
    if len(results) != len(tweets):
        raise ValueError(
            f"analyze_sentiment_batch returned {len(results)} results "
            f"for {len(tweets)} tweets"
        )

    sentiment_scores = [
        SentimentScore(
            entity_id=tweet.tweet_id,
            entity_type='tweet',
            score=result['compound'],
            sentiment_type=result['sentiment'],
            positive=result['positive'],
            negative=result['negative'],
            neutral=result['neutral']
        )
        for tweet, result in zip(tweets, results)
    ]

    try:
        # Batch insert
        db.add_all(sentiment_scores)
        db.commit()

        # Reload each row so database-generated fields (such as IDs) are set.
        for score in sentiment_scores:
            db.refresh(score)
    except Exception:
        # A failed flush/commit leaves the session in a failed transaction
        # until it is rolled back; reset it so the caller can keep using db.
        db.rollback()
        raise

    return sentiment_scores


def process_reddit_post_sentiment(
    db: Session,
    post_id: str,
    text: str
) -> SentimentScore:
    """
    Analyze sentiment for a single Reddit post and store it in the database.

    The text is scored with ``analyze_sentiment`` and the result is saved as
    a ``SentimentScore`` row with ``entity_type='reddit_post'``. The session
    is committed before returning. If persisting fails, the session is
    rolled back and the original exception is re-raised.

    Args:
        db: Database session
        post_id: Reddit post identifier
        text: Post text to analyze

    Returns:
        Created SentimentScore record, refreshed from the database

    Raises:
        Exception: Whatever the session raises while committing or
            refreshing; the session has been rolled back by then.
    """
    result = analyze_sentiment(text)

    sentiment_score = SentimentScore(
        entity_id=post_id,
        entity_type='reddit_post',
        score=result['compound'],
        sentiment_type=result['sentiment'],
        positive=result['positive'],
        negative=result['negative'],
        neutral=result['neutral']
    )

    try:
        db.add(sentiment_score)
        db.commit()
        # Reload so database-generated fields are populated on the record.
        db.refresh(sentiment_score)
    except Exception:
        # A failed flush/commit leaves the session in a failed transaction
        # until it is rolled back; reset it so the caller can keep using db.
        db.rollback()
        raise

    return sentiment_score


def process_reddit_post_batch(
    db: Session,
    posts: List[RedditPost]
) -> List[SentimentScore]:
    """
    Analyze sentiment for a batch of Reddit posts and store the scores.

    Each post is scored on its title and body joined by a single space;
    missing or empty parts are skipped. All texts are scored in one
    ``analyze_sentiment_batch`` call and each result is saved as a
    ``SentimentScore`` row with ``entity_type='reddit_post'``. All rows are
    committed together. If persisting fails, the session is rolled back and
    the exception is re-raised.

    Args:
        db: Database session
        posts: List of RedditPost models to analyze

    Returns:
        List of created SentimentScore records, in the same order as
        ``posts``. Empty list when ``posts`` is empty.

    Raises:
        ValueError: If the analyzer returns a different number of results
            than there are posts.
        Exception: Whatever the session raises while committing or
            refreshing; the session has been rolled back by then.
    """
    if not posts:
        return []

    # Join only the parts that are present, so a missing title is not
    # rendered as the literal word "None" and an empty body leaves no
    # trailing space.
    texts = [
        " ".join(part for part in (post.title, post.text) if part)
        for post in posts
    ]

    # Materialize so the result count can be checked before pairing.
    results = list(analyze_sentiment_batch(texts))

    # zip() would silently truncate on a mismatch, dropping scores or
    # attaching them to the wrong posts; fail loudly instead.
    if len(results) != len(posts):
        raise ValueError(
            f"analyze_sentiment_batch returned {len(results)} results "
            f"for {len(posts)} posts"
        )

    sentiment_scores = [
        SentimentScore(
            entity_id=post.post_id,
            entity_type='reddit_post',
            score=result['compound'],
            sentiment_type=result['sentiment'],
            positive=result['positive'],
            negative=result['negative'],
            neutral=result['neutral']
        )
        for post, result in zip(posts, results)
    ]

    try:
        # Batch insert
        db.add_all(sentiment_scores)
        db.commit()

        # Reload each row so database-generated fields (such as IDs) are set.
        for score in sentiment_scores:
            db.refresh(score)
    except Exception:
        # A failed flush/commit leaves the session in a failed transaction
        # until it is rolled back; reset it so the caller can keep using db.
        db.rollback()
        raise

    return sentiment_scores


def get_sentiment_by_entity(
    db: Session,
    entity_id: str,
    entity_type: str
) -> Optional[SentimentScore]:
    """
    Look up the stored sentiment score for one entity.

    Args:
        db: Database session
        entity_id: Entity identifier
        entity_type: Entity type ('tweet' or 'reddit_post')

    Returns:
        The first SentimentScore matching both the identifier and the
        type, or None when no row matches
    """
    same_id = SentimentScore.entity_id == entity_id
    same_type = SentimentScore.entity_type == entity_type

    query = db.query(SentimentScore).filter(same_id, same_type)
    return query.first()


def get_sentiments_by_match(
    db: Session,
    match_id: int
) -> List[SentimentScore]:
    """
    Fetch the tweet sentiment scores attached to a match.

    Only scores with ``entity_type == 'tweet'`` are returned: the lookup
    joins SentimentScore to Tweet and filters on the tweet's match.

    Args:
        db: Database session
        match_id: Match identifier

    Returns:
        List of SentimentScore records for tweets of the match
    """
    # A score belongs to a tweet when its entity_id equals the tweet_id.
    score_of_tweet = Tweet.tweet_id == SentimentScore.entity_id

    query = db.query(SentimentScore).join(Tweet, score_of_tweet)
    query = query.filter(
        Tweet.match_id == match_id,
        SentimentScore.entity_type == 'tweet'
    )
    return query.all()


def calculate_match_sentiment_metrics(
    db: Session,
    match_id: int
) -> Dict:
    """
    Aggregate the stored sentiment scores of a match into summary metrics.

    Args:
        db: Database session
        match_id: Match identifier

    Returns:
        Dictionary with aggregated metrics, including the ``match_id``.
        When the match has no scores, every count is 0 and every ratio
        and the average are 0.0.
    """
    rows = get_sentiments_by_match(db, match_id)

    if rows:
        # calculate_aggregated_metrics takes plain dicts, not ORM rows.
        metrics = calculate_aggregated_metrics([
            {'compound': row.score, 'sentiment': row.sentiment_type}
            for row in rows
        ])
        metrics['match_id'] = match_id
        return metrics

    # Nothing stored for this match: return an all-zero result.
    empty_metrics = {'match_id': match_id}
    empty_metrics.update(dict.fromkeys(
        ('total_count', 'positive_count', 'negative_count', 'neutral_count'),
        0
    ))
    empty_metrics.update(dict.fromkeys(
        ('positive_ratio', 'negative_ratio', 'neutral_ratio',
         'average_compound'),
        0.0
    ))
    return empty_metrics


def get_global_sentiment_metrics(
    db: Session
) -> Dict:
    """
    Aggregate every stored sentiment score into summary metrics.

    All SentimentScore rows are loaded and passed to
    ``calculate_aggregated_metrics``.

    Args:
        db: Database session

    Returns:
        Dictionary with global aggregated metrics. When no scores exist,
        every count is 0 and every ratio and the average are 0.0.
    """
    rows = db.query(SentimentScore).all()

    if rows:
        # calculate_aggregated_metrics takes plain dicts, not ORM rows.
        return calculate_aggregated_metrics([
            {'compound': row.score, 'sentiment': row.sentiment_type}
            for row in rows
        ])

    # Nothing stored yet: return an all-zero result.
    empty_metrics = dict.fromkeys(
        ('total_count', 'positive_count', 'negative_count', 'neutral_count'),
        0
    )
    empty_metrics.update(dict.fromkeys(
        ('positive_ratio', 'negative_ratio', 'neutral_ratio',
         'average_compound'),
        0.0
    ))
    return empty_metrics