""" Sentiment Analyzer Module Uses VADER (Valence Aware Dictionary and sEntiment Reasoner) for sentiment analysis. """ from typing import Dict, List, Optional from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # Initialize the VADER analyzer globally for better performance _analyzer = SentimentIntensityAnalyzer() def classify_sentiment(compound: float) -> str: """ Classify sentiment based on compound score. Args: compound: Compound sentiment score (-1 to 1) Returns: Sentiment classification: 'positive', 'negative', or 'neutral' """ if compound >= 0.05: return 'positive' elif compound <= -0.05: return 'negative' else: return 'neutral' def analyze_sentiment(text: str) -> Dict[str, float]: """ Analyze sentiment of a text using VADER. Args: text: Text to analyze Returns: Dictionary with sentiment scores: - compound: Overall compound score (-1 to 1) - positive: Positive proportion (0 to 1) - negative: Negative proportion (0 to 1) - neutral: Neutral proportion (0 to 1) - sentiment: Classification ('positive', 'negative', or 'neutral') """ if not text or not isinstance(text, str): raise ValueError("Text must be a non-empty string") scores = _analyzer.polarity_scores(text) sentiment = classify_sentiment(scores['compound']) return { 'compound': scores['compound'], 'positive': scores['pos'], 'negative': scores['neg'], 'neutral': scores['neu'], 'sentiment': sentiment } def analyze_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]: """ Analyze sentiment of multiple texts in batch for better performance. Args: texts: List of texts to analyze Returns: List of sentiment score dictionaries """ results = [] for text in texts: try: result = analyze_sentiment(text) results.append(result) except ValueError as e: # Log error but continue processing other texts print(f"Error analyzing text: {e}") results.append({ 'compound': 0.0, 'positive': 0.0, 'negative': 0.0, 'neutral': 1.0, 'sentiment': 'neutral' }) return results def calculate_aggregated_metrics(sentiments: List[Dict[str, float]]) -> Dict[str, float]: """ Calculate aggregated metrics from a list of sentiment analyses. Args: sentiments: List of sentiment score dictionaries Returns: Dictionary with aggregated metrics: - total_count: Total number of sentiments - positive_count: Count of positive sentiments - negative_count: Count of negative sentiments - neutral_count: Count of neutral sentiments - positive_ratio: Ratio of positive sentiments (0 to 1) - negative_ratio: Ratio of negative sentiments (0 to 1) - neutral_ratio: Ratio of neutral sentiments (0 to 1) - average_compound: Average compound score """ if not sentiments: return { 'total_count': 0, 'positive_count': 0, 'negative_count': 0, 'neutral_count': 0, 'positive_ratio': 0.0, 'negative_ratio': 0.0, 'neutral_ratio': 0.0, 'average_compound': 0.0 } total_count = len(sentiments) positive_count = sum(1 for s in sentiments if s['sentiment'] == 'positive') negative_count = sum(1 for s in sentiments if s['sentiment'] == 'negative') neutral_count = sum(1 for s in sentiments if s['sentiment'] == 'neutral') average_compound = sum(s['compound'] for s in sentiments) / total_count return { 'total_count': total_count, 'positive_count': positive_count, 'negative_count': negative_count, 'neutral_count': neutral_count, 'positive_ratio': positive_count / total_count, 'negative_ratio': negative_count / total_count, 'neutral_ratio': neutral_count / total_count, 'average_compound': average_compound } def test_analyzer_performance(num_tweets: int = 1000) -> float: """ Test the performance of the sentiment analyzer. Args: num_tweets: Number of tweets to test with (default: 1000) Returns: Time taken to analyze the tweets in seconds """ import time import random # Generate sample tweets sample_tweets = [ "I love this game! Best match ever!", "Terrible performance. Worst team ever.", "It's okay, nothing special.", "Amazing goal! What a comeback!", "Disappointed with the result.", "Great teamwork out there!", "Could have been better.", "Absolutely fantastic!", "Not good enough today.", "Well played both teams." ] tweets = [random.choice(sample_tweets) for _ in range(num_tweets)] # Measure time start_time = time.time() results = analyze_sentiment_batch(tweets) end_time = time.time() time_taken = end_time - start_time print(f"Analyzed {len(results)} tweets in {time_taken:.4f} seconds") print(f"Performance: {num_tweets / time_taken:.2f} tweets/second") return time_taken