"""
|
|
Sentiment Analyzer Module
|
|
Uses VADER (Valence Aware Dictionary and sEntiment Reasoner) for sentiment analysis.
|
|
"""
|
|
|
|
import logging
from typing import Dict, List, Optional

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Initialize the VADER analyzer globally for better performance
# (SentimentIntensityAnalyzer loads its lexicon at construction time, so a
# single module-level instance avoids paying that cost on every call).
_analyzer = SentimentIntensityAnalyzer()
def classify_sentiment(compound: float) -> str:
    """Map a VADER compound score onto a three-way sentiment label.

    Uses the conventional VADER thresholds of +/-0.05.

    Args:
        compound: Compound sentiment score (-1 to 1)

    Returns:
        Sentiment classification: 'positive', 'negative', or 'neutral'
    """
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'
def analyze_sentiment(text: str) -> Dict[str, float]:
    """Run VADER over a single text and return its sentiment scores.

    Args:
        text: Text to analyze

    Returns:
        Dictionary with sentiment scores:
            - compound: Overall compound score (-1 to 1)
            - positive: Positive proportion (0 to 1)
            - negative: Negative proportion (0 to 1)
            - neutral: Neutral proportion (0 to 1)
            - sentiment: Classification ('positive', 'negative', or 'neutral')

    Raises:
        ValueError: If *text* is empty or not a string.
    """
    if not isinstance(text, str) or not text:
        raise ValueError("Text must be a non-empty string")

    raw = _analyzer.polarity_scores(text)
    return {
        'compound': raw['compound'],
        'positive': raw['pos'],
        'negative': raw['neg'],
        'neutral': raw['neu'],
        # Derive the discrete label from the compound score.
        'sentiment': classify_sentiment(raw['compound']),
    }
def analyze_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
    """Analyze sentiment of multiple texts in batch.

    Invalid entries (empty or non-string) do not abort the batch: each is
    logged and replaced with a neutral placeholder so the output list stays
    the same length as, and aligned with, the input list.

    Args:
        texts: List of texts to analyze

    Returns:
        List of sentiment score dictionaries (one per input text, in order)
    """
    results = []
    for text in texts:
        try:
            results.append(analyze_sentiment(text))
        except ValueError as e:
            # Use the logging framework instead of print() so callers can
            # control destination/verbosity; keep processing remaining texts.
            logging.getLogger(__name__).warning("Error analyzing text: %s", e)
            # Neutral placeholder preserves index alignment with the input.
            results.append({
                'compound': 0.0,
                'positive': 0.0,
                'negative': 0.0,
                'neutral': 1.0,
                'sentiment': 'neutral'
            })

    return results
def calculate_aggregated_metrics(sentiments: List[Dict[str, float]]) -> Dict[str, float]:
    """Calculate aggregated metrics from a list of sentiment analyses.

    Args:
        sentiments: List of sentiment score dictionaries

    Returns:
        Dictionary with aggregated metrics:
            - total_count: Total number of sentiments
            - positive_count / negative_count / neutral_count: Label counts
            - positive_ratio / negative_ratio / neutral_ratio: Label ratios (0 to 1)
            - average_compound: Average compound score
    """
    total = len(sentiments)
    if total == 0:
        # Empty input: all counts zero, ratios and average defined as 0.0.
        return {
            'total_count': 0,
            'positive_count': 0,
            'negative_count': 0,
            'neutral_count': 0,
            'positive_ratio': 0.0,
            'negative_ratio': 0.0,
            'neutral_ratio': 0.0,
            'average_compound': 0.0
        }

    # Single pass: tally labels and accumulate compound scores together.
    positive = negative = neutral = 0
    compound_sum = 0.0
    for entry in sentiments:
        label = entry['sentiment']
        if label == 'positive':
            positive += 1
        elif label == 'negative':
            negative += 1
        elif label == 'neutral':
            neutral += 1
        compound_sum += entry['compound']

    return {
        'total_count': total,
        'positive_count': positive,
        'negative_count': negative,
        'neutral_count': neutral,
        'positive_ratio': positive / total,
        'negative_ratio': negative / total,
        'neutral_ratio': neutral / total,
        'average_compound': compound_sum / total
    }
def test_analyzer_performance(num_tweets: int = 1000) -> float:
    """Benchmark the sentiment analyzer on randomly sampled canned tweets.

    Args:
        num_tweets: Number of tweets to test with (default: 1000)

    Returns:
        Time taken to analyze the tweets in seconds
    """
    import time
    import random

    # Small pool of sample tweets spanning positive/negative/neutral tone.
    sample_tweets = [
        "I love this game! Best match ever!",
        "Terrible performance. Worst team ever.",
        "It's okay, nothing special.",
        "Amazing goal! What a comeback!",
        "Disappointed with the result.",
        "Great teamwork out there!",
        "Could have been better.",
        "Absolutely fantastic!",
        "Not good enough today.",
        "Well played both teams."
    ]

    tweets = [random.choice(sample_tweets) for _ in range(num_tweets)]

    # perf_counter() is monotonic and high-resolution; time.time() is wall
    # clock and can jump if the system clock is adjusted mid-benchmark.
    start_time = time.perf_counter()
    results = analyze_sentiment_batch(tweets)
    time_taken = time.perf_counter() - start_time

    print(f"Analyzed {len(results)} tweets in {time_taken:.4f} seconds")
    # Guard against division by zero on very fast runs / coarse clocks.
    if time_taken > 0:
        print(f"Performance: {num_tweets / time_taken:.2f} tweets/second")

    return time_taken