Initial commit
This commit is contained in:
2
backend/app/ml/__init__.py
Normal file
2
backend/app/ml/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# ML Module
|
||||
# This module contains machine learning components for sentiment analysis and energy calculations
|
||||
619
backend/app/ml/backtesting.py
Normal file
619
backend/app/ml/backtesting.py
Normal file
@@ -0,0 +1,619 @@
|
||||
"""
|
||||
Backtesting Module.
|
||||
|
||||
This module provides functions to run backtesting on historical match data,
|
||||
comparing predictions with actual results to calculate accuracy metrics.
|
||||
"""
|
||||
|
||||
import json
|
||||
import csv
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional
|
||||
from io import StringIO
|
||||
|
||||
from app.ml.prediction_calculator import calculate_prediction
|
||||
|
||||
|
||||
# Validation thresholds
|
||||
ACCURACY_VALIDATED_THRESHOLD = 60.0 # >= 60%: System validated
|
||||
ACCURACY_ALERT_THRESHOLD = 55.0 # < 55%: Revision required
|
||||
|
||||
|
||||
def validate_accuracy(accuracy: float) -> str:
    """
    Classify a prediction-accuracy percentage against the validation thresholds.

    Args:
        accuracy: Accuracy percentage (0.0 - 100.0)

    Returns:
        'REVISION_REQUIRED' when accuracy < 55%,
        'VALIDATED' when accuracy >= 60%,
        'BELOW_TARGET' for anything in between.

    Examples:
        >>> validate_accuracy(70.0)
        'VALIDATED'
        >>> validate_accuracy(50.0)
        'REVISION_REQUIRED'
        >>> validate_accuracy(58.0)
        'BELOW_TARGET'
    """
    # Guard clauses: worst case first, then the validated band.
    if accuracy < ACCURACY_ALERT_THRESHOLD:
        return 'REVISION_REQUIRED'
    if accuracy >= ACCURACY_VALIDATED_THRESHOLD:
        return 'VALIDATED'
    return 'BELOW_TARGET'
|
||||
|
||||
|
||||
def compare_prediction(predicted_winner: str, actual_winner: str) -> bool:
    """
    Check whether the predicted winner matches the actual match result.

    The comparison is case-insensitive.

    Args:
        predicted_winner: 'home', 'away', or 'draw'
        actual_winner: 'home', 'away', or 'draw'

    Returns:
        True when prediction and reality agree, False otherwise.

    Examples:
        >>> compare_prediction('home', 'home')
        True
        >>> compare_prediction('home', 'away')
        False
    """
    predicted = predicted_winner.lower()
    actual = actual_winner.lower()
    return predicted == actual
|
||||
|
||||
|
||||
def run_backtesting_single_match(
    match_id: int,
    home_team: str,
    away_team: str,
    home_energy: float,
    away_energy: float,
    actual_winner: str
) -> Dict[str, Any]:
    """
    Backtest a single historical match.

    Generates a prediction from the two energy scores and records whether
    the predicted winner matched the known outcome.

    Args:
        match_id: Unique match identifier
        home_team: Name of the home team
        away_team: Name of the away team
        home_energy: Energy score of the home team
        away_energy: Energy score of the away team
        actual_winner: Actual result ('home', 'away', or 'draw')

    Returns:
        Dictionary with the match details, the prediction dict, and a
        'correct' flag comparing prediction against reality.

    Examples:
        >>> result = run_backtesting_single_match(1, 'PSG', 'OM', 65.0, 45.0, 'home')
        >>> result['correct']
        True
    """
    prediction = calculate_prediction(home_energy, away_energy)

    # Assemble the per-match record; correctness is derived inline from
    # the prediction just computed.
    return {
        'match_id': match_id,
        'home_team': home_team,
        'away_team': away_team,
        'home_energy': home_energy,
        'away_energy': away_energy,
        'prediction': prediction,
        'actual_winner': actual_winner,
        'correct': compare_prediction(prediction['predicted_winner'], actual_winner)
    }
|
||||
|
||||
|
||||
def run_backtesting_batch(matches: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Run backtesting over a batch of historical matches.

    Predicts every match, compares against the known outcome, and produces
    overall and per-league accuracy metrics plus a validation status.

    Args:
        matches: List of match dictionaries with keys:
            - match_id (int)
            - home_team (str)
            - away_team (str)
            - home_energy (float)
            - away_energy (float)
            - actual_winner (str)
            - league (str, optional)
            - date (datetime, optional)

    Returns:
        Dictionary containing:
            - total_matches: Number of matches processed
            - correct_predictions / incorrect_predictions: Counts
            - accuracy: Accuracy percentage (rounded to 2 decimals)
            - status: VALIDATED, REVISION_REQUIRED, or BELOW_TARGET
            - results: List of individual match results
            - metrics_by_league: Accuracy breakdown by league
            - timestamp: ISO timestamp of this run
            - validation_thresholds: The thresholds used for the status

    Raises:
        ValueError: If any match is missing a required field.

    Examples:
        >>> matches = [
        ...     {'match_id': 1, 'home_team': 'PSG', 'away_team': 'OM',
        ...      'home_energy': 65.0, 'away_energy': 45.0, 'actual_winner': 'home'},
        ... ]
        >>> result = run_backtesting_batch(matches)
        >>> result['accuracy']
        100.0
    """
    # Invariant: hoisted out of the loop.
    required_fields = ('match_id', 'home_team', 'away_team',
                       'home_energy', 'away_energy', 'actual_winner')

    results: List[Dict[str, Any]] = []
    correct_count = 0
    league_metrics: Dict[str, Dict[str, Any]] = {}

    for match in matches:
        # Fail fast on malformed input rows.
        if any(field not in match for field in required_fields):
            raise ValueError(f"Match missing required fields: {match}")

        league = match.get('league', 'unknown')
        match_date = match.get('date')

        outcome = run_backtesting_single_match(
            match_id=match['match_id'],
            home_team=match['home_team'],
            away_team=match['away_team'],
            home_energy=match['home_energy'],
            away_energy=match['away_energy'],
            actual_winner=match['actual_winner']
        )

        # Enrich the per-match record with optional metadata.
        outcome['league'] = league
        outcome['date'] = match_date.isoformat() if match_date else None

        if outcome['correct']:
            correct_count += 1

        # Per-league tallies; accuracy is filled in after the loop.
        bucket = league_metrics.setdefault(
            league, {'total': 0, 'correct': 0, 'accuracy': 0.0}
        )
        bucket['total'] += 1
        if outcome['correct']:
            bucket['correct'] += 1

        results.append(outcome)

    total_matches = len(matches)
    incorrect_count = total_matches - correct_count
    accuracy = (correct_count / total_matches * 100.0) if total_matches > 0 else 0.0

    for metrics in league_metrics.values():
        if metrics['total'] > 0:
            metrics['accuracy'] = metrics['correct'] / metrics['total'] * 100.0

    return {
        'total_matches': total_matches,
        'correct_predictions': correct_count,
        'incorrect_predictions': incorrect_count,
        'accuracy': round(accuracy, 2),
        'status': validate_accuracy(accuracy),
        'results': results,
        'metrics_by_league': league_metrics,
        'timestamp': datetime.utcnow().isoformat(),
        'validation_thresholds': {
            'validated': ACCURACY_VALIDATED_THRESHOLD,
            'alert': ACCURACY_ALERT_THRESHOLD
        }
    }
|
||||
|
||||
|
||||
def export_to_json(backtesting_result: Dict[str, Any]) -> str:
    """
    Serialize a backtesting report to a pretty-printed JSON string.

    Non-JSON-native values (e.g. datetimes) are stringified via `default=str`.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        JSON formatted string

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> json_output = export_to_json(result)
        >>> isinstance(json_output, str)
        True
    """
    serialized = json.dumps(backtesting_result, indent=2, default=str)
    return serialized
|
||||
|
||||
|
||||
def export_to_csv(backtesting_result: Dict[str, Any]) -> str:
    """
    Serialize a backtesting report's per-match results to CSV.

    One row per match; the nested prediction dict is flattened into the
    'predicted_winner' and 'confidence' columns.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        CSV formatted string (header row always present)

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> csv_output = export_to_csv(result)
        >>> isinstance(csv_output, str)
        True
    """
    buffer = StringIO()
    columns = [
        'match_id', 'league', 'date', 'home_team', 'away_team',
        'home_energy', 'away_energy', 'predicted_winner',
        'confidence', 'actual_winner', 'correct'
    ]

    writer = csv.DictWriter(buffer, fieldnames=columns)
    writer.writeheader()

    for entry in backtesting_result.get('results', []):
        prediction = entry['prediction']
        writer.writerow({
            'match_id': entry['match_id'],
            'league': entry.get('league', ''),
            'date': entry.get('date', ''),
            'home_team': entry['home_team'],
            'away_team': entry['away_team'],
            'home_energy': entry['home_energy'],
            'away_energy': entry['away_energy'],
            'predicted_winner': prediction['predicted_winner'],
            'confidence': prediction['confidence'],
            'actual_winner': entry['actual_winner'],
            'correct': entry['correct']
        })

    return buffer.getvalue()
|
||||
|
||||
|
||||
def export_to_html(backtesting_result: Dict[str, Any]) -> str:
    """
    Export backtesting results to HTML format for publication.

    Builds a self-contained HTML document (inline CSS only, no external
    assets or scripts) with a summary card grid, a colored status badge,
    a per-league metrics table, and a per-match results table.

    Args:
        backtesting_result: Result from run_backtesting_batch

    Returns:
        HTML formatted string with styling and charts

    Examples:
        >>> result = run_backtesting_batch(matches)
        >>> html_output = export_to_html(result)
        >>> '<html>' in html_output
        True
    """
    # Badge color per validation status; unknown statuses fall back to
    # grey ('#666') via .get() below.
    status_colors = {
        'VALIDATED': '#10B981',  # Green
        'BELOW_TARGET': '#F59E0B',  # Orange
        'REVISION_REQUIRED': '#EF4444'  # Red
    }

    # NOTE: these key accesses assume a complete run_backtesting_batch
    # result; a partial dict raises KeyError here.
    status = backtesting_result['status']
    accuracy = backtesting_result['accuracy']
    total_matches = backtesting_result['total_matches']
    correct_predictions = backtesting_result['correct_predictions']
    incorrect_predictions = backtesting_result['incorrect_predictions']

    # Build HTML. CSS braces are doubled ({{ }}) because this is an f-string.
    html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Backtesting Report - ChartBastan</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
min-height: 100vh;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
padding: 40px;
}}
.header {{
text-align: center;
margin-bottom: 40px;
}}
.header h1 {{
font-size: 2.5em;
color: #667eea;
margin-bottom: 10px;
}}
.header p {{
color: #666;
font-size: 1.1em;
}}
.summary {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 40px;
}}
.card {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 15px;
text-align: center;
}}
.card h3 {{
font-size: 0.9em;
opacity: 0.9;
margin-bottom: 10px;
text-transform: uppercase;
letter-spacing: 1px;
}}
.card .value {{
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}}
.card .sub {{
font-size: 0.9em;
opacity: 0.9;
}}
.status-badge {{
display: inline-block;
padding: 10px 25px;
border-radius: 25px;
color: white;
font-weight: bold;
font-size: 1.2em;
margin: 20px 0;
}}
.section {{
margin-bottom: 40px;
}}
.section h2 {{
font-size: 1.8em;
color: #333;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 3px solid #667eea;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-top: 20px;
}}
th, td {{
padding: 15px;
text-align: left;
border-bottom: 1px solid #ddd;
}}
th {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-weight: 600;
text-transform: uppercase;
font-size: 0.85em;
letter-spacing: 0.5px;
}}
tr:hover {{
background: #f5f5f5;
}}
.correct {{
color: #10B981;
font-weight: bold;
}}
.incorrect {{
color: #EF4444;
font-weight: bold;
}}
.footer {{
text-align: center;
margin-top: 40px;
padding-top: 20px;
border-top: 2px solid #ddd;
color: #666;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>📊 Backtesting Report</h1>
<p>ChartBastan Prediction System Performance Analysis</p>
<p style="margin-top: 10px; font-size: 0.9em;">
Generated: {backtesting_result.get('timestamp', 'N/A')}
</p>
</div>

<div class="summary">
<div class="card">
<h3>Total Matches</h3>
<div class="value">{total_matches}</div>
<div class="sub">matches analyzed</div>
</div>
<div class="card">
<h3>Accuracy</h3>
<div class="value">{accuracy}%</div>
<div class="sub">prediction accuracy</div>
</div>
<div class="card">
<h3>Correct</h3>
<div class="value">{correct_predictions}</div>
<div class="sub">predictions</div>
</div>
<div class="card">
<h3>Incorrect</h3>
<div class="value">{incorrect_predictions}</div>
<div class="sub">predictions</div>
</div>
</div>

<div style="text-align: center;">
<div class="status-badge" style="background-color: {status_colors.get(status, '#666')};">
Status: {status}
</div>
</div>

<div class="section">
<h2>📈 Metrics by League</h2>
<table>
<thead>
<tr>
<th>League</th>
<th>Matches</th>
<th>Correct</th>
<th>Accuracy</th>
</tr>
</thead>
<tbody>
"""

    # Add league metrics (one table row per league).
    # NOTE(review): league names and team names are interpolated without
    # HTML escaping — presumably trusted internal data; verify upstream.
    for league, metrics in backtesting_result.get('metrics_by_league', {}).items():
        html += f"""
<tr>
<td>{league}</td>
<td>{metrics['total']}</td>
<td>{metrics['correct']}</td>
<td>{metrics['accuracy']:.2f}%</td>
</tr>
"""

    html += """
</tbody>
</table>
</div>

<div class="section">
<h2>📋 Detailed Results</h2>
<table>
<thead>
<tr>
<th>Match ID</th>
<th>League</th>
<th>Home vs Away</th>
<th>Prediction</th>
<th>Confidence</th>
<th>Actual</th>
<th>Result</th>
</tr>
</thead>
<tbody>
"""

    # Add detailed results (one table row per backtested match).
    for result in backtesting_result.get('results', []):
        # CSS class drives the green/red styling of the Result cell.
        result_class = 'correct' if result['correct'] else 'incorrect'
        html += f"""
<tr>
<td>{result['match_id']}</td>
<td>{result.get('league', 'N/A')}</td>
<td>{result['home_team']} vs {result['away_team']}</td>
<td>{result['prediction']['predicted_winner']}</td>
<td>{result['prediction']['confidence']:.1f}%</td>
<td>{result['actual_winner']}</td>
<td class="{result_class}">{'✓ Correct' if result['correct'] else '✗ Incorrect'}</td>
</tr>
"""

    html += """
</tbody>
</table>
</div>

<div class="footer">
<p>🎯 ChartBastan - Football Match Prediction System</p>
<p>© 2026 All rights reserved</p>
</div>
</div>
</body>
</html>
"""

    return html
|
||||
|
||||
|
||||
def filter_matches_by_league(matches: List[Dict[str, Any]], leagues: List[str]) -> List[Dict[str, Any]]:
    """
    Keep only the matches that belong to one of the given leagues.

    An empty league list means "no filter": the original list object is
    returned unchanged.

    Args:
        matches: List of match dictionaries
        leagues: List of league names to include

    Returns:
        Filtered list of matches

    Examples:
        >>> matches = [{'league': 'Ligue 1', 'home_team': 'PSG', ...}]
        >>> filtered = filter_matches_by_league(matches, ['Ligue 1'])
        >>> len(filtered)
        1
    """
    if not leagues:
        return matches

    selected = [match for match in matches if match.get('league') in leagues]
    return selected
|
||||
|
||||
|
||||
def filter_matches_by_period(
    matches: List[Dict[str, Any]],
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None
) -> List[Dict[str, Any]]:
    """
    Keep only the matches whose date falls inside the given period.

    Matches without a 'date' value are dropped whenever either bound is
    set. With no bounds, the original list object is returned unchanged.

    Args:
        matches: List of match dictionaries
        start_date: Start date (inclusive), or None for no lower bound
        end_date: End date (inclusive), or None for no upper bound

    Returns:
        Filtered list of matches

    Examples:
        >>> from datetime import datetime
        >>> matches = [{'date': datetime(2026, 1, 1), ...}]
        >>> filtered = filter_matches_by_period(matches, datetime(2025, 1, 1))
    """
    if not start_date and not end_date:
        return matches

    def _keep(match: Dict[str, Any]) -> bool:
        # A dated match must satisfy every active bound.
        match_date = match.get('date')
        if start_date and not (match_date and match_date >= start_date):
            return False
        if end_date and not (match_date and match_date <= end_date):
            return False
        return True

    return [match for match in matches if _keep(match)]
|
||||
356
backend/app/ml/energy_calculator.py
Normal file
356
backend/app/ml/energy_calculator.py
Normal file
@@ -0,0 +1,356 @@
|
||||
"""
|
||||
Energy Calculator Module.
|
||||
|
||||
This module calculates collective energy scores based on sentiment analysis
|
||||
from multiple sources (Twitter, Reddit, RSS) using a weighted formula.
|
||||
|
||||
Formula: Score = (Positive - Negative) × Volume × Virality
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
from logging import getLogger
from typing import Dict, List, Optional
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
# Source weights as specified in requirements
|
||||
SOURCE_WEIGHTS = {
|
||||
'twitter': 0.60,
|
||||
'reddit': 0.25,
|
||||
'rss': 0.15
|
||||
}
|
||||
|
||||
# Temporal weighting parameters
|
||||
TEMPORAL_DECAY_HOURS = 48 # Full decay over 48 hours
|
||||
MIN_TEMPORAL_WEIGHT = 0.5 # Minimum weight for old tweets
|
||||
|
||||
|
||||
def calculate_energy_score(
    match_id: int,
    team_id: int,
    twitter_sentiments: Optional[List[Dict[str, float]]] = None,
    reddit_sentiments: Optional[List[Dict[str, float]]] = None,
    rss_sentiments: Optional[List[Dict[str, float]]] = None,
    tweets_with_timestamps: Optional[List[Dict]] = None
) -> Dict[str, object]:
    """
    Calculate energy score for a team based on multi-source sentiment data.

    Combines per-source energy (Score = (Positive - Negative) x Volume x
    Virality), applies source weights (rescaled when some sources are
    missing), optionally applies temporal weighting from tweet timestamps,
    and normalizes the result to the 0-100 range.

    Args:
        match_id: ID of the match (used only in the no-data log message)
        team_id: ID of the team (used only in the no-data log message)
        twitter_sentiments: List of Twitter sentiment score dicts
        reddit_sentiments: List of Reddit sentiment score dicts
        rss_sentiments: List of RSS sentiment score dicts
        tweets_with_timestamps: List of tweets with timestamps for temporal
            weighting

    Returns:
        Dictionary containing:
            - score: Final energy score (0-100)
            - confidence: Confidence level (0-1)
            - sources_used: List of sources used in calculation
    """
    # Fix: defaults were annotated as non-Optional `List[...] = None`
    # (implicit Optional) and the return type used the builtin `any`
    # instead of a real type. Runtime behavior is unchanged.
    twitter_sentiments = twitter_sentiments or []
    reddit_sentiments = reddit_sentiments or []
    rss_sentiments = rss_sentiments or []
    tweets_with_timestamps = tweets_with_timestamps or []

    # Per-source raw energy via the shared formula.
    twitter_energy_score = _calculate_source_energy(twitter_sentiments)
    reddit_energy_score = _calculate_source_energy(reddit_sentiments)
    rss_energy_score = _calculate_source_energy(rss_sentiments)

    # A source counts as available only when it supplied any sentiment data.
    available_sources = []
    if twitter_sentiments:
        available_sources.append('twitter')
    if reddit_sentiments:
        available_sources.append('reddit')
    if rss_sentiments:
        available_sources.append('rss')

    # No data at all: log and return a zero-confidence, zero-score result.
    if not available_sources:
        logger.warning(f"No sentiment data available for match_id={match_id}, team_id={team_id}")
        return {
            'score': 0.0,
            'confidence': 0.0,
            'sources_used': []
        }

    # Weighted combination (weights rescaled in degraded mode).
    weighted_score = apply_source_weights(
        twitter_score=twitter_energy_score,
        reddit_score=reddit_energy_score,
        rss_score=rss_energy_score,
        available_sources=available_sources
    )

    # Temporal weighting only when timestamped tweets are supplied.
    time_weighted_score = weighted_score
    if tweets_with_timestamps and available_sources:
        time_weighted_score = apply_temporal_weighting(
            base_score=weighted_score,
            tweets_with_timestamps=tweets_with_timestamps
        )

    # Clamp into the 0-100 range.
    final_score = normalize_score(time_weighted_score)

    # Confidence equals the combined original weight of the sources present.
    total_weight = sum(SOURCE_WEIGHTS[s] for s in available_sources)
    confidence = calculate_confidence(
        available_sources=available_sources,
        total_weight=total_weight
    )

    return {
        'score': final_score,
        'confidence': confidence,
        'sources_used': available_sources
    }
|
||||
|
||||
|
||||
def _calculate_source_energy(sentiments: List[Dict[str, float]]) -> float:
|
||||
"""
|
||||
Calculate energy score for a single source using the formula:
|
||||
Score = (Positive - Negative) × Volume × Virality
|
||||
|
||||
Args:
|
||||
sentiments: List of sentiment scores with 'positive' and 'negative' keys
|
||||
|
||||
Returns:
|
||||
Energy score for the source (can be negative or positive)
|
||||
"""
|
||||
if not sentiments:
|
||||
return 0.0
|
||||
|
||||
# Calculate aggregated metrics
|
||||
total_count = len(sentiments)
|
||||
positive_ratio = sum(s.get('positive', 0) for s in sentiments) / total_count
|
||||
negative_ratio = sum(s.get('negative', 0) for s in sentiments) / total_count
|
||||
|
||||
# Volume: total number of sentiments
|
||||
volume = total_count
|
||||
|
||||
# Virality: average absolute compound score (intensity of sentiment)
|
||||
virality = sum(abs(s.get('compound', 0)) for s in sentiments) / total_count
|
||||
|
||||
# Apply the energy formula
|
||||
energy = (positive_ratio - negative_ratio) * volume * virality
|
||||
|
||||
return energy
|
||||
|
||||
|
||||
def apply_source_weights(
    twitter_score: float,
    reddit_score: float,
    rss_score: float,
    available_sources: List[str]
) -> float:
    """
    Combine per-source energy scores into one weighted score.

    Weights come from SOURCE_WEIGHTS, rescaled so that the weights of the
    available sources sum to 1.0 (degraded mode).

    Args:
        twitter_score: Energy score from Twitter
        reddit_score: Energy score from Reddit
        rss_score: Energy score from RSS
        available_sources: List of available sources

    Returns:
        Weighted energy score (0.0 when no source is available)
    """
    if not available_sources:
        return 0.0

    weights = adjust_weights_for_degraded_mode(
        original_weights=SOURCE_WEIGHTS,
        available_sources=available_sources
    )

    # Fixed twitter -> reddit -> rss order keeps the summation
    # deterministic regardless of the order of available_sources.
    per_source = {
        'twitter': twitter_score,
        'reddit': reddit_score,
        'rss': rss_score,
    }
    combined = 0.0
    for source in ('twitter', 'reddit', 'rss'):
        if source in available_sources:
            combined += per_source[source] * weights[source]

    return combined
|
||||
|
||||
|
||||
def adjust_weights_for_degraded_mode(
    original_weights: Dict[str, float],
    available_sources: List[str]
) -> Dict[str, float]:
    """
    Rescale source weights when some sources are unavailable.

    Each available source keeps its relative share, and the returned
    weights sum to 1.0.

    Args:
        original_weights: Original source weights
        available_sources: List of available sources

    Returns:
        Adjusted weights that sum to 1.0 (empty dict when no source is
        available)
    """
    if not available_sources:
        return {}

    available_total = sum(original_weights[source] for source in available_sources)

    # Renormalize so the surviving weights sum to 1.0.
    adjusted_weights = {
        source: original_weights[source] / available_total
        for source in available_sources
    }

    logger.info(f"Adjusted weights for degraded mode: {adjusted_weights}")
    return adjusted_weights
|
||||
|
||||
|
||||
def apply_temporal_weighting(
    base_score: float,
    tweets_with_timestamps: List[Dict]
) -> float:
    """
    Apply temporal weighting to an energy score based on tweet recency.

    Recent tweets weigh up to 1.0; the weight decays linearly down to the
    floor of MIN_TEMPORAL_WEIGHT (0.5) over TEMPORAL_DECAY_HOURS (48h).
    The base score is scaled by 1 + |weighted mean compound|.

    Args:
        base_score: Base energy score
        tweets_with_timestamps: Tweets with a 'created_at' timestamp
            (ISO-8601 string, 'Z' suffix accepted, or datetime) and an
            optional 'compound' sentiment value

    Returns:
        Temporally weighted energy score (base_score unchanged when no
        usable timestamps are present)
    """
    if not tweets_with_timestamps:
        return base_score

    # Bug fix: the original used naive datetime.utcnow(), but
    # fromisoformat('...+00:00') yields an AWARE datetime, so the
    # subtraction below raised TypeError. Use an aware UTC "now" and
    # coerce naive timestamps to UTC (they are assumed to be UTC).
    now = datetime.now(timezone.utc)
    weighted_sum = 0.0
    total_weight = 0.0

    for tweet in tweets_with_timestamps:
        created_at = tweet.get('created_at')
        if not created_at:
            # Skip tweets without a timestamp.
            continue

        if isinstance(created_at, str):
            created_at = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=timezone.utc)

        hours_ago = (now - created_at).total_seconds() / 3600

        # Linear decay from 1.0 down to the floor over the decay window.
        time_weight = max(MIN_TEMPORAL_WEIGHT, 1.0 - (hours_ago / TEMPORAL_DECAY_HOURS))

        weighted_sum += tweet.get('compound', 0) * time_weight
        total_weight += time_weight

    if total_weight > 0:
        # Weighted mean compound acts as a recency-sensitive boost factor.
        temporal_factor = weighted_sum / total_weight
        return base_score * (1 + abs(temporal_factor))

    # No tweet carried a usable timestamp: leave the score untouched.
    return base_score
|
||||
|
||||
|
||||
def normalize_score(score: float) -> float:
    """
    Clamp a raw energy score into the 0-100 range.

    Args:
        score: Raw energy score

    Returns:
        Score clamped between 0.0 and 100.0
    """
    clamped = max(0.0, min(100.0, score))
    return clamped
|
||||
|
||||
|
||||
def calculate_confidence(
    available_sources: List[str],
    total_weight: float
) -> float:
    """
    Derive a confidence level from the sources that contributed.

    Confidence is simply the combined original weight of the available
    sources: all three sources (0.60 + 0.25 + 0.15) give 1.0, Twitter
    alone gives 0.6, RSS alone gives 0.15.

    Args:
        available_sources: List of available sources
        total_weight: Total weight of available sources

    Returns:
        Confidence level between 0 and 1 (0.0 when no source is available)
    """
    return total_weight if available_sources else 0.0
|
||||
|
||||
|
||||
def calculate_energy_score_by_source(
    source: str,
    sentiments: List[Dict[str, float]]
) -> float:
    """
    Compute the energy score for one named source.

    Args:
        source: Source name ('twitter', 'reddit', or 'rss')
        sentiments: List of sentiment score dicts

    Returns:
        Energy score for the source; 0.0 (with a warning) for an unknown
        source name
    """
    if source in SOURCE_WEIGHTS:
        return _calculate_source_energy(sentiments)

    logger.warning(f"Unknown source: {source}")
    return 0.0
|
||||
|
||||
|
||||
def get_source_weights() -> Dict[str, float]:
    """
    Return the configured source weights.

    Returns:
        A fresh copy of the weight mapping, so callers cannot mutate the
        module-level configuration.
    """
    return dict(SOURCE_WEIGHTS)
|
||||
|
||||
|
||||
def get_temporal_weighting_parameters() -> Dict[str, float]:
    """
    Return the current temporal-weighting configuration.

    Returns:
        Dictionary with 'decay_hours' (full decay window) and
        'min_weight' (floor applied to old tweets).
    """
    return {
        'decay_hours': TEMPORAL_DECAY_HOURS,
        'min_weight': MIN_TEMPORAL_WEIGHT,
    }
|
||||
146
backend/app/ml/prediction_calculator.py
Normal file
146
backend/app/ml/prediction_calculator.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""
|
||||
Prediction Calculator Module.
|
||||
|
||||
This module provides functions to calculate match predictions based on
|
||||
energy scores from sentiment analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
|
||||
|
||||
def calculate_confidence_meter(home_energy: float, away_energy: float) -> float:
    """
    Compute the Confidence Meter (0-100%) from the energy gap.

    The wider the gap between the two teams' energy scores, the more
    confident the prediction: min(100, |home - away| * 2).

    Args:
        home_energy: Energy score of the home team (any float)
        away_energy: Energy score of the away team (any float)

    Returns:
        Confidence score between 0.0 and 100.0

    Examples:
        >>> calculate_confidence_meter(50.0, 50.0)
        0.0
        >>> calculate_confidence_meter(60.0, 50.0)
        20.0
        >>> calculate_confidence_meter(100.0, 50.0)
        100.0
    """
    spread = abs(home_energy - away_energy)
    return min(100.0, spread * 2.0)
|
||||
|
||||
|
||||
def determine_winner(home_energy: float, away_energy: float) -> str:
    """Pick the predicted winner from the two energy scores.

    Args:
        home_energy: Energy score of the home team.
        away_energy: Energy score of the away team.

    Returns:
        'home' if the home team has the higher energy,
        'away' if the away team has the higher energy,
        'draw' if the energies are equal.

    Examples:
        >>> determine_winner(60.0, 40.0)
        'home'
        >>> determine_winner(40.0, 60.0)
        'away'
        >>> determine_winner(50.0, 50.0)
        'draw'
    """
    if home_energy == away_energy:
        return 'draw'
    return 'home' if home_energy > away_energy else 'away'
|
||||
|
||||
|
||||
def calculate_prediction(home_energy: float, away_energy: float) -> Dict[str, Any]:
    """Build a complete match prediction from the two energy scores.

    Combines the confidence meter and winner determination into a single
    result dictionary.

    Args:
        home_energy: Energy score of the home team.
        away_energy: Energy score of the away team.

    Returns:
        Dictionary containing:
        - confidence: Confidence score (0.0 - 100.0)
        - predicted_winner: 'home', 'away', or 'draw'
        - home_energy: Original home energy score
        - away_energy: Original away energy score

    Examples:
        >>> calculate_prediction(65.0, 45.0)
        {'confidence': 40.0, 'predicted_winner': 'home',
         'home_energy': 65.0, 'away_energy': 45.0}
    """
    return {
        'confidence': calculate_confidence_meter(home_energy, away_energy),
        'predicted_winner': determine_winner(home_energy, away_energy),
        'home_energy': home_energy,
        'away_energy': away_energy,
    }
|
||||
|
||||
|
||||
def validate_prediction_result(result: Dict[str, Any]) -> bool:
    """Check that a prediction dict is complete and its values are sane.

    Required keys: 'confidence', 'predicted_winner', 'home_energy',
    'away_energy'. Confidence must be a number within [0, 100], the
    winner one of 'home'/'away'/'draw', and both energies non-negative
    numbers.

    Args:
        result: Candidate prediction dictionary.

    Returns:
        True when every check passes, False otherwise.

    Examples:
        >>> validate_prediction_result({'confidence': 75.0, 'predicted_winner': 'home',
        ...                             'home_energy': 65.0, 'away_energy': 45.0})
        True
        >>> validate_prediction_result({'confidence': -10.0, 'predicted_winner': 'home',
        ...                             'home_energy': 65.0, 'away_energy': 45.0})
        False
    """
    # All required fields must be present.
    for field in ('confidence', 'predicted_winner', 'home_energy', 'away_energy'):
        if field not in result:
            return False

    # Confidence: numeric and within the 0-100 range.
    confidence = result['confidence']
    if not isinstance(confidence, (int, float)):
        return False
    if confidence < 0.0 or confidence > 100.0:
        return False

    # Winner label must be one of the three known outcomes.
    if result['predicted_winner'] not in ('home', 'away', 'draw'):
        return False

    # Energies: numeric and non-negative.
    for key in ('home_energy', 'away_energy'):
        energy = result[key]
        if not isinstance(energy, (int, float)):
            return False
        if energy < 0.0:
            return False

    return True
|
||||
178
backend/app/ml/sentiment_analyzer.py
Normal file
178
backend/app/ml/sentiment_analyzer.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""
|
||||
Sentiment Analyzer Module
|
||||
Uses VADER (Valence Aware Dictionary and sEntiment Reasoner) for sentiment analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||
|
||||
# Initialize the VADER analyzer globally for better performance
|
||||
_analyzer = SentimentIntensityAnalyzer()
|
||||
|
||||
|
||||
def classify_sentiment(compound: float) -> str:
    """Map a VADER compound score onto a coarse sentiment label.

    Uses the conventional VADER cutoffs: >= 0.05 is positive,
    <= -0.05 is negative, anything between is neutral.

    Args:
        compound: Compound sentiment score (-1 to 1).

    Returns:
        Sentiment classification: 'positive', 'negative', or 'neutral'.
    """
    if compound <= -0.05:
        return 'negative'
    return 'positive' if compound >= 0.05 else 'neutral'
|
||||
|
||||
|
||||
def analyze_sentiment(text: str) -> Dict[str, float]:
    """Run VADER on a single text and classify the result.

    Args:
        text: Non-empty string to analyze.

    Returns:
        Dictionary with sentiment scores:
        - compound: Overall compound score (-1 to 1)
        - positive: Positive proportion (0 to 1)
        - negative: Negative proportion (0 to 1)
        - neutral: Neutral proportion (0 to 1)
        - sentiment: Classification ('positive', 'negative', or 'neutral')

    Raises:
        ValueError: If text is empty or not a string.
    """
    if not isinstance(text, str) or not text:
        raise ValueError("Text must be a non-empty string")

    scores = _analyzer.polarity_scores(text)
    result = {
        'compound': scores['compound'],
        'positive': scores['pos'],
        'negative': scores['neg'],
        'neutral': scores['neu'],
    }
    result['sentiment'] = classify_sentiment(result['compound'])
    return result
|
||||
|
||||
|
||||
def analyze_sentiment_batch(texts: List[str]) -> List[Dict[str, float]]:
    """Analyze several texts, substituting a neutral result on failure.

    Invalid entries (empty or non-string) are reported on stdout and
    replaced with an all-neutral score so the output list keeps the same
    length and order as the input.

    Args:
        texts: List of texts to analyze.

    Returns:
        List of sentiment score dictionaries, one per input text.
    """
    analyzed: List[Dict[str, float]] = []
    for item in texts:
        try:
            analyzed.append(analyze_sentiment(item))
        except ValueError as e:
            # Keep going: one bad text must not abort the whole batch.
            print(f"Error analyzing text: {e}")
            analyzed.append({
                'compound': 0.0,
                'positive': 0.0,
                'negative': 0.0,
                'neutral': 1.0,
                'sentiment': 'neutral',
            })
    return analyzed
|
||||
|
||||
|
||||
def calculate_aggregated_metrics(sentiments: List[Dict[str, float]]) -> Dict[str, float]:
    """Aggregate counts, ratios and the mean compound score in one pass.

    Args:
        sentiments: List of sentiment score dictionaries as produced by
            analyze_sentiment().

    Returns:
        Dictionary with aggregated metrics:
        - total_count: Total number of sentiments
        - positive_count / negative_count / neutral_count: Per-label counts
        - positive_ratio / negative_ratio / neutral_ratio: Label ratios (0 to 1)
        - average_compound: Average compound score
        All counts and ratios are zero for an empty input list.
    """
    total = len(sentiments)
    if total == 0:
        return {
            'total_count': 0,
            'positive_count': 0,
            'negative_count': 0,
            'neutral_count': 0,
            'positive_ratio': 0.0,
            'negative_ratio': 0.0,
            'neutral_ratio': 0.0,
            'average_compound': 0.0,
        }

    # Tally labels and the compound sum in a single sweep.
    counts = {'positive': 0, 'negative': 0, 'neutral': 0}
    compound_sum = 0.0
    for entry in sentiments:
        label = entry['sentiment']
        if label in counts:
            counts[label] += 1
        compound_sum += entry['compound']

    return {
        'total_count': total,
        'positive_count': counts['positive'],
        'negative_count': counts['negative'],
        'neutral_count': counts['neutral'],
        'positive_ratio': counts['positive'] / total,
        'negative_ratio': counts['negative'] / total,
        'neutral_ratio': counts['neutral'] / total,
        'average_compound': compound_sum / total,
    }
|
||||
|
||||
|
||||
def test_analyzer_performance(num_tweets: int = 1000) -> float:
    """Benchmark the sentiment analyzer on synthetic tweets.

    Draws num_tweets random samples from a fixed pool of phrases,
    analyzes them in batch, and prints throughput figures to stdout.

    Args:
        num_tweets: Number of tweets to test with (default: 1000).

    Returns:
        Wall-clock time taken to analyze the tweets, in seconds.
    """
    import random
    import time

    # Fixed phrase pool covering positive, negative and neutral tones.
    corpus = (
        "I love this game! Best match ever!",
        "Terrible performance. Worst team ever.",
        "It's okay, nothing special.",
        "Amazing goal! What a comeback!",
        "Disappointed with the result.",
        "Great teamwork out there!",
        "Could have been better.",
        "Absolutely fantastic!",
        "Not good enough today.",
        "Well played both teams.",
    )

    workload = [random.choice(corpus) for _ in range(num_tweets)]

    started = time.time()
    results = analyze_sentiment_batch(workload)
    time_taken = time.time() - started

    print(f"Analyzed {len(results)} tweets in {time_taken:.4f} seconds")
    print(f"Performance: {num_tweets / time_taken:.2f} tweets/second")

    return time_taken
|
||||
Reference in New Issue
Block a user