# chartbastan/backend/tests/test_twitter_scraper.py
# Extraction metadata: 2026-02-01 09:31:38 +01:00 · 187 lines · 6.0 KiB · Python

"""
Unit tests for Twitter scraper.
"""
import pytest
from datetime import datetime, timezone
from unittest.mock import Mock, patch, MagicMock
from app.scrapers.twitter_scraper import (
TwitterScraper,
RateLimitInfo,
TweetData,
create_twitter_scraper
)
@pytest.fixture
def mock_tweepy_client():
    """Yield the patched ``tweepy.Client`` class used by the scraper module."""
    with patch('app.scrapers.twitter_scraper.tweepy.Client') as client_cls:
        yield client_cls
@pytest.fixture
def test_bearer_token():
    """Provide a dummy bearer token for constructing scrapers in tests."""
    return "test_bearer_token_12345"
@pytest.fixture
def test_scraper(test_bearer_token):
    """Create a TwitterScraper instance with a patched ``tweepy.Client``.

    The hourly budget is 1000 so that the rate-limit tests — which set
    ``api_calls_made`` to 800/900/1000 and describe those as 80%/90%/100%
    usage — are consistent with the configured limit. (The previous value
    of 100 made every one of those counts exceed the budget.)
    """
    with patch('app.scrapers.twitter_scraper.tweepy.Client'):
        scraper = TwitterScraper(
            bearer_token=test_bearer_token,
            max_tweets_per_hour=1000,  # was 100: contradicted the 80%/90%/100% usage tests
            rate_limit_alert_threshold=0.9,
            vip_match_ids=[1, 2, 3]
        )
    return scraper
class TestRateLimitInfo:
    """Tests for the RateLimitInfo dataclass."""

    def test_usage_percentage(self):
        """usage_percentage spans 0.0 (untouched) to 1.0 (exhausted)."""
        # (remaining, expected usage) against a fixed limit of 1000.
        cases = [
            (100, 0.9),
            (0, 1.0),
            (1000, 0.0),
        ]
        for remaining, expected in cases:
            info = RateLimitInfo(remaining=remaining, limit=1000, reset_time=None)
            assert info.usage_percentage == expected
class TestTweetData:
    """Tests for the TweetData dataclass."""

    def test_tweet_data_creation(self):
        """Constructor stores the given fields; source defaults to 'twitter'."""
        sample = TweetData(
            tweet_id="123456789",
            text="Test tweet content",
            created_at=datetime.now(timezone.utc),
            retweet_count=10,
            like_count=20,
            match_id=1
        )

        assert sample.tweet_id == "123456789"
        # 'source' is not passed above, so this checks the dataclass default.
        assert sample.source == "twitter"
        assert sample.match_id == 1
        assert sample.retweet_count == 10
        assert sample.like_count == 20
class TestTwitterScraper:
    """Tests for the TwitterScraper class."""

    def test_scraper_initialization(self, test_bearer_token):
        """Constructor stores its configuration and starts at zero API calls."""
        with patch('app.scrapers.twitter_scraper.tweepy.Client'):
            scraper = TwitterScraper(
                bearer_token=test_bearer_token,
                max_tweets_per_hour=1000,
                rate_limit_alert_threshold=0.9,
                vip_match_ids=[1, 2, 3]
            )
        assert scraper.bearer_token == test_bearer_token
        assert scraper.max_tweets_per_hour == 1000
        assert scraper.rate_limit_alert_threshold == 0.9
        assert scraper.vip_match_ids == [1, 2, 3]
        assert scraper.vip_mode_only is False
        assert scraper.api_calls_made == 0

    def test_check_rate_limit_normal(self, test_scraper):
        """Below the alert threshold the check passes quietly."""
        # 800 calls — intended as 80% usage; assumes the fixture budget
        # is 1000 (NOTE(review): confirm against the test_scraper fixture).
        test_scraper.api_calls_made = 800
        assert test_scraper._check_rate_limit() is True

    def test_check_rate_limit_alert(self, test_scraper, caplog):
        """At the alert threshold the check still passes but warns."""
        test_scraper.api_calls_made = 900  # intended as 90% usage
        with caplog.at_level("WARNING"):
            outcome = test_scraper._check_rate_limit()
        assert outcome is True
        assert "Rate limit approaching" in caplog.text

    def test_check_rate_limit_exceeded(self, test_scraper, caplog):
        """At full usage the check fails and logs an error."""
        test_scraper.api_calls_made = 1000  # intended as 100% usage
        with caplog.at_level("ERROR"):
            outcome = test_scraper._check_rate_limit()
        assert outcome is False
        assert "Rate limit reached" in caplog.text

    def test_enable_vip_mode_only(self, test_scraper, caplog):
        """Enabling VIP-only mode sets the flag and warns loudly."""
        with caplog.at_level("WARNING"):
            test_scraper._enable_vip_mode_only()
        assert test_scraper.vip_mode_only is True
        assert "ENTERING DEGRADED MODE" in caplog.text
        assert "VIP match IDs:" in caplog.text

    def test_scrape_non_vip_in_vip_mode(self, test_scraper):
        """A non-VIP match is rejected while VIP-only mode is active."""
        test_scraper.vip_mode_only = True
        with pytest.raises(ValueError, match="Match 4 is not VIP"):
            test_scraper.scrape_twitter_match(
                match_id=4,
                keywords=["test"],
                max_results=10
            )

    def test_scrape_vip_in_vip_mode(self, test_scraper, mock_tweepy_client):
        """A VIP match is scraped even while VIP-only mode is active."""
        test_scraper.vip_mode_only = True

        # Build a fake API response with a single tweet.
        fake_response = Mock()
        fake_response.data = [
            Mock(
                id="123456789",
                text="Test tweet",
                created_at=datetime.now(timezone.utc),
                public_metrics={'retweet_count': 10, 'like_count': 20}
            )
        ]
        # NOTE(review): the scraper's client was created under the separate
        # patch inside the test_scraper fixture, so this configuration may
        # not reach the instance the scraper holds — verify fixture wiring.
        mock_tweepy_client.return_value.search_recent_tweets.return_value = fake_response

        tweets = test_scraper.scrape_twitter_match(
            match_id=1,
            keywords=["test"],
            max_results=10
        )

        assert len(tweets) == 1
        assert tweets[0].tweet_id == "123456789"
        assert test_scraper.api_calls_made == 1
class TestCreateTwitterScraper:
    """Tests for the create_twitter_scraper factory function."""

    def test_factory_function(self, test_bearer_token):
        """The factory constructs exactly one scraper and returns it."""
        with patch('app.scrapers.twitter_scraper.TwitterScraper') as scraper_cls:
            sentinel_instance = Mock()
            scraper_cls.return_value = sentinel_instance

            created = create_twitter_scraper(
                bearer_token=test_bearer_token,
                vip_match_ids=[1, 2, 3]
            )

            scraper_cls.assert_called_once()
            assert created == sentinel_instance