chartbastan/backend/tests/test_scraping_worker.py
2026-02-01 09:31:38 +01:00

247 lines
7.9 KiB
Python

"""
Tests for scraping worker.
"""
import pytest
from unittest.mock import Mock, patch
from sqlalchemy.orm import Session
from app.workers.scraping_worker import (
ScrapingWorker,
create_scraping_worker
)
class TestScrapingWorker:
    """Tests for the ScrapingWorker class."""

    @staticmethod
    def _make_worker():
        """Build a ScrapingWorker with the standard test credentials.

        Extracted to remove the identical constructor call duplicated in
        every test method.
        """
        return ScrapingWorker(
            twitter_bearer_token="test_token",
            reddit_client_id="test_id",
            reddit_client_secret="test_secret"
        )

    def test_initialization(self):
        """Credentials are stored verbatim; scrapers start uninitialized."""
        worker = self._make_worker()
        assert worker.twitter_bearer_token == "test_token"
        assert worker.reddit_client_id == "test_id"
        assert worker.reddit_client_secret == "test_secret"
        # Scrapers are lazily created, so both must be None after __init__.
        assert worker.twitter_scraper is None
        assert worker.reddit_scraper is None

    def test_execute_scraping_task_twitter(self):
        """Test executing a Twitter scraping task."""
        worker = self._make_worker()

        # Inject a mock Twitter scraper that reports 50 saved items.
        mock_twitter_scraper = Mock()
        worker.twitter_scraper = mock_twitter_scraper
        mock_twitter_scraper.scrape_and_save.return_value = [Mock()] * 50

        # Mock database session
        mock_db = Mock(spec=Session)

        # Execute task
        task = {
            'match_id': 123,
            'source': 'twitter',
            'keywords': ['#MatchName'],
            'priority': 'normal'
        }
        result = worker.execute_scraping_task(task, mock_db)

        # Verify scraping called with the task's parameters.
        mock_twitter_scraper.scrape_and_save.assert_called_once_with(
            match_id=123,
            keywords=['#MatchName'],
            db=mock_db,
            max_results=100
        )

        # Verify result
        assert result['collected_count'] == 50
        assert result['status'] == 'success'
        assert result['metadata']['source'] == 'twitter'
        assert result['metadata']['match_id'] == 123

    def test_execute_scraping_task_reddit(self):
        """Test executing a Reddit scraping task."""
        worker = self._make_worker()

        # Inject a mock Reddit scraper returning separate post/comment lists.
        mock_reddit_scraper = Mock()
        worker.reddit_scraper = mock_reddit_scraper
        mock_reddit_scraper.scrape_and_save.return_value = {
            'posts': [Mock()] * 20,
            'comments': [Mock()] * 30
        }

        # Mock database session
        mock_db = Mock(spec=Session)

        # Execute task
        task = {
            'match_id': 456,
            'source': 'reddit',
            'keywords': ['Ligue1'],
            'priority': 'vip'
        }
        result = worker.execute_scraping_task(task, mock_db)

        # Verify scraping called with comment scraping enabled.
        mock_reddit_scraper.scrape_and_save.assert_called_once_with(
            match_id=456,
            db=mock_db,
            keywords=['Ligue1'],
            scrape_comments=True
        )

        # Verify result
        assert result['collected_count'] == 50  # 20 posts + 30 comments
        assert result['status'] == 'success'
        assert result['metadata']['source'] == 'reddit'
        assert result['metadata']['match_id'] == 456
        assert result['metadata']['posts_count'] == 20
        assert result['metadata']['comments_count'] == 30

    def test_execute_scraping_task_unknown_source(self):
        """An unrecognized source yields an error result, not an exception."""
        worker = self._make_worker()
        mock_db = Mock(spec=Session)

        # Execute task with unknown source
        task = {
            'match_id': 123,
            'source': 'unknown',
            'keywords': ['#MatchName']
        }
        result = worker.execute_scraping_task(task, mock_db)

        # Verify error result
        assert result['collected_count'] == 0
        assert result['status'] == 'error'
        assert 'error' in result
        assert 'Unknown source' in result['error']

    def test_execute_scraping_task_twitter_error(self):
        """Scraper exceptions are captured and reported as an error result."""
        worker = self._make_worker()

        # Mock Twitter scraper whose call raises.
        mock_twitter_scraper = Mock()
        worker.twitter_scraper = mock_twitter_scraper
        mock_twitter_scraper.scrape_and_save.side_effect = Exception("API Error")

        mock_db = Mock(spec=Session)

        # Execute task
        task = {
            'match_id': 123,
            'source': 'twitter',
            'keywords': ['#MatchName']
        }
        result = worker.execute_scraping_task(task, mock_db)

        # Verify error handling: no crash, zero collected, error recorded.
        assert result['collected_count'] == 0
        assert result['status'] == 'error'
        assert 'error' in result

    @patch('app.workers.scraping_worker.create_twitter_scraper')
    def test_get_twitter_scraper_lazy_initialization(self, mock_create_scraper):
        """Test lazy initialization of Twitter scraper."""
        worker = self._make_worker()

        # First call should create scraper
        mock_scraper_instance = Mock()
        mock_create_scraper.return_value = mock_scraper_instance
        scraper1 = worker._get_twitter_scraper()

        # Verify creation used the worker's bearer token.
        mock_create_scraper.assert_called_once_with(
            bearer_token="test_token",
            vip_match_ids=[]
        )
        assert scraper1 == mock_scraper_instance

        # Second call should return same instance
        scraper2 = worker._get_twitter_scraper()
        assert scraper2 == scraper1
        # Verify not created again
        assert mock_create_scraper.call_count == 1

    @patch('app.workers.scraping_worker.create_reddit_scraper')
    def test_get_reddit_scraper_lazy_initialization(self, mock_create_scraper):
        """Test lazy initialization of Reddit scraper."""
        worker = self._make_worker()

        # First call should create scraper
        mock_scraper_instance = Mock()
        mock_create_scraper.return_value = mock_scraper_instance
        scraper1 = worker._get_reddit_scraper()

        # Verify creation used the worker's Reddit credentials.
        mock_create_scraper.assert_called_once_with(
            client_id="test_id",
            client_secret="test_secret"
        )
        assert scraper1 == mock_scraper_instance

        # Second call should return same instance
        scraper2 = worker._get_reddit_scraper()
        assert scraper2 == scraper1
        # Verify not created again
        assert mock_create_scraper.call_count == 1
class TestCreateScrapingWorker:
    """Tests for the create_scraping_worker factory function."""

    def test_create_scraping_worker(self):
        """The factory returns a ScrapingWorker carrying the given credentials."""
        built = create_scraping_worker(
            twitter_bearer_token="token123",
            reddit_client_id="id456",
            reddit_client_secret="secret789"
        )

        # Factory must hand back the concrete worker type with each
        # credential stored unchanged.
        assert isinstance(built, ScrapingWorker)
        assert built.twitter_bearer_token == "token123"
        assert built.reddit_client_id == "id456"
        assert built.reddit_client_secret == "secret789"