"""Tests for scraping worker."""

import pytest
from unittest.mock import Mock, patch

from sqlalchemy.orm import Session

from app.workers.scraping_worker import (
    ScrapingWorker,
    create_scraping_worker
)


class TestScrapingWorker:
    """Unit tests for the ScrapingWorker class."""

    @staticmethod
    def _make_worker():
        """Build a ScrapingWorker wired with dummy test credentials."""
        return ScrapingWorker(
            twitter_bearer_token="test_token",
            reddit_client_id="test_id",
            reddit_client_secret="test_secret"
        )

    def test_initialization(self):
        """Constructor stores credentials and defers scraper creation."""
        worker = self._make_worker()

        assert worker.twitter_bearer_token == "test_token"
        assert worker.reddit_client_id == "test_id"
        assert worker.reddit_client_secret == "test_secret"
        # Scrapers are created lazily, so both start out unset.
        assert worker.twitter_scraper is None
        assert worker.reddit_scraper is None

    def test_execute_scraping_task_twitter(self):
        """A Twitter task delegates to the Twitter scraper and reports success."""
        worker = self._make_worker()

        # Inject a stubbed Twitter scraper that reports 50 collected items.
        twitter_stub = Mock()
        twitter_stub.scrape_and_save.return_value = [Mock()] * 50
        worker.twitter_scraper = twitter_stub

        session_stub = Mock(spec=Session)

        outcome = worker.execute_scraping_task(
            {
                'match_id': 123,
                'source': 'twitter',
                'keywords': ['#MatchName'],
                'priority': 'normal'
            },
            session_stub
        )

        # The scraper must have been invoked exactly once with the task data.
        twitter_stub.scrape_and_save.assert_called_once_with(
            match_id=123,
            keywords=['#MatchName'],
            db=session_stub,
            max_results=100
        )

        assert outcome['status'] == 'success'
        assert outcome['collected_count'] == 50
        assert outcome['metadata']['source'] == 'twitter'
        assert outcome['metadata']['match_id'] == 123

    def test_execute_scraping_task_reddit(self):
        """A Reddit task delegates to the Reddit scraper and sums posts + comments."""
        worker = self._make_worker()

        # Stubbed Reddit scraper returning 20 posts and 30 comments.
        reddit_stub = Mock()
        reddit_stub.scrape_and_save.return_value = {
            'posts': [Mock()] * 20,
            'comments': [Mock()] * 30
        }
        worker.reddit_scraper = reddit_stub

        session_stub = Mock(spec=Session)

        outcome = worker.execute_scraping_task(
            {
                'match_id': 456,
                'source': 'reddit',
                'keywords': ['Ligue1'],
                'priority': 'vip'
            },
            session_stub
        )

        reddit_stub.scrape_and_save.assert_called_once_with(
            match_id=456,
            db=session_stub,
            keywords=['Ligue1'],
            scrape_comments=True
        )

        # 20 posts + 30 comments are rolled up into a single count.
        assert outcome['collected_count'] == 50
        assert outcome['status'] == 'success'
        assert outcome['metadata']['source'] == 'reddit'
        assert outcome['metadata']['match_id'] == 456
        assert outcome['metadata']['posts_count'] == 20
        assert outcome['metadata']['comments_count'] == 30

    def test_execute_scraping_task_unknown_source(self):
        """An unrecognized source yields an error result instead of raising."""
        worker = self._make_worker()
        session_stub = Mock(spec=Session)

        outcome = worker.execute_scraping_task(
            {
                'match_id': 123,
                'source': 'unknown',
                'keywords': ['#MatchName']
            },
            session_stub
        )

        assert outcome['status'] == 'error'
        assert outcome['collected_count'] == 0
        assert 'error' in outcome
        assert 'Unknown source' in outcome['error']

    def test_execute_scraping_task_twitter_error(self):
        """A scraper exception is converted into an error result."""
        worker = self._make_worker()

        # Scraper stub that blows up when invoked.
        twitter_stub = Mock()
        twitter_stub.scrape_and_save.side_effect = Exception("API Error")
        worker.twitter_scraper = twitter_stub

        session_stub = Mock(spec=Session)

        outcome = worker.execute_scraping_task(
            {
                'match_id': 123,
                'source': 'twitter',
                'keywords': ['#MatchName']
            },
            session_stub
        )

        assert outcome['status'] == 'error'
        assert outcome['collected_count'] == 0
        assert 'error' in outcome

    @patch('app.workers.scraping_worker.create_twitter_scraper')
    def test_get_twitter_scraper_lazy_initialization(self, create_scraper_stub):
        """The Twitter scraper is created on first access and cached after."""
        worker = self._make_worker()

        created = Mock()
        create_scraper_stub.return_value = created

        first = worker._get_twitter_scraper()

        # First access triggers creation with the stored bearer token.
        create_scraper_stub.assert_called_once_with(
            bearer_token="test_token",
            vip_match_ids=[]
        )
        assert first is created

        # Subsequent access reuses the cached instance without re-creating.
        second = worker._get_twitter_scraper()
        assert second is first
        assert create_scraper_stub.call_count == 1

    @patch('app.workers.scraping_worker.create_reddit_scraper')
    def test_get_reddit_scraper_lazy_initialization(self, create_scraper_stub):
        """The Reddit scraper is created on first access and cached after."""
        worker = self._make_worker()

        created = Mock()
        create_scraper_stub.return_value = created

        first = worker._get_reddit_scraper()

        # First access triggers creation with the stored client credentials.
        create_scraper_stub.assert_called_once_with(
            client_id="test_id",
            client_secret="test_secret"
        )
        assert first is created

        # Subsequent access reuses the cached instance without re-creating.
        second = worker._get_reddit_scraper()
        assert second is first
        assert create_scraper_stub.call_count == 1
|
|
|
|
|
|
class TestCreateScrapingWorker:
    """Unit tests for the create_scraping_worker factory function."""

    def test_create_scraping_worker(self):
        """The factory returns a ScrapingWorker carrying the given credentials."""
        built = create_scraping_worker(
            twitter_bearer_token="token123",
            reddit_client_id="id456",
            reddit_client_secret="secret789"
        )

        # The factory must hand back a real ScrapingWorker, not a subclass stub.
        assert isinstance(built, ScrapingWorker)
        assert built.twitter_bearer_token == "token123"
        assert built.reddit_client_id == "id456"
        assert built.reddit_client_secret == "secret789"