""" Tests for scraping worker. """ import pytest from unittest.mock import Mock, patch from sqlalchemy.orm import Session from app.workers.scraping_worker import ( ScrapingWorker, create_scraping_worker ) class TestScrapingWorker: """Tests for ScrapingWorker class.""" def test_initialization(self): """Test scraping worker initialization.""" worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) assert worker.twitter_bearer_token == "test_token" assert worker.reddit_client_id == "test_id" assert worker.reddit_client_secret == "test_secret" assert worker.twitter_scraper is None assert worker.reddit_scraper is None def test_execute_scraping_task_twitter(self): """Test executing a Twitter scraping task.""" # Create worker worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) # Mock Twitter scraper mock_twitter_scraper = Mock() worker.twitter_scraper = mock_twitter_scraper mock_twitter_scraper.scrape_and_save.return_value = [Mock()] * 50 # Mock database session mock_db = Mock(spec=Session) # Execute task task = { 'match_id': 123, 'source': 'twitter', 'keywords': ['#MatchName'], 'priority': 'normal' } result = worker.execute_scraping_task(task, mock_db) # Verify scraping called mock_twitter_scraper.scrape_and_save.assert_called_once_with( match_id=123, keywords=['#MatchName'], db=mock_db, max_results=100 ) # Verify result assert result['collected_count'] == 50 assert result['status'] == 'success' assert result['metadata']['source'] == 'twitter' assert result['metadata']['match_id'] == 123 def test_execute_scraping_task_reddit(self): """Test executing a Reddit scraping task.""" # Create worker worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) # Mock Reddit scraper mock_reddit_scraper = Mock() worker.reddit_scraper = mock_reddit_scraper mock_reddit_scraper.scrape_and_save.return_value = { 'posts': [Mock()] * 20, 'comments': [Mock()] * 30 } # Mock database session mock_db = Mock(spec=Session) # Execute task task = { 'match_id': 456, 'source': 'reddit', 'keywords': ['Ligue1'], 'priority': 'vip' } result = worker.execute_scraping_task(task, mock_db) # Verify scraping called mock_reddit_scraper.scrape_and_save.assert_called_once_with( match_id=456, db=mock_db, keywords=['Ligue1'], scrape_comments=True ) # Verify result assert result['collected_count'] == 50 # 20 posts + 30 comments assert result['status'] == 'success' assert result['metadata']['source'] == 'reddit' assert result['metadata']['match_id'] == 456 assert result['metadata']['posts_count'] == 20 assert result['metadata']['comments_count'] == 30 def test_execute_scraping_task_unknown_source(self): """Test executing task with unknown source.""" worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) mock_db = Mock(spec=Session) # Execute task with unknown source task = { 'match_id': 123, 'source': 'unknown', 'keywords': ['#MatchName'] } result = worker.execute_scraping_task(task, mock_db) # Verify error result assert result['collected_count'] == 0 assert result['status'] == 'error' assert 'error' in result assert 'Unknown source' in result['error'] def test_execute_scraping_task_twitter_error(self): """Test handling Twitter scraping errors.""" worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) # Mock Twitter scraper with error mock_twitter_scraper = Mock() worker.twitter_scraper = mock_twitter_scraper mock_twitter_scraper.scrape_and_save.side_effect = Exception("API Error") mock_db = Mock(spec=Session) # Execute task task = { 'match_id': 123, 'source': 'twitter', 'keywords': ['#MatchName'] } result = worker.execute_scraping_task(task, mock_db) # Verify error handling assert result['collected_count'] == 0 assert result['status'] == 'error' assert 'error' in result @patch('app.workers.scraping_worker.create_twitter_scraper') def test_get_twitter_scraper_lazy_initialization(self, mock_create_scraper): """Test lazy initialization of Twitter scraper.""" worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) # First call should create scraper mock_scraper_instance = Mock() mock_create_scraper.return_value = mock_scraper_instance scraper1 = worker._get_twitter_scraper() # Verify creation mock_create_scraper.assert_called_once_with( bearer_token="test_token", vip_match_ids=[] ) assert scraper1 == mock_scraper_instance # Second call should return same instance scraper2 = worker._get_twitter_scraper() assert scraper2 == scraper1 # Verify not created again assert mock_create_scraper.call_count == 1 @patch('app.workers.scraping_worker.create_reddit_scraper') def test_get_reddit_scraper_lazy_initialization(self, mock_create_scraper): """Test lazy initialization of Reddit scraper.""" worker = ScrapingWorker( twitter_bearer_token="test_token", reddit_client_id="test_id", reddit_client_secret="test_secret" ) # First call should create scraper mock_scraper_instance = Mock() mock_create_scraper.return_value = mock_scraper_instance scraper1 = worker._get_reddit_scraper() # Verify creation mock_create_scraper.assert_called_once_with( client_id="test_id", client_secret="test_secret" ) assert scraper1 == mock_scraper_instance # Second call should return same instance scraper2 = worker._get_reddit_scraper() assert scraper2 == scraper1 # Verify not created again assert mock_create_scraper.call_count == 1 class TestCreateScrapingWorker: """Tests for create_scraping_worker factory function.""" def test_create_scraping_worker(self): """Test creating a scraping worker.""" worker = create_scraping_worker( twitter_bearer_token="token123", reddit_client_id="id456", reddit_client_secret="secret789" ) assert isinstance(worker, ScrapingWorker) assert worker.twitter_bearer_token == "token123" assert worker.reddit_client_id == "id456" assert worker.reddit_client_secret == "secret789"