# office_translator/tests/test_glossary_service.py

"""
Tests for Glossary Service
Story 3.10: Glossaires - Application lors Traduction LLM
"""

import pytest
from unittest.mock import Mock, patch, MagicMock
import uuid

from services.glossary_service import (
    get_glossary_terms,
    validate_glossary_access,
    format_glossary_for_prompt,
    build_full_prompt,
)
from utils.exceptions import GlossaryNotFoundError


class TestGetGlossaryTerms:
    """Tests for get_glossary_terms function.

    Every test patches ``services.glossary_service.get_sync_session`` so the
    context manager it returns yields a mocked session; no real database is
    used.
    """

    def test_get_glossary_terms_success(self):
        """Test retrieving terms from an existing glossary."""
        glossary_id = str(uuid.uuid4())
        user_id = str(uuid.uuid4())

        # Mock the database session and models
        mock_glossary = Mock()
        mock_glossary.id = glossary_id
        mock_glossary.user_id = user_id

        mock_term1 = Mock()
        mock_term1.source = "cloud computing"
        mock_term1.target = "informatique en nuage"

        mock_term2 = Mock()
        mock_term2.source = "machine learning"
        mock_term2.target = "apprentissage automatique"

        with patch('services.glossary_service.get_sync_session') as mock_session:
            mock_context = MagicMock()
            mock_session.return_value.__enter__ = Mock(return_value=mock_context)
            mock_session.return_value.__exit__ = Mock(return_value=False)

            # First call: glossary query, Second call: terms query
            mock_glossary_query = MagicMock()
            mock_terms_query = MagicMock()
            mock_context.query.side_effect = [mock_glossary_query, mock_terms_query]

            # filter() returns the same query object so chained calls keep
            # hitting the configured first()/all() return values.
            mock_glossary_query.filter.return_value = mock_glossary_query
            mock_glossary_query.first.return_value = mock_glossary

            mock_terms_query.filter.return_value = mock_terms_query
            mock_terms_query.all.return_value = [mock_term1, mock_term2]

            result = get_glossary_terms(glossary_id, user_id)

            assert len(result) == 2
            assert result[0]["source"] == "cloud computing"
            assert result[0]["target"] == "informatique en nuage"
            assert result[1]["source"] == "machine learning"
            assert result[1]["target"] == "apprentissage automatique"

    def test_get_glossary_terms_not_found(self):
        """Test error when glossary doesn't exist."""
        glossary_id = str(uuid.uuid4())
        user_id = str(uuid.uuid4())

        with patch('services.glossary_service.get_sync_session') as mock_session:
            mock_context = MagicMock()
            mock_session.return_value.__enter__ = Mock(return_value=mock_context)
            mock_session.return_value.__exit__ = Mock(return_value=False)

            mock_query = MagicMock()
            mock_context.query.return_value = mock_query
            mock_query.filter.return_value = mock_query
            mock_query.first.return_value = None  # Glossary not found

            with pytest.raises(GlossaryNotFoundError) as exc_info:
                get_glossary_terms(glossary_id, user_id)

            assert exc_info.value.code == "GLOSSARY_NOT_FOUND"

    def test_get_glossary_terms_wrong_user(self):
        """Test error when glossary belongs to another user.

        The foreign ownership is simulated by making the mocked glossary
        lookup return no row for the requesting user.
        """
        glossary_id = str(uuid.uuid4())
        user_id = str(uuid.uuid4())

        with patch('services.glossary_service.get_sync_session') as mock_session:
            mock_context = MagicMock()
            mock_session.return_value.__enter__ = Mock(return_value=mock_context)
            mock_session.return_value.__exit__ = Mock(return_value=False)

            mock_query = MagicMock()
            mock_context.query.return_value = mock_query
            mock_query.filter.return_value = mock_query
            mock_query.first.return_value = None  # No match for this user

            with pytest.raises(GlossaryNotFoundError) as exc_info:
                get_glossary_terms(glossary_id, user_id)

            assert exc_info.value.code == "GLOSSARY_NOT_FOUND"

    def test_get_glossary_terms_empty(self):
        """Test retrieving terms from a glossary with no terms."""
        glossary_id = str(uuid.uuid4())
        user_id = str(uuid.uuid4())

        mock_glossary = Mock()
        mock_glossary.id = glossary_id
        mock_glossary.user_id = user_id

        with patch('services.glossary_service.get_sync_session') as mock_session:
            mock_context = MagicMock()
            mock_session.return_value.__enter__ = Mock(return_value=mock_context)
            mock_session.return_value.__exit__ = Mock(return_value=False)

            # First call: glossary query, Second call: terms query.
            # The configured terms query must be the object handed out by
            # query(); an anonymous MagicMock would only "work" because
            # iterating a MagicMock happens to yield nothing.
            mock_glossary_query = MagicMock()
            mock_terms_query = MagicMock()
            mock_context.query.side_effect = [mock_glossary_query, mock_terms_query]

            mock_glossary_query.filter.return_value = mock_glossary_query
            mock_glossary_query.first.return_value = mock_glossary

            # Empty terms list
            mock_terms_query.filter.return_value = mock_terms_query
            mock_terms_query.all.return_value = []

            result = get_glossary_terms(glossary_id, user_id)

            assert result == []


class TestValidateGlossaryAccess:
    """Exercise validate_glossary_access against a mocked database session."""

    @staticmethod
    def _install_session(session_factory, lookup_result):
        """Wire the patched session factory to a fake session.

        The fake session's ``query(...)`` returns a query whose ``filter``
        chains back onto itself and whose ``first()`` yields *lookup_result*.
        """
        db = MagicMock()
        session_factory.return_value.__enter__ = Mock(return_value=db)
        session_factory.return_value.__exit__ = Mock(return_value=False)

        lookup = MagicMock()
        db.query.return_value = lookup
        lookup.filter.return_value = lookup
        lookup.first.return_value = lookup_result

    def test_validate_glossary_access_success(self):
        """A glossary row found for the user makes validation return True."""
        gid = str(uuid.uuid4())
        uid = str(uuid.uuid4())

        owned_glossary = Mock()
        owned_glossary.id = gid
        owned_glossary.user_id = uid

        with patch('services.glossary_service.get_sync_session') as session_factory:
            self._install_session(session_factory, owned_glossary)

            outcome = validate_glossary_access(gid, uid)

            assert outcome is True

    def test_validate_glossary_access_not_found(self):
        """A lookup that yields no row must raise GlossaryNotFoundError."""
        gid = str(uuid.uuid4())
        uid = str(uuid.uuid4())

        with patch('services.glossary_service.get_sync_session') as session_factory:
            self._install_session(session_factory, None)

            with pytest.raises(GlossaryNotFoundError):
                validate_glossary_access(gid, uid)


class TestFormatGlossaryForPrompt:
    """Checks on the text produced by format_glossary_for_prompt."""

    def test_format_glossary_basic(self):
        """A two-term glossary yields the header, both mappings and the reminder."""
        rendered = format_glossary_for_prompt(
            [
                {"source": "cloud computing", "target": "informatique en nuage"},
                {"source": "API", "target": "interface de programmation"},
            ]
        )

        expected_fragments = (
            "TERMINOLOGY GLOSSARY",
            "'cloud computing' → 'informatique en nuage'",
            "'API' → 'interface de programmation'",
            "IMPORTANT: Always use these translations",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_format_glossary_sorted_by_length(self):
        """The longest source term is listed ahead of the shorter ones."""
        rendered = format_glossary_for_prompt(
            [
                {"source": "API", "target": "interface"},
                {"source": "machine learning", "target": "apprentissage automatique"},
                {"source": "cloud", "target": "nuage"},
            ]
        )

        # Locate every needle up front; a missing one raises ValueError.
        needles = ("machine learning", "'cloud'", "'API'")
        offsets = {needle: rendered.index(needle) for needle in needles}

        # "machine learning" must come before both "cloud" and "API".
        assert offsets["machine learning"] < offsets["'cloud'"]
        assert offsets["machine learning"] < offsets["'API'"]

    def test_format_glossary_empty(self):
        """No terms means an empty string."""
        assert format_glossary_for_prompt([]) == ""

    def test_format_glossary_special_characters(self):
        """Apostrophes inside terms come out backslash-escaped."""
        rendered = format_glossary_for_prompt(
            [
                {"source": "it's", "target": "c'est"},
                {"source": "user's guide", "target": "guide de l'utilisateur"},
            ]
        )

        # Each single quote is expected to be preceded by a backslash.
        for escaped in ("it\\'s", "c\\'est"):
            assert escaped in rendered

    def test_format_glossary_empty_source_target(self):
        """Entries with a blank source or blank target are left out."""
        rendered = format_glossary_for_prompt(
            [
                {"source": "valid", "target": "valide"},
                {"source": "", "target": "empty_source"},
                {"source": "empty_target", "target": ""},
            ]
        )

        assert "'valid' → 'valide'" in rendered
        for dropped in ("empty_source", "empty_target"):
            assert dropped not in rendered


class TestBuildFullPrompt:
    """Checks on how build_full_prompt combines a custom prompt and a glossary."""

    def test_build_full_prompt_both(self):
        """Custom prompt and glossary section both appear in the output."""
        combined = build_full_prompt(
            "Translate technical documents accurately.",
            [{"source": "API", "target": "interface de programmation"}],
        )

        for fragment in (
            "Translate technical documents accurately.",
            "TERMINOLOGY GLOSSARY",
            "'API' → 'interface de programmation'",
        ):
            assert fragment in combined

    def test_build_full_prompt_only_custom(self):
        """With no glossary (None) the custom prompt is returned unchanged."""
        instructions = "Translate technical documents accurately."

        assert build_full_prompt(instructions, None) == "Translate technical documents accurately."

    def test_build_full_prompt_only_glossary(self):
        """With no custom prompt (None) the glossary section is still produced."""
        entries = [{"source": "API", "target": "interface de programmation"}]

        combined = build_full_prompt(None, entries)

        assert "TERMINOLOGY GLOSSARY" in combined
        assert "'API' → 'interface de programmation'" in combined

    def test_build_full_prompt_empty(self):
        """Neither input given: the result is an empty string."""
        assert build_full_prompt(None, None) == ""

    def test_build_full_prompt_empty_glossary_list(self):
        """An empty glossary list leaves only the custom prompt."""
        instructions = "Translate accurately."

        assert build_full_prompt(instructions, []) == "Translate accurately."


class TestGetGlossaryTermsDatabaseErrors:
    """Checks that a failing session factory surfaces as GlossaryNotFoundError."""

    def test_get_glossary_terms_database_error(self):
        """get_glossary_terms raises GlossaryNotFoundError when the session factory fails."""
        gid = str(uuid.uuid4())
        uid = str(uuid.uuid4())

        # Opening a session raises, simulating a database connection error.
        connection_failure = Exception("Database connection failed")
        with patch(
            'services.glossary_service.get_sync_session',
            side_effect=connection_failure,
        ):
            with pytest.raises(GlossaryNotFoundError) as raised:
                get_glossary_terms(gid, uid)

            error = raised.value
            assert error.code == "GLOSSARY_NOT_FOUND"
            assert "Erreur lors de la récupération" in str(error.message)

    def test_validate_glossary_access_database_error(self):
        """validate_glossary_access raises GlossaryNotFoundError when the session factory fails."""
        gid = str(uuid.uuid4())
        uid = str(uuid.uuid4())

        # Opening a session raises, simulating a database connection error.
        connection_failure = Exception("Database connection failed")
        with patch(
            'services.glossary_service.get_sync_session',
            side_effect=connection_failure,
        ):
            with pytest.raises(GlossaryNotFoundError) as raised:
                validate_glossary_access(gid, uid)

            assert raised.value.code == "GLOSSARY_NOT_FOUND"


class TestGlossaryIntegration:
    """Integration-flavoured checks for glossary use in the translation flow."""

    def test_empty_glossary_terms_returns_empty_list(self):
        """An existing glossary whose terms query returns nothing gives []."""
        gid = str(uuid.uuid4())
        uid = str(uuid.uuid4())

        glossary_row = Mock()
        glossary_row.id = gid
        glossary_row.user_id = uid

        # Glossary lookup: filter() chains onto itself, first() finds the row.
        glossary_lookup = MagicMock()
        glossary_lookup.filter.return_value = glossary_lookup
        glossary_lookup.first.return_value = glossary_row

        # Terms lookup: filter() chains onto itself, all() finds no terms.
        terms_lookup = MagicMock()
        terms_lookup.filter.return_value = terms_lookup
        terms_lookup.all.return_value = []

        with patch('services.glossary_service.get_sync_session') as session_factory:
            db = MagicMock()
            session_factory.return_value.__enter__ = Mock(return_value=db)
            session_factory.return_value.__exit__ = Mock(return_value=False)

            # Successive query() calls hand out the glossary lookup, then the terms lookup.
            db.query.side_effect = [glossary_lookup, terms_lookup]

            fetched = get_glossary_terms(gid, uid)

            assert fetched == []

    def test_build_full_prompt_with_empty_glossary_terms(self):
        """An empty term list contributes nothing to the prompt."""
        combined = build_full_prompt("Translate accurately.", [])

        # Only the custom prompt remains; no glossary section is added.
        assert combined == "Translate accurately."
        assert "TERMINOLOGY GLOSSARY" not in combined

    def test_format_glossary_with_unicode_characters(self):
        """Accented and CJK source terms are rendered verbatim."""
        rendered = format_glossary_for_prompt(
            [
                {"source": "café", "target": "coffee"},
                {"source": "naïve", "target": "naive"},
                {"source": "日本語", "target": "Japanese"},
            ]
        )

        for mapping in (
            "'café' → 'coffee'",
            "'naïve' → 'naive'",
            "'日本語' → 'Japanese'",
        ):
            assert mapping in rendered