Initial commit

2026-02-01 09:31:38 +01:00
commit e02db93960
4396 changed files with 1511612 additions and 0 deletions
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/__init__.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/__init__.py
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_bleu.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_bleu.py
@@ -0,0 +1,416 @@
+"""
+Tests for BLEU translation evaluation metric
+"""
+
+import unittest
+
+import numpy as np
+
+from nltk.data import find
+from nltk.translate.bleu_score import (
+    SmoothingFunction,
+    brevity_penalty,
+    closest_ref_length,
+    corpus_bleu,
+    modified_precision,
+    sentence_bleu,
+)
+
+
+class TestBLEU(unittest.TestCase):
+    """Checks the BLEU helper functions and ``sentence_bleu`` on small inputs."""
+
+    def test_modified_precision(self):
+        """
+        Examples from the original BLEU paper
+        https://www.aclweb.org/anthology/P02-1040.pdf
+        """
+        # Example 1: the "the*" example.
+        # Reference sentences.
+        ref1 = "the cat is on the mat".split()
+        ref2 = "there is a cat on the mat".split()
+        # Hypothesis sentence(s).
+        hyp1 = "the the the the the the the".split()
+
+        references = [ref1, ref2]
+
+        # Testing modified unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        assert round(hyp1_unigram_precision, 4) == 0.2857
+        # With assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
+
+        # Testing modified bigram precision.
+        assert float(modified_precision(references, hyp1, n=2)) == 0.0
+
+        # Example 2: the "of the" example.
+        # Reference sentences
+        ref1 = (
+            "It is a guide to action that ensures that the military "
+            "will forever heed Party commands"
+        ).split()
+        ref2 = (
+            "It is the guiding principle which guarantees the military "
+            "forces always being under the command of the Party"
+        ).split()
+        ref3 = (
+            "It is the practical guide for the army always to heed "
+            "the directions of the party"
+        ).split()
+        # Hypothesis sentence(s).
+        hyp1 = "of the".split()
+
+        references = [ref1, ref2, ref3]
+        # Testing modified unigram precision.
+        assert float(modified_precision(references, hyp1, n=1)) == 1.0
+
+        # Testing modified bigram precision.
+        assert float(modified_precision(references, hyp1, n=2)) == 1.0
+
+        # Example 3: Proper MT outputs.
+        hyp1 = (
+            "It is a guide to action which ensures that the military "
+            "always obeys the commands of the party"
+        ).split()
+        hyp2 = (
+            "It is to insure the troops forever hearing the activity "
+            "guidebook that party direct"
+        ).split()
+
+        references = [ref1, ref2, ref3]
+
+        # Unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
+        # Test unigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
+        self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
+        # Test unigram precision with rounding.
+        assert round(hyp1_unigram_precision, 4) == 0.9444
+        assert round(hyp2_unigram_precision, 4) == 0.5714
+
+        # Bigram precision
+        hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
+        hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
+        # Test bigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
+        self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
+        # Test bigram precision with rounding.
+        assert round(hyp1_bigram_precision, 4) == 0.5882
+        assert round(hyp2_bigram_precision, 4) == 0.0769
+
+    def test_brevity_penalty(self):
+        # Test case from brevity_penalty_closest function in mteval-v13a.pl.
+        # Same test cases as in the doctest in nltk.translate.bleu_score.py
+        references = [["a"] * 11, ["a"] * 8]
+        hypothesis = ["a"] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        self.assertAlmostEqual(
+            brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4
+        )
+
+        references = [["a"] * 11, ["a"] * 8, ["a"] * 6, ["a"] * 7]
+        hypothesis = ["a"] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        assert brevity_penalty(closest_ref_len, hyp_len) == 1.0
+
+    def test_zero_matches(self):
+        # Test case where there's 0 matches
+        references = ["The candidate has no alignment to any of the references".split()]
+        hypothesis = "John loves Mary".split()
+
+        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
+        # The upper bound is inclusive so that n == len(hypothesis) is covered.
+        for n in range(1, len(hypothesis) + 1):
+            weights = (1.0 / n,) * n  # Uniform weights.
+            assert sentence_bleu(references, hypothesis, weights) == 0
+
+    def test_full_matches(self):
+        # Test case where there's 100% matches
+        references = ["John loves Mary".split()]
+        hypothesis = "John loves Mary".split()
+
+        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
+        # The upper bound is inclusive so that n == len(hypothesis) is covered.
+        for n in range(1, len(hypothesis) + 1):
+            weights = (1.0 / n,) * n  # Uniform weights.
+            assert sentence_bleu(references, hypothesis, weights) == 1.0
+
+    def test_partial_matches_hypothesis_longer_than_reference(self):
+        references = ["John loves Mary".split()]
+        hypothesis = "John loves Mary who loves Mike".split()
+        # Since no 4-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised because len(reference) < 4.
+        # No AttributeError guard: assertWarns exists on every Python 3
+        # release, and a guard would hide errors raised by sentence_bleu itself.
+        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+
+
+# @unittest.skip("Skipping fringe cases for BLEU.")
+class TestBLEUFringeCases(unittest.TestCase):
+    """Exercises ``sentence_bleu`` with empty, very short or oversized inputs."""
+
+    def test_case_where_n_is_bigger_than_hypothesis_length(self):
+        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
+        references = ["John loves Mary ?".split()]
+        hypothesis = "John loves Mary".split()
+        n = len(hypothesis) + 1  # One order more than the hypothesis can supply.
+        weights = (1.0 / n,) * n  # Uniform weights.
+        # Since no n-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+        # Checks that the warning has been raised because len(hypothesis) < 4.
+        # Called without a guard so that an unexpected AttributeError raised
+        # inside sentence_bleu fails the test instead of being swallowed.
+        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+
+        # Test case where n > len(hypothesis) but so is n > len(reference), and
+        # it's a special case where reference == hypothesis.
+        references = ["John loves Mary".split()]
+        hypothesis = "John loves Mary".split()
+        # Since no 4-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+
+    def test_empty_hypothesis(self):
+        # Test case where the hypothesis is empty.
+        references = ["The candidate has no alignment to any of the references".split()]
+        hypothesis = []
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_length_one_hypothesis(self):
+        # Test case where the hypothesis is of length 1 in Smoothing method 4.
+        references = ["The candidate has no alignment to any of the references".split()]
+        hypothesis = ["Foo"]
+        method4 = SmoothingFunction().method4
+        try:
+            sentence_bleu(references, hypothesis, smoothing_function=method4)
+        except ValueError:
+            # A ValueError is tolerated here; any other exception fails the test.
+            # NOTE(review): whether method4 can raise ValueError for this input
+            # is not visible from this file - confirm, then assert the outcome.
+            pass
+
+    def test_empty_references(self):
+        # Test case where the reference is empty.
+        references = [[]]
+        hypothesis = "John loves Mary".split()
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_empty_references_and_hypothesis(self):
+        # Test case where both references and hypothesis are empty.
+        references = [[]]
+        hypothesis = []
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_reference_or_hypothesis_shorter_than_fourgrams(self):
+        # Test case where the length of reference or hypothesis
+        # is shorter than 4.
+        references = ["let it go".split()]
+        hypothesis = "let go it".split()
+        # Checks that the value the hypothesis and reference returns is 0.0
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised.
+        self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+
+    def test_numpy_weights(self):
+        # Test case where there's 0 matches and the weights are a numpy array.
+        references = ["The candidate has no alignment to any of the references".split()]
+        hypothesis = "John loves Mary".split()
+
+        weights = np.array([0.25] * 4)
+        assert sentence_bleu(references, hypothesis, weights) == 0
+
+
+class TestBLEUvsMteval13a(unittest.TestCase):
+    """Compares ``corpus_bleu`` with the scores stored in an mteval-13a output file."""
+
+    def test_corpus_bleu(self):
+        ref_file = find("models/wmt15_eval/ref.ru")
+        hyp_file = find("models/wmt15_eval/google.ru")
+        mteval_output_file = find("models/wmt15_eval/mteval-13a.output")
+
+        # Reads the BLEU scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        with open(mteval_output_file) as mteval_fin:
+            # The numbers are on the second-to-last line of the file; the first
+            # and the last whitespace-separated fields are not scores and are
+            # dropped before conversion.
+            # The scores are iterated twice below, so they are stored in a
+            # list: a bare ``map`` object would be exhausted by the first loop
+            # and the smoothed comparison would silently check nothing.
+            mteval_bleu_scores = list(
+                map(float, mteval_fin.readlines()[-2].split()[1:-1])
+            )
+
+        # Whitespace tokenize the files.
+        # Note: split() automatically strip().
+        with open(hyp_file, encoding="utf8") as hyp_fin:
+            hypothesis = [line.split() for line in hyp_fin]
+        with open(ref_file, encoding="utf8") as ref_fin:
+            # Note that the corpus_bleu input is list of list of references.
+            references = [[line.split()] for line in ref_fin]
+
+        # Without smoothing.
+        for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+            nltk_bleu = corpus_bleu(references, hypothesis, weights=(1.0 / i,) * i)
+            # Check that the BLEU scores difference is less than 0.005 .
+            # Note: This is an approximate comparison; as much as
+            #       +/- 0.01 BLEU might be "statistically significant",
+            #       the actual translation quality might not be.
+            assert abs(mteval_bleu - nltk_bleu) < 0.005
+
+        # With the same smoothing method used in mteval-v13a.pl
+        chencherry = SmoothingFunction()
+        for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+            nltk_bleu = corpus_bleu(
+                references,
+                hypothesis,
+                weights=(1.0 / i,) * i,
+                smoothing_function=chencherry.method3,
+            )
+            assert abs(mteval_bleu - nltk_bleu) < 0.005
+
+
+class TestBLEUWithBadSentence(unittest.TestCase):
+    """Checks ``corpus_bleu`` on a hypothesis that is mostly noise tokens."""
+
+    def test_corpus_bleu_with_bad_sentence(self):
+        hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R"
+        ref = (
+            "Their tasks include changing a pump on the faulty stokehold ."
+            "Likewise , two species that are very similar in morphology "
+            "were distinguished using genetics ."
+        )
+        references = [[ref.split()]]
+        hypotheses = [hyp.split()]
+        # The score must be (almost) zero and a UserWarning must be emitted
+        # while it is computed.  assertWarns is used directly: a fallback for
+        # interpreters without it would also swallow any AttributeError raised
+        # inside corpus_bleu.
+        with self.assertWarns(UserWarning):
+            self.assertAlmostEqual(
+                corpus_bleu(references, hypotheses), 0.0, places=4
+            )
+
+
+class TestBLEUWithMultipleWeights(unittest.TestCase):
+    """Checks that passing several weight tuples matches separate calls."""
+
+    def test_corpus_bleu_with_multiple_weights(self):
+        # First segment: one hypothesis with three references.
+        hyp1 = (
+            "It is a guide to action which ensures that the military "
+            "always obeys the commands of the party"
+        ).split()
+        ref1a = (
+            "It is a guide to action that ensures that the military "
+            "will forever heed Party commands"
+        ).split()
+        ref1b = (
+            "It is the guiding principle which guarantees the military "
+            "forces always being under the command of the Party"
+        ).split()
+        ref1c = (
+            "It is the practical guide for the army always to heed "
+            "the directions of the party"
+        ).split()
+        # Second segment: one hypothesis with a single reference.
+        hyp2 = "he read the book because he was interested in world history".split()
+        ref2a = "he was interested in world history because he read the book".split()
+
+        all_weights = [
+            (1, 0, 0, 0),
+            (0.25, 0.25, 0.25, 0.25),
+            (0, 0, 0, 0, 1),
+        ]
+
+        bleu_scores = corpus_bleu(
+            list_of_references=[[ref1a, ref1b, ref1c], [ref2a]],
+            hypotheses=[hyp1, hyp2],
+            weights=all_weights,
+        )
+        # Each score of the batched call must equal the score obtained by
+        # calling corpus_bleu with that weight tuple alone.
+        for position, single_weight in enumerate(all_weights):
+            assert bleu_scores[position] == corpus_bleu(
+                [[ref1a, ref1b, ref1c], [ref2a]], [hyp1, hyp2], single_weight
+            )
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_gdfa.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_gdfa.py
@@ -0,0 +1,154 @@
+"""
+Tests GDFA alignments
+"""
+
+import unittest
+
+from nltk.translate.gdfa import grow_diag_final_and
+
+
+class TestGDFA(unittest.TestCase):
+    """Compares ``grow_diag_final_and`` output with hand-written expectations."""
+
+    def test_from_eflomal_outputs(self):
+        """
+        Testing GDFA with first 10 eflomal outputs from issue #1829
+        https://github.com/nltk/nltk/issues/1829
+        """
+        # Input.
+        # Each string is a whitespace-separated list of "i-j" index pairs.
+        forwards = [
+            "0-0 1-2",
+            "0-0 1-1",
+            "0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14",
+            "0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10",
+            "0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31",
+            "0-0 1-1 0-2 2-3",
+            "0-0 2-2 4-4",
+            "0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20",
+            "3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14",
+            "1-0",
+        ]
+        backwards = [
+            "0-0 1-2",
+            "0-0 1-1",
+            "0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13",
+            "0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8",
+            "0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31",
+            "0-0 1-1 2-3",
+            "0-0 1-1 2-3 4-4",
+            "0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18",
+            "0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10",
+            "1-0",
+        ]
+        # Sentence lengths passed as the first two arguments of
+        # grow_diag_final_and, one pair per example.
+        source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
+        target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
+        # Expected Output.
+        # NOTE(review): this list holds eleven entries while the four input
+        # lists hold ten; zip() below stops at the shortest input, so the last
+        # entry is never compared - confirm whether it is intentional.
+        expected = [
+            [(0, 0), (1, 2)],
+            [(0, 0), (1, 1)],
+            [
+                (0, 0),
+                (2, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (8, 7),
+                (10, 10),
+                (11, 12),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (1, 2),
+                (2, 3),
+                (3, 4),
+                (4, 5),
+                (4, 6),
+                (5, 7),
+                (6, 8),
+                (7, 5),
+                (8, 7),
+                (8, 9),
+                (9, 8),
+                (9, 10),
+            ],
+            [
+                (0, 0),
+                (1, 8),
+                (2, 9),
+                (3, 10),
+                (4, 11),
+                (5, 8),
+                (6, 9),
+                (6, 11),
+                (7, 10),
+                (8, 11),
+                (31, 31),
+            ],
+            [(0, 0), (0, 2), (1, 1), (2, 3)],
+            [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
+            [
+                (0, 0),
+                (1, 1),
+                (2, 3),
+                (3, 4),
+                (5, 5),
+                (7, 6),
+                (8, 7),
+                (9, 8),
+                (10, 9),
+                (11, 10),
+                (12, 11),
+                (13, 12),
+                (14, 13),
+                (15, 14),
+                (16, 16),
+                (17, 17),
+                (18, 18),
+                (19, 19),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 0),
+                (3, 2),
+                (4, 1),
+                (5, 3),
+                (6, 2),
+                (6, 4),
+                (7, 5),
+                (8, 6),
+                (9, 7),
+                (9, 12),
+                (10, 8),
+                (10, 13),
+                (11, 9),
+                (12, 8),
+                (12, 14),
+                (13, 9),
+                (14, 8),
+                (15, 9),
+                (16, 10),
+            ],
+            [(1, 0)],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (9, 10),
+                (10, 12),
+                (11, 13),
+                (12, 14),
+                (13, 15),
+            ],
+        ]
+
+        # Iterate through all 10 examples and check for expected outputs.
+        # The lists are walked in lockstep; each example must produce exactly
+        # the expected list of index pairs, in the same order.
+        for fw, bw, src_len, trg_len, expect in zip(
+            forwards, backwards, source_lens, target_lens, expected
+        ):
+            self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm1.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm1.py
@@ -0,0 +1,73 @@
+"""
+Tests for IBM Model 1 training methods
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel, IBMModel1
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel1(unittest.TestCase):
+    """Unit tests for IBM Model 1 initialisation and alignment scoring."""
+
+    def test_set_uniform_translation_probabilities(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel1(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # Both a word pair and a NULL-aligned entry must hold the same value.
+        uniform = 1.0 / 3
+        table = model.translation_table
+        self.assertEqual(table["ham"]["eier"], uniform)
+        self.assertEqual(table["eggs"][None], uniform)
+
+    def test_set_uniform_translation_probabilities_of_non_domain_values(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel1(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # A target word absent from the training data falls back to MIN_PROB.
+        unseen = model.translation_table["parrot"]["eier"]
+        self.assertEqual(unseen, IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", "esse", "ja", "gern", "räucherschinken"]
+        trg_sentence = ["i", "love", "to", "eat", "smoked", "ham"]
+        bitext = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ["UNUSED"] + trg_sentence,
+            None,
+        )
+
+        # Every aligned (target word, source word) pair gets probability 0.98.
+        aligned_pairs = [
+            ("i", "ich"),
+            ("love", "gern"),
+            ("to", None),
+            ("eat", "esse"),
+            ("smoked", "räucherschinken"),
+            ("ham", "räucherschinken"),
+        ]
+        translation_table = defaultdict(lambda: defaultdict(float))
+        for trg_word, src_word in aligned_pairs:
+            translation_table[trg_word][src_word] = 0.98
+
+        model = IBMModel1(bitext, 0)
+        model.translation_table = translation_table
+
+        # act
+        probability = model.prob_t_a_given_s(alignment_info)
+
+        # assert
+        # Expected value: the product of the six lexical probabilities.
+        expected_probability = 1.0
+        for _ in aligned_pairs:
+            expected_probability *= 0.98
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm2.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm2.py
@@ -0,0 +1,86 @@
+"""
+Tests for IBM Model 2 training methods
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel, IBMModel2
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel2(unittest.TestCase):
+    """Unit tests for IBM Model 2 initialisation and alignment scoring."""
+
+    def test_set_uniform_alignment_probabilities(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel2(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # expected_prob = 1.0 / (length of source sentence + 1)
+        table = model.alignment_table
+        self.assertEqual(table[0][1][3][2], 1.0 / 4)
+        self.assertEqual(table[2][4][2][4], 1.0 / 3)
+
+    def test_set_uniform_alignment_probabilities_of_non_domain_values(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel2(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # i and j values outside the training data domain fall back to MIN_PROB.
+        table = model.alignment_table
+        self.assertEqual(table[99][1][3][2], IBMModel.MIN_PROB)
+        self.assertEqual(table[2][99][2][4], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", "esse", "ja", "gern", "räucherschinken"]
+        trg_sentence = ["i", "love", "to", "eat", "smoked", "ham"]
+        bitext = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ["UNUSED"] + trg_sentence,
+            None,
+        )
+
+        # Every aligned (target word, source word) pair gets probability 0.98.
+        aligned_pairs = [
+            ("i", "ich"),
+            ("love", "gern"),
+            ("to", None),
+            ("eat", "esse"),
+            ("smoked", "räucherschinken"),
+            ("ham", "räucherschinken"),
+        ]
+        translation_table = defaultdict(lambda: defaultdict(float))
+        for trg_word, src_word in aligned_pairs:
+            translation_table[trg_word][src_word] = 0.98
+
+        # (first index, second index, probability); the last two table
+        # indices are 5 and 6 for every entry.
+        alignment_entries = [
+            (0, 3, 0.97),  # None -> to
+            (1, 1, 0.97),  # ich -> i
+            (2, 4, 0.97),  # esse -> eat
+            (4, 2, 0.97),  # gern -> love
+            (5, 5, 0.96),  # räucherschinken -> smoked
+            (5, 6, 0.96),  # räucherschinken -> ham
+        ]
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        for first, second, prob in alignment_entries:
+            alignment_table[first][second][5][6] = prob
+
+        model = IBMModel2(bitext, 0)
+        model.translation_table = translation_table
+        model.alignment_table = alignment_table
+
+        # act
+        probability = model.prob_t_a_given_s(alignment_info)
+
+        # assert
+        # Expected value: lexical product times alignment product.
+        lexical_translation = 1.0
+        for _ in aligned_pairs:
+            lexical_translation *= 0.98
+        alignment = 1.0
+        for _, _, prob in alignment_entries:
+            alignment *= prob
+        expected_probability = lexical_translation * alignment
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm3.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm3.py
@@ -0,0 +1,105 @@
+"""
+Tests for IBM Model 3 training methods
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel, IBMModel3
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel3(unittest.TestCase):
+    """Unit tests for IBM Model 3 initialisation and alignment scoring."""
+
+    def test_set_uniform_distortion_probabilities(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel3(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # expected_prob = 1.0 / length of target sentence
+        table = model.distortion_table
+        self.assertEqual(table[1][0][3][2], 1.0 / 2)
+        self.assertEqual(table[4][2][2][4], 1.0 / 4)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        # arrange
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel3(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # i and j values outside the training data domain fall back to MIN_PROB.
+        table = model.distortion_table
+        self.assertEqual(table[0][0][3][2], IBMModel.MIN_PROB)
+        self.assertEqual(table[9][2][2][4], IBMModel.MIN_PROB)
+        self.assertEqual(table[2][9][2][4], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", "esse", "ja", "gern", "räucherschinken"]
+        trg_sentence = ["i", "love", "to", "eat", "smoked", "ham"]
+        bitext = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ["UNUSED"] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        # (first index, second index); every entry is 0.97 and the last two
+        # table indices are 5 and 6.
+        distortion_entries = [
+            (1, 1),  # i -> ich
+            (2, 4),  # love -> gern
+            (3, 0),  # to -> NULL
+            (4, 2),  # eat -> esse
+            (5, 5),  # smoked -> räucherschinken
+            (6, 5),  # ham -> räucherschinken
+        ]
+        distortion_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        for first, second in distortion_entries:
+            distortion_table[first][second][5][6] = 0.97
+
+        # Every aligned (target word, source word) pair gets probability 0.98.
+        aligned_pairs = [
+            ("i", "ich"),
+            ("love", "gern"),
+            ("to", None),
+            ("eat", "esse"),
+            ("smoked", "räucherschinken"),
+            ("ham", "räucherschinken"),
+        ]
+        translation_table = defaultdict(lambda: defaultdict(float))
+        for trg_word, src_word in aligned_pairs:
+            translation_table[trg_word][src_word] = 0.98
+
+        # (fertility, source word, probability)
+        fertility_entries = [
+            (1, "ich", 0.99),
+            (1, "esse", 0.99),
+            (0, "ja", 0.99),
+            (1, "gern", 0.99),
+            (2, "räucherschinken", 0.999),
+            (1, None, 0.99),
+        ]
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        for count, src_word, prob in fertility_entries:
+            fertility_table[count][src_word] = prob
+
+        model = IBMModel3(
+            bitext,
+            0,
+            {
+                "p1": 0.167,
+                "translation_table": translation_table,
+                "distortion_table": distortion_table,
+                "fertility_table": fertility_table,
+                "alignment_table": None,
+            },
+        )
+
+        # act
+        probability = model.prob_t_a_given_s(alignment_info)
+
+        # assert
+        # The expected value is the product of the four factors below.
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion = 0.97 * 0.97 * 0.97 * 0.97 * 0.97 * 0.97
+        expected_probability = (
+            null_generation * fertility * lexical_translation * distortion
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm4.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm4.py
@@ -0,0 +1,120 @@
+"""
+Tests for IBM Model 4 training methods
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel, IBMModel4
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel4(unittest.TestCase):
+    """Checks IBM Model 4 distortion initialisation and alignment scoring."""
+
+    def test_set_uniform_distortion_probabilities_of_max_displacements(self):
+        """The largest in-domain displacements get the uniform probability."""
+        # arrange
+        classes_of_src = {"schinken": 0, "eier": 0, "spam": 1}
+        classes_of_trg = {"ham": 0, "eggs": 1, "spam": 2}
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel4(bitext, 0, classes_of_src, classes_of_trg)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # There are 2 * (longest target length - 1) displacement values and
+        # the longest target sentence in the corpus above has 4 words.
+        longest_trg_length = 4
+        uniform = 1.0 / (2 * (longest_trg_length - 1))
+
+        # boundary values, indexed as (displacement, src_class, trg_class)
+        head_table = model.head_distortion_table
+        non_head_table = model.non_head_distortion_table
+        self.assertEqual(head_table[3][0][0], uniform)
+        self.assertEqual(head_table[-3][1][2], uniform)
+        self.assertEqual(non_head_table[3][0], uniform)
+        self.assertEqual(non_head_table[-3][2], uniform)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        """Displacements never seen in the corpus fall back to MIN_PROB."""
+        # arrange
+        classes_of_src = {"schinken": 0, "eier": 0, "spam": 1}
+        classes_of_trg = {"ham": 0, "eggs": 1, "spam": 2}
+        bitext = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model = IBMModel4(bitext, 0, classes_of_src, classes_of_trg)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # displacement values outside the training data domain
+        floor = IBMModel.MIN_PROB
+        head_table = model.head_distortion_table
+        non_head_table = model.non_head_distortion_table
+        self.assertEqual(head_table[4][0][0], floor)
+        self.assertEqual(head_table[100][1][2], floor)
+        self.assertEqual(non_head_table[4][0], floor)
+        self.assertEqual(non_head_table[100][2], floor)
+
+    def test_prob_t_a_given_s(self):
+        """prob_t_a_given_s matches the product of the hand-set factors."""
+        # arrange
+        src_words = ["ich", "esse", "ja", "gern", "räucherschinken"]
+        trg_words = ["i", "love", "to", "eat", "smoked", "ham"]
+        classes_of_src = {"räucherschinken": 0, "ja": 1, "ich": 2, "esse": 3, "gern": 4}
+        classes_of_trg = {"ham": 0, "smoked": 1, "i": 3, "love": 4, "to": 2, "eat": 4}
+        bitext = [AlignedSent(trg_words, src_words)]
+        a_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_words,
+            ["UNUSED"] + trg_words,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        # head distortion entries keyed by (displacement, src_class, trg_class)
+        head_table = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        head_entries = (
+            (1, None, 3),  # None, i
+            (3, 2, 4),  # ich, eat
+            (-2, 3, 4),  # esse, love
+            (3, 4, 1),  # gern, smoked
+        )
+        for displacement, src_class, trg_class in head_entries:
+            head_table[displacement][src_class][trg_class] = 0.97
+
+        non_head_table = defaultdict(lambda: defaultdict(float))
+        non_head_table[1][0] = 0.96  # ham
+
+        # lexical translation entries keyed by (target word, source word)
+        t_table = defaultdict(lambda: defaultdict(float))
+        word_pairs = (
+            ("i", "ich"),
+            ("love", "gern"),
+            ("to", None),
+            ("eat", "esse"),
+            ("smoked", "räucherschinken"),
+            ("ham", "räucherschinken"),
+        )
+        for trg_word, src_word in word_pairs:
+            t_table[trg_word][src_word] = 0.98
+
+        # fertility entries keyed by (fertility, source word)
+        n_table = defaultdict(lambda: defaultdict(float))
+        fertility_entries = (
+            (1, "ich", 0.99),
+            (1, "esse", 0.99),
+            (0, "ja", 0.99),
+            (1, "gern", 0.99),
+            (2, "räucherschinken", 0.999),
+            (1, None, 0.99),
+        )
+        for fertility_count, src_word, prob in fertility_entries:
+            n_table[fertility_count][src_word] = prob
+
+        model = IBMModel4(
+            bitext,
+            0,
+            classes_of_src,
+            classes_of_trg,
+            {
+                "p1": 0.167,
+                "translation_table": t_table,
+                "head_distortion_table": head_table,
+                "non_head_distortion_table": non_head_table,
+                "fertility_table": n_table,
+                "alignment_table": None,
+            },
+        )
+
+        # act
+        actual = model.prob_t_a_given_s(a_info)
+
+        # assert
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected = null_generation * fertility * lexical_translation * distortion
+        self.assertEqual(round(actual, 4), round(expected, 4))
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm5.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm5.py
@@ -0,0 +1,160 @@
+"""
+Tests for IBM Model 5 training methods
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel, IBMModel4, IBMModel5
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel5(unittest.TestCase):
+    """Checks IBM Model 5 vacancy initialisation, scoring and pruning."""
+
+    def test_set_uniform_vacancy_probabilities_of_max_displacements(self):
+        """Boundary (dv, max_v, trg_class) entries get the uniform probability."""
+        # arrange
+        src_classes = {"schinken": 0, "eier": 0, "spam": 1}
+        trg_classes = {"ham": 0, "eggs": 1, "spam": 2}
+        corpus = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # number of vacancy difference values =
+        #     2 * number of words in longest target sentence
+        expected_prob = 1.0 / (2 * 4)
+
+        # examine the boundary values for (dv, max_v, trg_class)
+        self.assertEqual(model5.head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.head_vacancy_table[-3][1][2], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[-3][1][2], expected_prob)
+
+    def test_set_uniform_vacancy_probabilities_of_non_domain_values(self):
+        """Out-of-domain dv / max_v entries fall back to IBMModel.MIN_PROB."""
+        # arrange
+        src_classes = {"schinken": 0, "eier": 0, "spam": 1}
+        trg_classes = {"ham": 0, "eggs": 1, "spam": 2}
+        corpus = [
+            AlignedSent(["ham", "eggs"], ["schinken", "schinken", "eier"]),
+            AlignedSent(["spam", "spam", "spam", "spam"], ["spam", "spam"]),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine dv and max_v values that are not in the training data domain
+        self.assertEqual(model5.head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[4][0][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        """prob_t_a_given_s matches the product of the hand-set factors."""
+        # arrange
+        src_sentence = ["ich", "esse", "ja", "gern", "räucherschinken"]
+        trg_sentence = ["i", "love", "to", "eat", "smoked", "ham"]
+        src_classes = {"räucherschinken": 0, "ja": 1, "ich": 2, "esse": 3, "gern": 4}
+        trg_classes = {"ham": 0, "smoked": 1, "i": 3, "love": 4, "to": 2, "eat": 4}
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ["UNUSED"] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        # Only the entries the test expects to be looked up are non-zero;
+        # any other key yields float() == 0.0 from the defaultdicts.
+        head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        head_vacancy_table[1 - 0][6][3] = 0.97  # ich -> i
+        head_vacancy_table[3 - 0][5][4] = 0.97  # esse -> eat
+        head_vacancy_table[1 - 2][4][4] = 0.97  # gern -> love
+        head_vacancy_table[2 - 0][2][1] = 0.97  # räucherschinken -> smoked
+
+        non_head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        non_head_vacancy_table[1 - 0][1][0] = 0.96  # räucherschinken -> ham
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table["i"]["ich"] = 0.98
+        translation_table["love"]["gern"] = 0.98
+        translation_table["to"][None] = 0.98
+        translation_table["eat"]["esse"] = 0.98
+        translation_table["smoked"]["räucherschinken"] = 0.98
+        translation_table["ham"]["räucherschinken"] = 0.98
+
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        fertility_table[1]["ich"] = 0.99
+        fertility_table[1]["esse"] = 0.99
+        fertility_table[0]["ja"] = 0.99
+        fertility_table[1]["gern"] = 0.99
+        fertility_table[2]["räucherschinken"] = 0.999
+        fertility_table[1][None] = 0.99
+
+        probabilities = {
+            "p1": 0.167,
+            "translation_table": translation_table,
+            "fertility_table": fertility_table,
+            "head_vacancy_table": head_vacancy_table,
+            "non_head_vacancy_table": non_head_vacancy_table,
+            "head_distortion_table": None,
+            "non_head_distortion_table": None,
+            "alignment_table": None,
+        }
+
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes, probabilities)
+
+        # act
+        probability = model5.prob_t_a_given_s(alignment_info)
+
+        # assert
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        vacancy = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected_probability = (
+            null_generation * fertility * lexical_translation * vacancy
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
+
+    def test_prune(self):
+        """prune keeps 3 of the 5 candidate alignments for the mocked scores."""
+        # Imported locally so the block does not depend on a file-level import.
+        from unittest import mock
+
+        # arrange
+        alignment_infos = [
+            AlignmentInfo((1, 1), None, None, None),
+            AlignmentInfo((1, 2), None, None, None),
+            AlignmentInfo((2, 1), None, None, None),
+            AlignmentInfo((2, 2), None, None, None),
+            AlignmentInfo((0, 0), None, None, None),
+        ]
+        min_factor = IBMModel5.MIN_SCORE_FACTOR
+        best_score = 0.9
+        scores = {
+            (1, 1): min(min_factor * 1.5, 1) * best_score,  # above threshold
+            (1, 2): best_score,
+            (2, 1): min_factor * best_score,  # at threshold
+            (2, 2): min_factor * best_score * 0.5,  # low score
+            (0, 0): min(min_factor * 1.1, 1) * 1.2,  # above threshold
+        }
+        corpus = [AlignedSent(["a"], ["b"])]
+
+        # Mock the static Model 4 scorer with a lookup into `scores`.
+        # patch.object restores the original attribute on exit even when the
+        # body raises, so a failure here cannot leak the mock into other tests.
+        with mock.patch.object(
+            IBMModel4,
+            "model4_prob_t_a_given_s",
+            staticmethod(lambda a, model: scores[a.alignment]),
+        ):
+            model5 = IBMModel5(corpus, 0, None, None)
+
+            # act
+            pruned_alignments = model5.prune(alignment_infos)
+
+        # assert
+        self.assertEqual(len(pruned_alignments), 3)
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm_model.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_ibm_model.py
@@ -0,0 +1,269 @@
+"""
+Tests for common methods of IBM translation models
+"""
+
+import unittest
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, IBMModel
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel(unittest.TestCase):  # tests for methods shared by the IBM models
+    __TEST_SRC_SENTENCE = ["j'", "aime", "bien", "jambon"]  # source-side fixture
+    __TEST_TRG_SENTENCE = ["i", "love", "ham"]  # target-side fixture
+
+    def test_vocabularies_are_initialized(self):
+        parallel_corpora = [  # AlignedSent(target words, source words)
+            AlignedSent(["one", "two", "three", "four"], ["un", "deux", "trois"]),
+            AlignedSent(["five", "one", "six"], ["quatre", "cinq", "six"]),
+            AlignedSent([], ["sept"]),
+        ]
+
+        ibm_model = IBMModel(parallel_corpora)
+        self.assertEqual(len(ibm_model.src_vocab), 8)  # 7 distinct words + NULL token
+        self.assertEqual(len(ibm_model.trg_vocab), 6)  # "one" occurs twice
+
+    def test_vocabularies_are_initialized_even_with_empty_corpora(self):
+        parallel_corpora = []
+
+        ibm_model = IBMModel(parallel_corpora)
+        self.assertEqual(len(ibm_model.src_vocab), 1)  # addition of NULL token
+        self.assertEqual(len(ibm_model.trg_vocab), 0)
+
+    def test_best_model2_alignment(self):
+        # arrange: hand-written translation table, constant 0.2 alignment table
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        # None and 'bien' have zero fertility
+        translation_table = {
+            "i": {"j'": 0.9, "aime": 0.05, "bien": 0.02, "jambon": 0.03, None: 0},
+            "love": {"j'": 0.05, "aime": 0.9, "bien": 0.01, "jambon": 0.01, None: 0.03},
+            "ham": {"j'": 0, "aime": 0.01, "bien": 0, "jambon": 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert
+        self.assertEqual(a_info.alignment[1:], (1, 2, 4))  # 0th element unused
+        self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]])
+
+    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
+        # arrange: same tables as test_best_model2_alignment
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        translation_table = {
+            "i": {"j'": 0.9, "aime": 0.05, "bien": 0.02, "jambon": 0.03, None: 0},
+            "love": {"j'": 0.05, "aime": 0.9, "bien": 0.01, "jambon": 0.01, None: 0.03},
+            "ham": {"j'": 0, "aime": 0.01, "bien": 0, "jambon": 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act: force 'love' to be pegged to 'jambon'
+        a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4)
+        # assert: target position 2 stays on source position 4
+        self.assertEqual(a_info.alignment[1:], (1, 4, 4))
+        self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]])
+
+    def test_best_model2_alignment_handles_fertile_words(self):
+        # arrange: six-word target sentence against the four-word source fixture
+        sentence_pair = AlignedSent(
+            ["i", "really", ",", "really", "love", "ham"],
+            TestIBMModel.__TEST_SRC_SENTENCE,
+        )
+        # 'bien' produces 2 target words: 'really' and another 'really'
+        translation_table = {
+            "i": {"j'": 0.9, "aime": 0.05, "bien": 0.02, "jambon": 0.03, None: 0},
+            "really": {"j'": 0, "aime": 0, "bien": 0.9, "jambon": 0.01, None: 0.09},
+            ",": {"j'": 0, "aime": 0, "bien": 0.3, "jambon": 0, None: 0.7},
+            "love": {"j'": 0.05, "aime": 0.9, "bien": 0.01, "jambon": 0.01, None: 0.03},
+            "ham": {"j'": 0, "aime": 0.01, "bien": 0, "jambon": 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert: both 'really' tokens share source position 3; ',' maps to 0 (NULL)
+        self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4))
+        self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]])
+
+    def test_best_model2_alignment_handles_empty_src_sentence(self):
+        # arrange: the source side has no words at all
+        sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE, [])
+        ibm_model = IBMModel([])
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert: every target word is aligned to position 0
+        self.assertEqual(a_info.alignment[1:], (0, 0, 0))
+        self.assertEqual(a_info.cepts, [[1, 2, 3]])
+
+    def test_best_model2_alignment_handles_empty_trg_sentence(self):
+        # arrange: the target side has no words at all
+        sentence_pair = AlignedSent([], TestIBMModel.__TEST_SRC_SENTENCE)
+        ibm_model = IBMModel([])
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert: nothing is aligned, so all five cepts are empty
+        self.assertEqual(a_info.alignment[1:], ())
+        self.assertEqual(a_info.cepts, [[], [], [], [], []])
+
+    def test_neighboring_finds_neighbor_alignments(self):
+        # arrange: 'green' -> 'verts' (3), 'eggs' -> 'œufs' (2)
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, "des", "œufs", "verts"),
+            ("UNUSED", "green", "eggs"),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act
+        neighbors = ibm_model.neighboring(a_info)
+
+        # assert: compare the returned alignment tuples as a set
+        neighbor_alignments = set()
+        for neighbor in neighbors:
+            neighbor_alignments.add(neighbor.alignment)
+        expected_alignments = {
+            # moves
+            (0, 0, 2),
+            (0, 1, 2),
+            (0, 2, 2),
+            (0, 3, 0),
+            (0, 3, 1),
+            (0, 3, 3),
+            # swaps
+            (0, 2, 3),
+            # original alignment
+            (0, 3, 2),
+        }
+        self.assertEqual(neighbor_alignments, expected_alignments)
+
+    def test_neighboring_sets_neighbor_alignment_info(self):
+        # arrange: same starting alignment as the previous test
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, "des", "œufs", "verts"),
+            ("UNUSED", "green", "eggs"),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act
+        neighbors = ibm_model.neighboring(a_info)
+
+        # assert: select a few particular alignments
+        for neighbor in neighbors:
+            if neighbor.alignment == (0, 2, 2):
+                moved_alignment = neighbor
+            elif neighbor.alignment == (0, 3, 2):  # NOTE(review): equals the input alignment, not a swap - confirm intent
+                swapped_alignment = neighbor
+
+        self.assertEqual(moved_alignment.cepts, [[], [], [1, 2], []])  # unbound name if (0, 2, 2) was never returned
+        self.assertEqual(swapped_alignment.cepts, [[], [], [2], [1]])
+
+    def test_neighboring_returns_neighbors_with_pegged_alignment(self):
+        # arrange: same starting alignment as the previous tests
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, "des", "œufs", "verts"),
+            ("UNUSED", "green", "eggs"),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act: peg 'eggs' to align with 'œufs'
+        neighbors = ibm_model.neighboring(a_info, 2)
+
+        # assert: only target position 1 varies; position 2 keeps value 2
+        neighbor_alignments = set()
+        for neighbor in neighbors:
+            neighbor_alignments.add(neighbor.alignment)
+        expected_alignments = {
+            # moves
+            (0, 0, 2),
+            (0, 1, 2),
+            (0, 2, 2),
+            # no swaps
+            # original alignment
+            (0, 3, 2),
+        }
+        self.assertEqual(neighbor_alignments, expected_alignments)
+
+    def test_hillclimb(self):
+        # arrange: neighboring and prob_t_a_given_s are replaced by the stubs below
+        initial_alignment = AlignmentInfo((0, 3, 2), None, None, None)
+
+        def neighboring_mock(a, j):  # j is accepted but ignored
+            if a.alignment == (0, 3, 2):
+                return {
+                    AlignmentInfo((0, 2, 2), None, None, None),
+                    AlignmentInfo((0, 1, 1), None, None, None),
+                }
+            elif a.alignment == (0, 2, 2):
+                return {
+                    AlignmentInfo((0, 3, 3), None, None, None),
+                    AlignmentInfo((0, 4, 4), None, None, None),
+                }
+            return set()  # any other alignment has no neighbors
+
+        def prob_t_a_given_s_mock(a):
+            prob_values = {
+                (0, 3, 2): 0.5,
+                (0, 2, 2): 0.6,
+                (0, 1, 1): 0.4,
+                (0, 3, 3): 0.6,
+                (0, 4, 4): 0.7,
+            }
+            return prob_values.get(a.alignment, 0.01)  # 0.01 for unlisted alignments
+
+        ibm_model = IBMModel([])
+        ibm_model.neighboring = neighboring_mock  # instance-level override
+        ibm_model.prob_t_a_given_s = prob_t_a_given_s_mock
+
+        # act
+        best_alignment = ibm_model.hillclimb(initial_alignment)
+
+        # assert: hill climbing goes from (0, 3, 2) -> (0, 2, 2) -> (0, 4, 4)
+        self.assertEqual(best_alignment.alignment, (0, 4, 4))
+
+    def test_sample(self):
+        # arrange: every alignment scores the same constant probability
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        ibm_model = IBMModel([])
+        ibm_model.prob_t_a_given_s = lambda x: 0.001
+
+        # act
+        samples, best_alignment = ibm_model.sample(sentence_pair)
+
+        # assert: only the number of sampled alignments is checked
+        self.assertEqual(len(samples), 61)
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_meteor.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_meteor.py
@@ -0,0 +1,20 @@
+import unittest
+
+from nltk.translate.meteor_score import meteor_score
+
+
+class TestMETEOR(unittest.TestCase):
+    """Tests for the METEOR score and its input type checks."""
+
+    # Two tokenised references. The second one previously read
+    # ["this", "is" "test"]: the missing comma made Python concatenate the
+    # adjacent literals into the single token "istest".
+    reference = [["this", "is", "a", "test"], ["this", "is", "test"]]
+    # Differs from the first reference only in letter case.
+    candidate = ["THIS", "Is", "a", "tEST"]
+
+    def test_meteor(self):
+        """Scoring with lower-casing preprocessing gives exactly 0.9921875."""
+        score = meteor_score(self.reference, self.candidate, preprocess=str.lower)
+        assert score == 0.9921875
+
+    def test_reference_type_check(self):
+        """References passed as plain strings raise TypeError."""
+        str_reference = [" ".join(ref) for ref in self.reference]
+        self.assertRaises(TypeError, meteor_score, str_reference, self.candidate)
+
+    def test_candidate_type_check(self):
+        """A candidate passed as a plain string raises TypeError."""
+        str_candidate = " ".join(self.candidate)
+        self.assertRaises(TypeError, meteor_score, self.reference, str_candidate)
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_nist.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_nist.py
@@ -0,0 +1,36 @@
+"""
+Tests for NIST translation evaluation metric
+"""
+
+import io
+import unittest
+
+from nltk.data import find
+from nltk.translate.nist_score import corpus_nist
+
+
+class TestNIST(unittest.TestCase):
+    """Compares NLTK's NIST scores with the values in an mteval output file."""
+
+    def test_sentence_nist(self):
+        """corpus_nist stays within 0.05 of each score read from mteval-13a.output.
+
+        Despite the method name, scores are computed with ``corpus_nist``
+        over all hypothesis/reference lines, once per n-gram order.
+        """
+        ref_file = find("models/wmt15_eval/ref.ru")
+        hyp_file = find("models/wmt15_eval/google.ru")
+        mteval_output_file = find("models/wmt15_eval/mteval-13a.output")
+
+        # Reads the NIST scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        # An explicit encoding keeps the read independent of the locale.
+        with open(mteval_output_file, encoding="utf8") as mteval_fin:
+            # The numbers are on the fourth line from the end of the file.
+            # The first and the last whitespace-separated fields of that
+            # line are skipped; the remaining ones are parsed as floats.
+            score_fields = mteval_fin.readlines()[-4].split()[1:-1]
+        mteval_nist_scores = [float(field) for field in score_fields]
+
+        with open(ref_file, encoding="utf8") as ref_fin:
+            with open(hyp_file, encoding="utf8") as hyp_fin:
+                # Whitespace tokenize the file.
+                # Note: split() automatically strip().
+                hypotheses = [line.split() for line in hyp_fin]
+                # Note that the corpus_nist input is list of list of references.
+                references = [[line.split()] for line in ref_fin]
+
+        # Without smoothing; at most the n-gram orders 1..9 are compared.
+        for i, mteval_nist in zip(range(1, 10), mteval_nist_scores):
+            nltk_nist = corpus_nist(references, hypotheses, i)
+            # Check that the NIST scores difference is less than 0.05.
+            # assertLess (unlike a bare assert) survives `python -O` and
+            # reports both values on failure.
+            self.assertLess(abs(mteval_nist - nltk_nist), 0.05)
--- a/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_stack_decoder.py
+++ b/backend/venv/Lib/site-packages/nltk/test/unit/translate/test_stack_decoder.py
@@ -0,0 +1,294 @@
+# Natural Language Toolkit: Stack decoder
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Tests for stack decoder
+"""
+
+import unittest
+from collections import defaultdict
+from math import log
+
+from nltk.translate import PhraseTable, StackDecoder
+from nltk.translate.stack_decoder import _Hypothesis, _Stack
+
+
+class TestStackDecoder(unittest.TestCase):
+    """Exercises StackDecoder's phrase lookup and scoring helpers."""
+
+    def test_find_all_src_phrases(self):
+        """Each start position lists the end positions of phrases in the table."""
+        # arrange
+        table = TestStackDecoder.create_fake_phrase_table()
+        decoder = StackDecoder(table, None)
+        words = ("my", "hovercraft", "is", "full", "of", "eels")
+
+        # act
+        spans_from = decoder.find_all_src_phrases(words)
+
+        # assert
+        self.assertEqual(spans_from[0], [2])  # 'my hovercraft'
+        self.assertEqual(spans_from[1], [2])  # 'hovercraft'
+        self.assertEqual(spans_from[2], [3])  # 'is'
+        self.assertEqual(spans_from[3], [5, 6])  # 'full of', 'full of eels'
+        self.assertFalse(spans_from[4])  # no entry starting with 'of'
+        self.assertEqual(spans_from[5], [6])  # 'eels'
+
+    def test_distortion_score(self):
+        """The score is log(distortion_factor) times the gap between spans."""
+        # arrange
+        decoder = StackDecoder(None, None)
+        decoder.distortion_factor = 0.5
+        previous = _Hypothesis()
+        previous.src_phrase_span = (3, 5)
+
+        # act
+        actual = decoder.distortion_score(previous, (8, 10))
+
+        # assert
+        # the next phrase starts at 8, the previous one ended at 5
+        expected = log(decoder.distortion_factor) * (8 - 5)
+        self.assertEqual(actual, expected)
+
+    def test_distortion_score_of_first_expansion(self):
+        """Expanding an empty hypothesis costs no distortion."""
+        # arrange
+        decoder = StackDecoder(None, None)
+        decoder.distortion_factor = 0.5
+        empty = _Hypothesis()
+
+        # act
+        actual = decoder.distortion_score(empty, (8, 10))
+
+        # assert
+        # expansion from empty hypothesis always has zero distortion cost
+        self.assertEqual(actual, 0.0)
+
+    def test_compute_future_costs(self):
+        """Known phrases score translation log-prob plus language model score."""
+        # arrange
+        table = TestStackDecoder.create_fake_phrase_table()
+        lm = TestStackDecoder.create_fake_language_model()
+        decoder = StackDecoder(table, lm)
+        words = ("my", "hovercraft", "is", "full", "of", "eels")
+
+        # act
+        scores = decoder.compute_future_scores(words)
+
+        # assert
+        checked_spans = (
+            (1, 2, ("hovercraft",)),
+            (0, 2, ("my", "hovercraft")),
+        )
+        for start, end, phrase in checked_spans:
+            first_translation = table.translations_for(phrase)[0]
+            expected = first_translation.log_prob + lm.probability(phrase)
+            self.assertEqual(scores[start][end], expected)
+
+    def test_compute_future_costs_for_phrases_not_in_phrase_table(self):
+        """Unknown phrases back off to the sum of their sub-span scores."""
+        # arrange
+        table = TestStackDecoder.create_fake_phrase_table()
+        lm = TestStackDecoder.create_fake_language_model()
+        decoder = StackDecoder(table, lm)
+        words = ("my", "hovercraft", "is", "full", "of", "eels")
+
+        # act
+        scores = decoder.compute_future_scores(words)
+
+        # assert
+        # 'hovercraft is' is not in phrase table, so it backs off
+        backoff = scores[1][2] + scores[2][3]
+        self.assertEqual(scores[1][3], backoff)
+
+    def test_future_score(self):
+        """The future score adds up the table entries of untranslated spans."""
+        # arrange: sentence with 8 words; words 2, 3, 4 already translated
+        partial = _Hypothesis()
+        partial.untranslated_spans = lambda _: [(0, 2), (5, 8)]  # mock
+        span_scores = defaultdict(lambda: defaultdict(float))
+        span_scores[0][2] = 0.4
+        span_scores[5][8] = 0.5
+        decoder = StackDecoder(None, None)
+
+        # act
+        actual = decoder.future_score(partial, span_scores, 8)
+
+        # assert
+        self.assertEqual(actual, 0.4 + 0.5)
+
+    def test_valid_phrases(self):
+        """Only phrase spans lying inside untranslated spans are returned."""
+        # arrange
+        partial = _Hypothesis()
+        # mock untranslated_spans method
+        partial.untranslated_spans = lambda _: [(0, 2), (3, 6)]
+        ends_by_start = [[1, 4], [2], [], [5], [5, 6, 7], [], [7]]
+
+        # act
+        actual = StackDecoder.valid_phrases(ends_by_start, partial)
+
+        # assert
+        expected = [(0, 1), (1, 2), (3, 5), (4, 5), (4, 6)]
+        self.assertEqual(actual, expected)
+
+    @staticmethod
+    def create_fake_phrase_table():
+        """Return a PhraseTable filled with a fixed set of dummy entries."""
+        # (source phrase, target phrase, score), added in this order
+        entries = (
+            (("hovercraft",), ("",), 0.8),
+            (("my", "hovercraft"), ("", ""), 0.7),
+            (("my", "cheese"), ("", ""), 0.7),
+            (("is",), ("",), 0.8),
+            (("is",), ("",), 0.5),
+            (("full", "of"), ("", ""), 0.01),
+            (("full", "of", "eels"), ("", "", ""), 0.5),
+            (("full", "of", "spam"), ("", ""), 0.5),
+            (("eels",), ("",), 0.5),
+            (("spam",), ("",), 0.5),
+        )
+        table = PhraseTable()
+        for src_phrase, trg_phrase, score in entries:
+            table.add(src_phrase, trg_phrase, score)
+        return table
+
+    @staticmethod
+    def create_fake_language_model():
+        """Return an object whose ``probability(phrase)`` reads a fixed table."""
+        # nltk.model should be used here once it is implemented
+        # phrases without an explicit entry score -999.0
+        log_probs = defaultdict(lambda: -999.0)
+        for word in ("my", "hovercraft", "is", "full", "of", "eels"):
+            log_probs[(word,)] = log(0.1)
+        log_probs[("my", "hovercraft")] = log(0.3)
+
+        # anonymous class exposing a single `probability` method
+        fake_model_class = type(
+            "", (object,), {"probability": lambda _, phrase: log_probs[phrase]}
+        )
+        return fake_model_class()
+
+
+class TestHypothesis(unittest.TestCase):
+    """Tests for _Hypothesis bookkeeping along a chain of expansions."""
+
+    def setUp(self):
+        """Build a root -> child -> grandchild chain and keep the last link."""
+        first_expansion = _Hypothesis(
+            raw_score=0.5,
+            src_phrase_span=(3, 7),
+            trg_phrase=("hello", "world"),
+            previous=_Hypothesis(),
+        )
+        self.hypothesis_chain = _Hypothesis(
+            raw_score=0.4,
+            src_phrase_span=(1, 2),
+            trg_phrase=("and", "goodbye"),
+            previous=first_expansion,
+        )
+
+    def test_translation_so_far(self):
+        """Target phrases are concatenated in expansion order."""
+        # act
+        words = self.hypothesis_chain.translation_so_far()
+
+        # assert
+        self.assertEqual(words, ["hello", "world", "and", "goodbye"])
+
+    def test_translation_so_far_for_empty_hypothesis(self):
+        """An empty hypothesis has translated nothing yet."""
+        # arrange
+        empty = _Hypothesis()
+
+        # act
+        words = empty.translation_so_far()
+
+        # assert
+        self.assertEqual(words, [])
+
+    def test_total_translated_words(self):
+        """Spans (3, 7) and (1, 2) together cover five source words."""
+        # act
+        count = self.hypothesis_chain.total_translated_words()
+
+        # assert
+        self.assertEqual(count, 5)
+
+    def test_translated_positions(self):
+        """Translated positions are the union of both source spans."""
+        # act
+        positions = self.hypothesis_chain.translated_positions()
+
+        # assert
+        # the result is sorted in place before comparing
+        positions.sort()
+        self.assertEqual(positions, [1, 3, 4, 5, 6])
+
+    def test_untranslated_spans(self):
+        """Gaps around the translated spans are reported for a 10-word input."""
+        # act
+        gaps = self.hypothesis_chain.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(gaps, [(0, 1), (2, 3), (7, 10)])
+
+    def test_untranslated_spans_for_empty_hypothesis(self):
+        """With nothing translated the whole sentence is one open span."""
+        # arrange
+        empty = _Hypothesis()
+
+        # act
+        gaps = empty.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(gaps, [(0, 10)])
+
+
+class TestStack(unittest.TestCase):
+    """
+    Tests for ``_Stack``: the pruning done by ``push`` and the choice made
+    by ``best``.
+
+    Membership checks use ``assertNotIn`` and the empty-stack check uses
+    ``assertIsNone`` so that a failure reports the offending hypothesis
+    instead of a bare "True is not false".
+    """
+
+    def test_push_bumps_off_worst_hypothesis_when_stack_is_full(self):
+        # arrange
+        stack = _Stack(3)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act: four pushes into a stack created with a limit of 3
+        stack.push(_Hypothesis(0.2))
+        stack.push(poor_hypothesis)
+        stack.push(_Hypothesis(0.1))
+        stack.push(_Hypothesis(0.3))
+
+        # assert
+        self.assertNotIn(poor_hypothesis, stack)
+
+    def test_push_removes_hypotheses_that_fall_below_beam_threshold(self):
+        # arrange (0.5 is presumably the beam threshold -- confirm in _Stack)
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+        worse_hypothesis = _Hypothesis(0.009)
+
+        # act
+        stack.push(poor_hypothesis)
+        stack.push(worse_hypothesis)
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+
+        # assert: both earlier entries are gone once the strong one arrives
+        self.assertNotIn(poor_hypothesis, stack)
+        self.assertNotIn(worse_hypothesis, stack)
+
+    def test_push_does_not_add_hypothesis_that_falls_below_beam_threshold(self):
+        # arrange
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+        stack.push(poor_hypothesis)
+
+        # assert
+        self.assertNotIn(poor_hypothesis, stack)
+
+    def test_best_returns_the_best_hypothesis(self):
+        # arrange
+        stack = _Stack(3)
+        best_hypothesis = _Hypothesis(0.99)
+
+        # act: the best entry is pushed neither first nor last
+        stack.push(_Hypothesis(0.0))
+        stack.push(best_hypothesis)
+        stack.push(_Hypothesis(0.5))
+
+        # assert
+        self.assertEqual(stack.best(), best_hypothesis)
+
+    def test_best_returns_none_when_stack_is_empty(self):
+        # arrange
+        stack = _Stack(3)
+
+        # assert
+        self.assertIsNone(stack.best())