Initial commit

2026-02-01 09:31:38 +01:00
commit e02db93960
4396 changed files with 1511612 additions and 0 deletions
--- a/backend/venv/Lib/site-packages/nltk/translate/__init__.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/__init__.py
@@ -0,0 +1,33 @@
+# Natural Language Toolkit: Machine Translation
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>, Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Experimental features for machine translation.
+These interfaces are prone to change.
+
+isort:skip_file
+"""
+
+from nltk.translate.api import AlignedSent, Alignment, PhraseTable
+from nltk.translate.ibm_model import IBMModel
+from nltk.translate.ibm1 import IBMModel1
+from nltk.translate.ibm2 import IBMModel2
+from nltk.translate.ibm3 import IBMModel3
+from nltk.translate.ibm4 import IBMModel4
+from nltk.translate.ibm5 import IBMModel5
+from nltk.translate.bleu_score import sentence_bleu as bleu
+from nltk.translate.ribes_score import sentence_ribes as ribes
+from nltk.translate.meteor_score import meteor_score as meteor
+from nltk.translate.metrics import alignment_error_rate
+from nltk.translate.stack_decoder import StackDecoder
+from nltk.translate.nist_score import sentence_nist as nist
+from nltk.translate.chrf_score import sentence_chrf as chrf
+from nltk.translate.gale_church import trace
+from nltk.translate.gdfa import grow_diag_final_and
+from nltk.translate.gleu_score import sentence_gleu as gleu
+from nltk.translate.phrase_based import extract
+from nltk.translate.lepor import sentence_lepor as lepor, corpus_lepor
--- a/backend/venv/Lib/site-packages/nltk/translate/api.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/api.py
@@ -0,0 +1,335 @@
+# Natural Language Toolkit: API for alignment and translation objects
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Will Zhang <wilzzha@gmail.com>
+#         Guan Gui <ggui@student.unimelb.edu.au>
+#         Steven Bird <stevenbird1@gmail.com>
+#         Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+import subprocess
+from collections import namedtuple
+
+
+class AlignedSent:
+    """
+    A sentence pair bundled with the word-level ``Alignment`` that links
+    the two sides.
+
+    The usual use case is machine translation, where the pair is a
+    sentence and its translation.
+
+        >>> from nltk.translate import AlignedSent, Alignment
+        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
+        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
+        >>> algnsent.words
+        ['klein', 'ist', 'das', 'Haus']
+        >>> algnsent.mots
+        ['the', 'house', 'is', 'small']
+        >>> algnsent.alignment
+        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
+        >>> from nltk.corpus import comtrans
+        >>> print(comtrans.aligned_sents()[54])
+        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
+        >>> print(comtrans.aligned_sents()[54].alignment)
+        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13
+
+    :param words: Words in the target language sentence
+    :type words: list(str)
+    :param mots: Words in the source language sentence
+    :type mots: list(str)
+    :param alignment: Word-level alignments between ``words`` and ``mots``.
+        Each alignment is represented as a 2-tuple (words_index, mots_index).
+    :type alignment: Alignment
+    """
+
+    def __init__(self, words, mots, alignment=None):
+        self._words = words
+        self._mots = mots
+        # Assigning through the ``alignment`` property runs the bounds check.
+        if alignment is not None:
+            assert type(alignment) is Alignment
+            self.alignment = alignment
+        else:
+            self.alignment = Alignment([])
+
+    @property
+    def words(self):
+        """The first token sequence, as passed to the constructor."""
+        return self._words
+
+    @property
+    def mots(self):
+        """The second token sequence, as passed to the constructor."""
+        return self._mots
+
+    def _get_alignment(self):
+        return self._alignment
+
+    def _set_alignment(self, alignment):
+        # Reject alignments that point outside either sentence before storing.
+        _check_alignment(len(self.words), len(self.mots), alignment)
+        self._alignment = alignment
+
+    alignment = property(_get_alignment, _set_alignment)
+
+    def __repr__(self):
+        """
+        Return a constructor-like string representation of this
+        ``AlignedSent``.
+
+        :rtype: str
+        """
+
+        def _quoted_list(tokens):
+            # Each token is wrapped in single quotes, without escaping.
+            return "[%s]" % ", ".join("'%s'" % tok for tok in tokens)
+
+        return "AlignedSent({}, {}, {!r})".format(
+            _quoted_list(self._words), _quoted_list(self._mots), self._alignment
+        )
+
+    def _to_dot(self):
+        """
+        Render the aligned sentence as a Graphviz dot graph description.
+
+        :rtype: str
+        """
+        src_tokens = self._words
+        trg_tokens = self._mots
+        chunks = ["graph align {\n", "node[shape=plaintext]\n"]
+
+        # One plaintext node per token on each side.
+        chunks.extend(
+            '"{0}_source" [label="{0}"] \n'.format(tok) for tok in src_tokens
+        )
+        chunks.extend(
+            '"{0}_target" [label="{0}"] \n'.format(tok) for tok in trg_tokens
+        )
+
+        # One visible edge per alignment pair.
+        chunks.extend(
+            '"{}_source" -- "{}_target" \n'.format(src_tokens[u], trg_tokens[v])
+            for u, v in self._alignment
+        )
+
+        # Invisible edges chain neighbouring tokens so each side keeps its order.
+        for pos in range(1, len(src_tokens)):
+            chunks.append(
+                '"{}_source" -- "{}_source" [style=invis]\n'.format(
+                    src_tokens[pos - 1], src_tokens[pos]
+                )
+            )
+        for pos in range(1, len(trg_tokens)):
+            chunks.append(
+                '"{}_target" -- "{}_target" [style=invis]\n'.format(
+                    trg_tokens[pos - 1], trg_tokens[pos]
+                )
+            )
+
+        # Keep every token of a side on the same rank (one row per side).
+        chunks.append(
+            "{rank = same; %s}\n" % " ".join('"%s_source"' % tok for tok in src_tokens)
+        )
+        chunks.append(
+            "{rank = same; %s}\n" % " ".join('"%s_target"' % tok for tok in trg_tokens)
+        )
+
+        chunks.append("}")
+        return "".join(chunks)
+
+    def _repr_svg_(self):
+        """
+        IPython rich-display hook: return an SVG rendering of this
+        ``AlignedSent`` produced by piping the dot description through the
+        external ``dot`` program.
+
+        :raise Exception: if the ``dot`` process cannot be started
+        """
+        payload = self._to_dot().encode("utf8")
+        command = ["dot", "-Tsvg"]
+        try:
+            process = subprocess.Popen(
+                command,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+        except OSError as e:
+            raise Exception("Cannot find the dot binary from Graphviz package") from e
+        # Whatever dot writes to stderr is read but not used.
+        svg_bytes, _ = process.communicate(payload)
+
+        return svg_bytes.decode("utf8")
+
+    def __str__(self):
+        """
+        Return a short human-readable summary: the first 20 characters of
+        each space-joined sentence, each followed by an ellipsis.
+
+        :rtype: str
+        """
+        preview_words = " ".join(self._words)[:20] + "..."
+        preview_mots = " ".join(self._mots)[:20] + "..."
+        return "<AlignedSent: '{}' -> '{}'>".format(preview_words, preview_mots)
+
+    def invert(self):
+        """
+        Return a new aligned sentence pair with the two sides swapped and
+        the alignment inverted accordingly.
+
+        :rtype: AlignedSent
+        """
+        flipped_alignment = self._alignment.invert()
+        return AlignedSent(self._mots, self._words, flipped_alignment)
+
+
+class Alignment(frozenset):
+    """
+    A storage class for representing alignment between two sequences, s1, s2.
+    In general, an alignment is a set of tuples of the form (i, j, ...)
+    representing an alignment between the i-th element of s1 and the
+    j-th element of s2.  Tuples are extensible (they might contain
+    additional data, such as a boolean to indicate sure vs possible alignments).
+
+        >>> from nltk.translate import Alignment
+        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
+        >>> a.invert()
+        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
+        >>> print(a.invert())
+        0-0 1-0 2-1 2-2
+        >>> a[0]
+        [(0, 1), (0, 0)]
+        >>> a.invert()[2]
+        [(2, 1), (2, 2)]
+        >>> b = Alignment([(0, 0), (0, 1)])
+        >>> b.issubset(a)
+        True
+        >>> c = Alignment.fromstring('0-0 0-1')
+        >>> b == c
+        True
+    """
+
+    def __new__(cls, pairs):
+        self = frozenset.__new__(cls, pairs)
+        # Largest first-sequence index seen (0 for an empty alignment); it
+        # determines the size of the lazily built lookup index.
+        self._len = max(p[0] for p in self) if self != frozenset([]) else 0
+        self._index = None
+        return self
+
+    @classmethod
+    def fromstring(cls, s):
+        """
+        Read a giza-formatted string and return an Alignment object.
+
+            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
+            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])
+
+        :type s: str
+        :param s: the positional alignments in giza format
+        :rtype: Alignment
+        :return: An Alignment object corresponding to the string representation ``s``.
+        """
+
+        return Alignment([_giza2pair(a) for a in s.split()])
+
+    def __getitem__(self, key):
+        """
+        Look up the alignments that map from a given index or slice.
+
+        :return: for an integer ``key``, the list of tuples whose first
+            element is ``key``; for a slice, the matching slice of the
+            per-index lists.
+        """
+        if not self._index:
+            self._build_index()
+        return self._index.__getitem__(key)
+
+    def invert(self):
+        """
+        Return an Alignment object, being the inverted mapping.
+
+        Only the first two elements of each tuple are swapped; any extra
+        elements are carried over unchanged.
+        """
+        return Alignment(((p[1], p[0]) + p[2:]) for p in self)
+
+    def range(self, positions=None):
+        """
+        Work out the range of the mapping from the given positions.
+        If no positions are specified, compute the range of the entire mapping.
+
+        :param positions: indices into the first sequence; ``None`` or an
+            empty collection selects every index.
+        :return: sorted list of the distinct second-sequence indices
+            aligned to ``positions``.
+        :rtype: list
+        """
+        image = set()
+        if not self._index:
+            self._build_index()
+        if not positions:
+            positions = list(range(len(self._index)))
+        for position in positions:
+            # Index by position rather than unpacking two items, so that
+            # extended tuples (i, j, extra...) are supported as documented.
+            image.update(pair[1] for pair in self._index[position])
+        return sorted(image)
+
+    def __repr__(self):
+        """
+        Produce a constructor-style representation listing the sorted tuples.
+        """
+        return "Alignment(%r)" % sorted(self)
+
+    def __str__(self):
+        """
+        Produce a Giza-formatted string representing the alignment.
+
+        Only the first two elements of each tuple are printed.
+        """
+        return " ".join("%d-%d" % p[:2] for p in sorted(self))
+
+    def _build_index(self):
+        """
+        Build a list self._index such that self._index[i] is a list
+        of the alignments originating from word i.
+        """
+        self._index = [[] for _ in range(self._len + 1)]
+        for p in self:
+            self._index[p[0]].append(p)
+
+
+def _giza2pair(pair_string):
+    """
+    Parse one ``"i-j"`` token into a pair of ints.
+
+    :param pair_string: exactly two integer fields separated by ``-``
+    :type pair_string: str
+    :rtype: tuple(int, int)
+    """
+    first, second = pair_string.split("-")
+    return (int(first), int(second))
+
+
+def _naacl2pair(pair_string):
+    """
+    Parse one ``"i-j-p"`` token into a pair of ints.
+
+    The third field must be present but is discarded.
+
+    :param pair_string: exactly three fields separated by ``-``
+    :type pair_string: str
+    :rtype: tuple(int, int)
+    """
+    first, second, _ = pair_string.split("-")
+    return (int(first), int(second))
+
+
+def _check_alignment(num_words, num_mots, alignment):
+    """
+    Verify that every pair in ``alignment`` points inside both sentences.
+
+    The first index of each pair must lie in ``[0, num_words)``. The second
+    index must lie in ``[0, num_mots)`` unless it is ``None``.
+
+    :param num_words: the number of source language words
+    :type num_words: int
+    :param num_mots: the number of target language words
+    :type num_mots: int
+    :param alignment: alignment to be checked
+    :type alignment: Alignment
+    :raise IndexError: if alignment falls outside the sentence
+    """
+
+    assert type(alignment) is Alignment
+
+    # All first indices are validated before any second index is looked at.
+    if any(not (0 <= pair[0] < num_words) for pair in alignment):
+        raise IndexError("Alignment is outside boundary of words")
+    if any(
+        pair[1] is not None and not (0 <= pair[1] < num_mots) for pair in alignment
+    ):
+        raise IndexError("Alignment is outside boundary of mots")
+
+
+# Record type created by ``PhraseTable.add``: a target phrase plus its log probability.
+PhraseTableEntry = namedtuple("PhraseTableEntry", ["trg_phrase", "log_prob"])
+
+
+class PhraseTable:
+    """
+    In-memory mapping from a source phrase to its candidate translations
+    and the log probability of each candidate.
+    """
+
+    def __init__(self):
+        # src_phrase -> list of PhraseTableEntry, best log_prob first.
+        self.src_phrases = {}
+
+    def translations_for(self, src_phrase):
+        """
+        Get the translations for a source language phrase
+
+        :param src_phrase: Source language phrase of interest
+        :type src_phrase: tuple(str)
+
+        :return: A list of target language phrases that are translations
+            of ``src_phrase``, ordered in decreasing order of
+            likelihood. Each list element is a tuple of the target
+            phrase and its log probability.
+        :rtype: list(PhraseTableEntry)
+        :raise KeyError: if ``src_phrase`` has never been added
+        """
+        return self.src_phrases[src_phrase]
+
+    def add(self, src_phrase, trg_phrase, log_prob):
+        """
+        Record ``trg_phrase`` as a translation of ``src_phrase``.
+
+        :type src_phrase: tuple(str)
+        :type trg_phrase: tuple(str)
+
+        :param log_prob: Log probability that given ``src_phrase``,
+            ``trg_phrase`` is its translation
+        :type log_prob: float
+        """
+        new_entry = PhraseTableEntry(trg_phrase=trg_phrase, log_prob=log_prob)
+        candidates = self.src_phrases.setdefault(src_phrase, [])
+        candidates.append(new_entry)
+        # Re-sort after every insertion so the list stays best-first.
+        candidates.sort(key=lambda e: e.log_prob, reverse=True)
+
+    def __contains__(self, src_phrase):
+        """Return True if at least one translation of ``src_phrase`` is stored."""
+        return src_phrase in self.src_phrases
--- a/backend/venv/Lib/site-packages/nltk/translate/bleu_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/bleu_score.py
@@ -0,0 +1,714 @@
+# Natural Language Toolkit: BLEU Score
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
+# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""BLEU score implementation."""
+import math
+import sys
+import warnings
+from collections import Counter
+from fractions import Fraction as _Fraction
+
+from nltk.util import ngrams
+
+
+class Fraction(_Fraction):
+    """
+    ``fractions.Fraction`` subclass that can report the numerator and
+    denominator exactly as they were passed in (``_normalize=False``),
+    instead of reduced to lowest terms.
+
+    From Python 3.12 on, the base-class constructor is called without the
+    ``_normalize`` keyword, so the un-reduced values are remembered here
+    and served by the ``numerator`` / ``denominator`` properties.
+
+    :param numerator: as for ``fractions.Fraction``
+    :param denominator: as for ``fractions.Fraction``
+    :param _normalize: if true, behave like a regular reduced fraction;
+        if false (default), expose the original numerator/denominator
+        whenever both were given explicitly.
+    """
+
+    def __new__(cls, numerator=0, denominator=None, _normalize=False):
+        if sys.version_info >= (3, 12):
+            self = super().__new__(cls, numerator, denominator)
+        else:
+            self = super().__new__(cls, numerator, denominator, _normalize=_normalize)
+        self._normalize = _normalize
+        self._original_numerator = numerator
+        self._original_denominator = denominator
+        return self
+
+    @property
+    def numerator(self):
+        # Without an explicit denominator the first argument may be a
+        # string, float or other rational, so there is no "original" integer
+        # numerator to report; fall back to the parsed value.
+        if not self._normalize and self._original_denominator is not None:
+            return self._original_numerator
+        return super().numerator
+
+    @property
+    def denominator(self):
+        # Never hand out ``None`` as a denominator: when none was supplied,
+        # report the one computed by the base class.
+        if not self._normalize and self._original_denominator is not None:
+            return self._original_denominator
+        return super().denominator
+
+
+def sentence_bleu(
+    references,
+    hypothesis,
+    weights=(0.25, 0.25, 0.25, 0.25),
+    smoothing_function=None,
+    auto_reweigh=False,
+):
+    """
+    Compute the BLEU score (Bilingual Evaluation Understudy) of a single
+    hypothesis against its references, following
+    Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
+    "BLEU: a method for automatic evaluation of machine translation."
+    In Proceedings of ACL. https://www.aclweb.org/anthology/P02-1040.pdf
+
+    This is ``corpus_bleu`` applied to a corpus of exactly one sentence.
+
+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...               'ensures', 'that', 'the', 'military', 'always',
+    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
+    ...               'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...               'heed', 'Party', 'commands']
+
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
+    0.5045...
+
+    When some n-gram order has no overlap at all, its precision is 0 and
+    the geometric mean collapses to 0 regardless of how good the other
+    orders are. The following example has zero 3-gram and 4-gram overlaps:
+
+    >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
+    0.0
+
+    A smoothing function softens this behaviour:
+
+    >>> chencherry = SmoothingFunction()
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
+    ...     smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
+    0.0370...
+
+    The default weights score up to 4-grams uniformly (BLEU-4). Pass
+    custom weights to use a different maximum order, e.g. uniform weights
+    over up to 5-grams (BLEU-5):
+
+    >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
+    0.3920...
+
+    Several BLEU variants can be computed in one call by passing a list of
+    weight tuples, e.g. BLEU-2, BLEU-3 *and* BLEU-4:
+
+    >>> weights = [
+    ...     (1./2., 1./2.),
+    ...     (1./3., 1./3., 1./3.),
+    ...     (1./4., 1./4., 1./4., 1./4.)
+    ... ]
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
+    [0.7453..., 0.6240..., 0.5045...]
+
+    :param references: reference sentences
+    :type references: list(list(str))
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
+    :type weights: tuple(float) / list(tuple(float))
+    :param smoothing_function: smoothing applied to the modified
+        precisions; passed through unchanged to ``corpus_bleu``
+    :type smoothing_function: SmoothingFunction
+    :param auto_reweigh: Option to re-normalize the weights uniformly.
+    :type auto_reweigh: bool
+    :return: The sentence-level BLEU score. Returns a list if multiple weights were supplied.
+    :rtype: float / list(float)
+    """
+    # Wrap the single sentence pair into a one-element corpus.
+    one_sentence_references = [references]
+    one_sentence_hypotheses = [hypothesis]
+    return corpus_bleu(
+        one_sentence_references,
+        one_sentence_hypotheses,
+        weights,
+        smoothing_function,
+        auto_reweigh,
+    )
+
+
+def corpus_bleu(
+    list_of_references,
+    hypotheses,
+    weights=(0.25, 0.25, 0.25, 0.25),
+    smoothing_function=None,
+    auto_reweigh=False,
+):
+    """
+    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
+    the hypotheses and their respective references.
+
+    Instead of averaging the sentence level BLEU scores (i.e. macro-average
+    precision), the original BLEU metric (Papineni et al. 2002) accounts for
+    the micro-average precision (i.e. summing the numerators and denominators
+    for each hypothesis-reference(s) pairs before the division).
+
+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...         'ensures', 'that', 'the', 'military', 'always',
+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...          'heed', 'Party', 'commands']
+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...          'guarantees', 'the', 'military', 'forces', 'always',
+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...          'of', 'the', 'party']
+
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
+    ...         'interested', 'in', 'world', 'history']
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
+    ...          'because', 'he', 'read', 'the', 'book']
+
+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+    >>> hypotheses = [hyp1, hyp2]
+    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
+    0.5920...
+
+    The example below shows that corpus_bleu() is different from averaging
+    sentence_bleu() for hypotheses
+
+    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
+    >>> score2 = sentence_bleu([ref2a], hyp2)
+    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
+    0.6223...
+
+    Custom weights may be supplied to fine-tune the BLEU score further.
+    A tuple of float weights for unigrams, bigrams, trigrams and so on can be given.
+
+    >>> weights = (0.1, 0.3, 0.5, 0.1)
+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+    0.5818...
+
+    This particular weight gave extra value to trigrams.
+    Furthermore, multiple weights can be given, resulting in multiple BLEU scores.
+
+    >>> weights = [
+    ...     (0.5, 0.5),
+    ...     (0.333, 0.333, 0.334),
+    ...     (0.25, 0.25, 0.25, 0.25),
+    ...     (0.2, 0.2, 0.2, 0.2, 0.2)
+    ... ]
+    >>> corpus_bleu(list_of_references, hypotheses, weights=weights) # doctest: +ELLIPSIS
+    [0.8242..., 0.7067..., 0.5920..., 0.4719...]
+
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param weights: weights for unigrams, bigrams, trigrams and so on (one or a list of weights)
+    :type weights: tuple(float) / list(tuple(float))
+    :param smoothing_function: callable invoked as
+        ``smoothing_function(p_n, references=..., hypothesis=..., hyp_len=...)``
+        on the list of modified precisions; when not given,
+        ``SmoothingFunction().method0`` is used.
+    :type smoothing_function: SmoothingFunction
+    :param auto_reweigh: Option to re-normalize the weights uniformly. Only
+        takes effect when the total hypothesis length is below 4 and the
+        weight is the default ``(0.25, 0.25, 0.25, 0.25)`` tuple.
+    :type auto_reweigh: bool
+    :return: The corpus-level BLEU score. Returns a list if multiple weights
+        were supplied. Returns 0 (or a list of zeros) when there is no
+        unigram match at all, which includes an empty corpus.
+    :rtype: float / list(float)
+    """
+    # Key = ngram order, and value = no. of clipped ngram matches.
+    p_numerators = Counter()
+    # Key = ngram order, and value = no. of ngrams in the hypotheses.
+    p_denominators = Counter()
+    hyp_lengths, ref_lengths = 0, 0
+
+    # Before proceeding to compute BLEU, perform sanity checks.
+    assert len(list_of_references) == len(hypotheses), (
+        "The number of hypotheses and their reference(s) should be the " "same "
+    )
+
+    # Accept either a single weight vector or a sequence of weight vectors.
+    # Probing ``weights[0][0]`` fails for a flat vector (its first element is
+    # not subscriptable) and for an empty one; both are wrapped in a list.
+    try:
+        weights[0][0]
+    except (TypeError, LookupError):
+        weights = [weights]
+    max_weight_length = max(len(weight) for weight in weights)
+
+    # Iterate through each hypothesis and their corresponding references.
+    for references, hypothesis in zip(list_of_references, hypotheses):
+        # For each order of ngram, calculate the numerator and
+        # denominator for the corpus-level modified precision.
+        for i in range(1, max_weight_length + 1):
+            p_i = modified_precision(references, hypothesis, i)
+            p_numerators[i] += p_i.numerator
+            p_denominators[i] += p_i.denominator
+
+        # Calculate the hypothesis length and the closest reference length.
+        # Adds them to the corpus-level hypothesis and reference counts.
+        hyp_len = len(hypothesis)
+        hyp_lengths += hyp_len
+        ref_lengths += closest_ref_length(references, hyp_len)
+
+    # Returns 0 if there's no matching n-grams
+    # We only need to check for p_numerators[1] == 0, since if there's
+    # no unigrams, there won't be any higher order ngrams.
+    # This check runs before any Fraction is built: for an empty corpus all
+    # denominators are 0 and Fraction(0, 0) would raise ZeroDivisionError.
+    if p_numerators[1] == 0:
+        return 0 if len(weights) == 1 else [0] * len(weights)
+
+    # Calculate corpus-level brevity penalty.
+    bp = brevity_penalty(ref_lengths, hyp_lengths)
+
+    # Collects the various precision values for the different ngram orders.
+    p_n = [
+        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
+        for i in range(1, max_weight_length + 1)
+    ]
+
+    # If there's no smoothing, use method0 from the SmoothingFunction class.
+    if not smoothing_function:
+        smoothing_function = SmoothingFunction().method0
+    # Smoothen the modified precision.
+    # Note: smoothing_function() may convert values into floats;
+    #       it tries to retain the Fraction object as much as the
+    #       smoothing method allows.
+    # ``references`` and ``hypothesis`` are the last pair seen by the loop
+    # above; at least one pair exists here because a unigram matched.
+    p_n = smoothing_function(
+        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
+    )
+
+    bleu_scores = []
+    for weight in weights:
+        # Uniformly re-weighting based on maximum hypothesis lengths if largest
+        # order of n-grams < 4 and weights is set at default.
+        if auto_reweigh:
+            if hyp_lengths < 4 and weight == (0.25, 0.25, 0.25, 0.25):
+                weight = (1 / hyp_lengths,) * hyp_lengths
+
+        # Weighted geometric mean of the precisions, computed in log space;
+        # zero precisions are skipped because log(0) is undefined.
+        s = (w_i * math.log(p_i) for w_i, p_i in zip(weight, p_n) if p_i > 0)
+        s = bp * math.exp(math.fsum(s))
+        bleu_scores.append(s)
+    return bleu_scores[0] if len(weights) == 1 else bleu_scores
+
+
+def modified_precision(references, hypothesis, n):
+    """
+    Calculate modified ngram precision.
+
+    The normal precision method may lead to some wrong translations with
+    high-precision, e.g., the translation, in which a word of reference
+    repeats several times, has very high precision.
+
+    This function only returns the Fraction object that contains the numerator
+    and denominator necessary to calculate the corpus-level precision.
+    To calculate the modified precision for a single pair of hypothesis and
+    references, cast the Fraction object into a float.
+
+    The famous "the the the ... " example shows that plain precision can be
+    inflated by duplicating high frequency words; the modified precision
+    clips each ngram count to its maximum count in any single reference.
+
+        >>> reference1 = 'the cat is on the mat'.split()
+        >>> reference2 = 'there is a cat on the mat'.split()
+        >>> hypothesis1 = 'the the the the the the the'.split()
+        >>> references = [reference1, reference2]
+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+        0.2857...
+
+    In the modified n-gram precision, a reference word will be considered
+    exhausted after a matching hypothesis word is identified, e.g.
+
+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+        ...               'ensures', 'that', 'the', 'military', 'will',
+        ...               'forever', 'heed', 'Party', 'commands']
+        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+        ...               'guarantees', 'the', 'military', 'forces', 'always',
+        ...               'being', 'under', 'the', 'command', 'of', 'the',
+        ...               'Party']
+        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+        ...               'of', 'the', 'party']
+        >>> hypothesis = 'of the'.split()
+        >>> references = [reference1, reference2, reference3]
+        >>> float(modified_precision(references, hypothesis, n=1))
+        1.0
+        >>> float(modified_precision(references, hypothesis, n=2))
+        1.0
+
+    An example of a normal machine translation hypothesis:
+
+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+        ...               'ensures', 'that', 'the', 'military', 'always',
+        ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+        >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+        ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
+        ...               'that', 'party', 'direct']
+
+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+        ...               'ensures', 'that', 'the', 'military', 'will',
+        ...               'forever', 'heed', 'Party', 'commands']
+
+        >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+        ...               'guarantees', 'the', 'military', 'forces', 'always',
+        ...               'being', 'under', 'the', 'command', 'of', 'the',
+        ...               'Party']
+
+        >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+        ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+        ...               'of', 'the', 'party']
+        >>> references = [reference1, reference2, reference3]
+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+        0.9444...
+        >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
+        0.5714...
+        >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
+        0.5882352941176471
+        >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
+        0.07692...
+
+
+    :param references: A list of reference translations.
+    :type references: list(list(str))
+    :param hypothesis: A hypothesis translation.
+    :type hypothesis: list(str)
+    :param n: The ngram order.
+    :type n: int
+    :return: BLEU's modified precision for the nth order ngram, as an
+        un-reduced fraction (clipped matches / hypothesis ngrams).
+    :rtype: Fraction
+    """
+    # Extracts all ngrams in hypothesis
+    # Set an empty Counter if hypothesis is empty.
+    counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
+    # For every hypothesis ngram, record its highest count in any single
+    # reference. Only hypothesis ngrams are tracked, so the table stays small.
+    max_counts = {}
+    for reference in references:
+        reference_counts = (
+            Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
+        )
+        for ngram in counts:
+            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
+
+    # Assigns the intersection between hypothesis and references' counts.
+    # ``.get(..., 0)`` covers an empty ``references`` list, for which
+    # ``max_counts`` was never filled: nothing can match, so precision is 0.
+    clipped_counts = {
+        ngram: min(count, max_counts.get(ngram, 0)) for ngram, count in counts.items()
+    }
+
+    numerator = sum(clipped_counts.values())
+    # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
+    # This happens when the hypothesis has no ngram of this order, i.e. when
+    # the ngram order is > len(hypothesis).
+    denominator = max(1, sum(counts.values()))
+
+    return Fraction(numerator, denominator, _normalize=False)
+
+
+def closest_ref_length(references, hyp_len):
+    """
+    Pick the reference length that is nearest to the hypothesis length.
+    This is the *r* variable of the brevity penalty formula in
+    Papineni et. al. (2002). When two references are equally close, the
+    shorter length wins.
+
+    :param references: A list of reference translations.
+    :type references: list(list(str))
+    :param hyp_len: The length of the hypothesis.
+    :type hyp_len: int
+    :return: The length of the reference that's closest to the hypothesis.
+    :rtype: int
+    """
+
+    def _closeness(ref_len):
+        # Sort key: distance to the hypothesis first, then the length itself
+        # so that ties resolve to the shorter reference.
+        return abs(ref_len - hyp_len), ref_len
+
+    return min(map(len, references), key=_closeness)
+
+
+def brevity_penalty(closest_ref_len, hyp_len):
+    """
+    Calculate brevity penalty.
+
+    As the modified n-gram precision still has the problem from the short
+    length sentence, brevity penalty is used to modify the overall BLEU
+    score according to length.
+
+    An example from the paper. There are three references with length 12, 15
+    and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
+
+    >>> reference1 = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
+    >>> reference2 = list('aaaaaaaaaaaaaaa')   # i.e. ['a'] * 15
+    >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
+    >>> hypothesis = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
+    >>> references = [reference1, reference2, reference3]
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    1.0
+
+    In case a hypothesis translation is shorter than the references, penalty is
+    applied.
+
+    >>> references = [['a'] * 28, ['a'] * 28]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    0.2635971381157267
+
+    The length of the closest reference is used to compute the penalty. If the
+    length of a hypothesis is 12, and the reference lengths are 13 and 2, the
+    penalty is applied because the hypothesis length (12) is less then the
+    closest reference length (13).
+
+    >>> references = [['a'] * 13, ['a'] * 2]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+    0.9200...
+
+    The brevity penalty doesn't depend on reference order. More importantly,
+    when two reference sentences are at the same distance, the shortest
+    reference sentence length is used.
+
+    >>> references = [['a'] * 13, ['a'] * 11]
+    >>> hypothesis = ['a'] * 12
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(reversed(references), hyp_len)
+    >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
+    >>> bp1 == bp2 == 1
+    True
+
+    A test example from mteval-v13a.pl (starting from the line 705):
+
+    >>> references = [['a'] * 11, ['a'] * 8]
+    >>> hypothesis = ['a'] * 7
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+    0.8668...
+
+    >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
+    >>> hypothesis = ['a'] * 7
+    >>> hyp_len = len(hypothesis)
+    >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+    >>> brevity_penalty(closest_ref_len, hyp_len)
+    1.0
+
+    :param hyp_len: The length of the hypothesis for a single sentence OR the
+        sum of all the hypotheses' lengths for a corpus
+    :type hyp_len: int
+    :param closest_ref_len: The length of the closest reference for a single
+        hypothesis OR the sum of all the closest references for every hypotheses.
+    :type closest_ref_len: int
+    :return: BLEU's brevity penalty.
+    :rtype: float
+    """
+    if hyp_len > closest_ref_len:
+        return 1
+    # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
+    elif hyp_len == 0:
+        return 0
+    else:
+        return math.exp(1 - closest_ref_len / hyp_len)
+
+
+class SmoothingFunction:
+    """
+    This is an implementation of the smoothing techniques
+    for segment-level BLEU scores that was presented in
+    Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
+    Smoothing Techniques for Sentence-Level BLEU. In WMT14.
+    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
+    """
+
+    def __init__(self, epsilon=0.1, alpha=5, k=5):
+        """
+        This will initialize the parameters required for the various smoothing
+        techniques, the default values are set to the numbers used in the
+        experiments from Chen and Cherry (2014).
+
+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
+        ...                 'that', 'the', 'military', 'always', 'obeys', 'the',
+        ...                 'commands', 'of', 'the', 'party']
+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
+        ...               'that', 'the', 'military', 'will', 'forever', 'heed',
+        ...               'Party', 'commands']
+
+        >>> chencherry = SmoothingFunction()
+        >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
+        0.4452...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
+        0.4905...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
+        0.4135...
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
+        0.4905...
+
+        :param epsilon: the epsilon value use in method 1
+        :type epsilon: float
+        :param alpha: the alpha value use in method 6
+        :type alpha: int
+        :param k: the k value use in method 4
+        :type k: int
+        """
+        self.epsilon = epsilon
+        self.alpha = alpha
+        self.k = k
+
+    def method0(self, p_n, *args, **kwargs):
+        """
+        No smoothing.
+        """
+        p_n_new = []
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator != 0:
+                p_n_new.append(p_i)
+            else:
+                _msg = str(
+                    "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
+                    "Therefore the BLEU score evaluates to 0, independently of\n"
+                    "how many N-gram overlaps of lower order it contains.\n"
+                    "Consider using lower n-gram order or use "
+                    "SmoothingFunction()"
+                ).format(i + 1)
+                warnings.warn(_msg)
+                # When numerator==0 where denonminator==0 or !=0, the result
+                # for the precision score should be equal to 0 or undefined.
+                # Due to BLEU geometric mean computation in logarithm space,
+                # we we need to take the return sys.float_info.min such that
+                # math.log(sys.float_info.min) returns a 0 precision score.
+                p_n_new.append(sys.float_info.min)
+        return p_n_new
+
+    def method1(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
+        """
+        return [
+            (
+                (p_i.numerator + self.epsilon) / p_i.denominator
+                if p_i.numerator == 0
+                else p_i
+            )
+            for p_i in p_n
+        ]
+
+    def method2(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 2: Add 1 to both numerator and denominator from
+        Chin-Yew Lin and Franz Josef Och (2004) ORANGE: a Method for
+        Evaluating Automatic Evaluation Metrics for Machine Translation.
+        In COLING 2004.
+        """
+        return [
+            (
+                Fraction(p_n[i].numerator + 1, p_n[i].denominator + 1, _normalize=False)
+                if i != 0
+                else p_n[0]
+            )
+            for i in range(len(p_n))
+        ]
+
+    def method3(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 3: NIST geometric sequence smoothing
+        The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
+        precision score whose matching n-gram count is null.
+        k is 1 for the first 'n' value for which the n-gram match count is null/
+
+        For example, if the text contains:
+
+        - one 2-gram match
+        - and (consequently) two 1-gram matches
+
+        the n-gram count for each individual precision score would be:
+
+        - n=1  =>  prec_count = 2     (two unigrams)
+        - n=2  =>  prec_count = 1     (one bigram)
+        - n=3  =>  prec_count = 1/2   (no trigram,  taking 'smoothed' value of 1 / ( 2^k ), with k=1)
+        - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
+        """
+        incvnt = 1  # From the mteval-v13a.pl, it's referred to as k.
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0:
+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
+                incvnt += 1
+        return p_n
+
+    def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 4:
+        Shorter translations may have inflated precision values due to having
+        smaller denominators; therefore, we give them proportionally
+        smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+        suggests dividing by 1/ln(len(T)), where T is the length of the translation.
+        """
+        incvnt = 1
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0 and hyp_len > 1:
+                # incvnt = i + 1 * self.k / math.log(
+                #     hyp_len
+                # )  # Note that this K is different from the K from NIST.
+                # p_n[i] = incvnt / p_i.denominator\
+                numerator = 1 / (2**incvnt * self.k / math.log(hyp_len))
+                p_n[i] = numerator / p_i.denominator
+                incvnt += 1
+        return p_n
+
+    def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 5:
+        The matched counts for similar values of n should be similar. To a
+        calculate the n-gram matched count, it averages the n−1, n and n+1 gram
+        matched counts.
+        """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        m = {}
+        # Requires an precision value for an addition ngram order.
+        p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
+        m[-1] = p_n[0] + 1
+        for i, p_i in enumerate(p_n):
+            p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
+            m[i] = p_n[i]
+        return p_n
+
+    def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 6:
+        Interpolates the maximum likelihood estimate of the precision *p_n* with
+        a prior estimate *pi0*. The prior is estimated by assuming that the ratio
+        between pn and pn−1 will be the same as that between pn−1 and pn−2; from
+        Gao and He (2013) Training MRF-Based Phrase Translation Models using
+        Gradient Ascent. In NAACL.
+        """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        # This smoothing only works when p_1 and p_2 is non-zero.
+        # Raise an error with an appropriate message when the input is too short
+        # to use this smoothing technique.
+        assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
+        for i, p_i in enumerate(p_n):
+            if i in [0, 1]:  # Skips the first 2 orders of ngrams.
+                continue
+            else:
+                pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
+                # No. of ngrams in translation that matches the reference.
+                m = p_i.numerator
+                # No. of ngrams in translation.
+                l = sum(1 for _ in ngrams(hypothesis, i + 1))
+                # Calculates the interpolated precision.
+                p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
+        return p_n
+
+    def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
+        """
+        Smoothing method 7:
+        Interpolates methods 4 and 5.
+        """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
+        p_n = self.method4(p_n, references, hypothesis, hyp_len)
+        p_n = self.method5(p_n, references, hypothesis, hyp_len)
+        return p_n
--- a/backend/venv/Lib/site-packages/nltk/translate/chrf_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/chrf_score.py
@@ -0,0 +1,221 @@
+# Natural Language Toolkit: ChrF score
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors: Maja Popovic
+# Contributors: Liling Tan, Aleš Tamchyna (Memsource)
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+""" ChrF score implementation """
+import re
+from collections import Counter, defaultdict
+
+from nltk.util import ngrams
+
+
+def sentence_chrf(
+    reference, hypothesis, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True
+):
+    """
+    Calculates the sentence level CHRF (Character n-gram F-score) described in
+     - Maja Popovic. 2015. CHRF: Character n-gram F-score for Automatic MT Evaluation.
+       In Proceedings of the 10th Workshop on Machine Translation.
+       https://www.statmt.org/wmt15/pdf/WMT49.pdf
+     - Maja Popovic. 2016. CHRF Deconstructed: β Parameters and n-gram Weights.
+       In Proceedings of the 1st Conference on Machine Translation.
+       https://www.statmt.org/wmt16/pdf/W16-2341.pdf
+
+    This implementation of CHRF only supports a single reference at the moment.
+
+    For details not reported in the paper, consult Maja Popovic's original
+    implementation: https://github.com/m-popovic/chrF
+
+    The code should output results equivalent to running CHRF++ with the
+    following options: -nw 0 -b 3
+
+    An example from the original BLEU paper
+    https://www.aclweb.org/anthology/P02-1040.pdf
+
+        >>> ref1 = str('It is a guide to action that ensures that the military '
+        ...            'will forever heed Party commands').split()
+        >>> hyp1 = str('It is a guide to action which ensures that the military '
+        ...            'always obeys the commands of the party').split()
+        >>> hyp2 = str('It is to insure the troops forever hearing the activity '
+        ...            'guidebook that party direct').split()
+        >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
+        0.6349...
+        >>> sentence_chrf(ref1, hyp2) # doctest: +ELLIPSIS
+        0.3330...
+
+    The infamous "the the the ... " example
+
+        >>> ref = 'the cat is on the mat'.split()
+        >>> hyp = 'the the the the the the the'.split()
+        >>> sentence_chrf(ref, hyp)  # doctest: +ELLIPSIS
+        0.1468...
+
+    An example to show that this function allows users to use strings instead of
+    tokens, i.e. list(str) as inputs.
+
+        >>> ref1 = str('It is a guide to action that ensures that the military '
+        ...            'will forever heed Party commands')
+        >>> hyp1 = str('It is a guide to action which ensures that the military '
+        ...            'always obeys the commands of the party')
+        >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
+        0.6349...
+        >>> type(ref1) == type(hyp1) == str
+        True
+        >>> sentence_chrf(ref1.split(), hyp1.split()) # doctest: +ELLIPSIS
+        0.6349...
+
+    To skip the unigrams and only use 2- to 3-grams:
+
+        >>> sentence_chrf(ref1, hyp1, min_len=2, max_len=3) # doctest: +ELLIPSIS
+        0.6617...
+
+    :param references: reference sentence
+    :type references: list(str) / str
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str) / str
+    :param min_len: The minimum order of n-gram this function should extract.
+    :type min_len: int
+    :param max_len: The maximum order of n-gram this function should extract.
+    :type max_len: int
+    :param beta: the parameter to assign more importance to recall over precision
+    :type beta: float
+    :param ignore_whitespace: ignore whitespace characters in scoring
+    :type ignore_whitespace: bool
+    :return: the sentence level CHRF score.
+    :rtype: float
+    """
+    return corpus_chrf(
+        [reference],
+        [hypothesis],
+        min_len,
+        max_len,
+        beta=beta,
+        ignore_whitespace=ignore_whitespace,
+    )
+
+
+def _preprocess(sent, ignore_whitespace):
+    if type(sent) != str:
+        # turn list of tokens into a string
+        sent = " ".join(sent)
+
+    if ignore_whitespace:
+        sent = re.sub(r"\s+", "", sent)
+    return sent
+
+
+def chrf_precision_recall_fscore_support(
+    reference, hypothesis, n, beta=3.0, epsilon=1e-16
+):
+    """
+    This function computes the precision, recall and fscore from the ngram
+    overlaps. It returns the `support` which is the true positive score.
+
+    By underspecifying the input type, the function will be agnostic as to how
+    it computes the ngrams and simply take the whichever element in the list;
+    it could be either token or character.
+
+    :param reference: The reference sentence.
+    :type reference: list
+    :param hypothesis: The hypothesis sentence.
+    :type hypothesis: list
+    :param n: Extract up to the n-th order ngrams
+    :type n: int
+    :param beta: The parameter to assign more importance to recall over precision.
+    :type beta: float
+    :param epsilon: The fallback value if the hypothesis or reference is empty.
+    :type epsilon: float
+    :return: Returns the precision, recall and f-score and support (true positive).
+    :rtype: tuple(float)
+    """
+    ref_ngrams = Counter(ngrams(reference, n))
+    hyp_ngrams = Counter(ngrams(hypothesis, n))
+
+    # calculate the number of ngram matches
+    overlap_ngrams = ref_ngrams & hyp_ngrams
+    tp = sum(overlap_ngrams.values())  # True positives.
+    tpfp = sum(hyp_ngrams.values())  # True positives + False positives.
+    tpfn = sum(ref_ngrams.values())  # True positives + False negatives.
+
+    try:
+        prec = tp / tpfp  # precision
+        rec = tp / tpfn  # recall
+        factor = beta**2
+        fscore = (1 + factor) * (prec * rec) / (factor * prec + rec)
+    except ZeroDivisionError:
+        prec = rec = fscore = epsilon
+    return prec, rec, fscore, tp
+
+
+def corpus_chrf(
+    references, hypotheses, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True
+):
+    """
+    Calculates the corpus level CHRF (Character n-gram F-score), it is the
+    macro-averaged value of the sentence/segment level CHRF score.
+
+    This implementation of CHRF only supports a single reference at the moment.
+
+        >>> ref1 = str('It is a guide to action that ensures that the military '
+        ...            'will forever heed Party commands').split()
+        >>> ref2 = str('It is the guiding principle which guarantees the military '
+        ...            'forces always being under the command of the Party').split()
+        >>>
+        >>> hyp1 = str('It is a guide to action which ensures that the military '
+        ...            'always obeys the commands of the party').split()
+        >>> hyp2 = str('It is to insure the troops forever hearing the activity '
+        ...            'guidebook that party direct')
+        >>> corpus_chrf([ref1, ref2, ref1, ref2], [hyp1, hyp2, hyp2, hyp1]) # doctest: +ELLIPSIS
+        0.3910...
+
+    :param references: a corpus of list of reference sentences, w.r.t. hypotheses
+    :type references: list(list(str))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param min_len: The minimum order of n-gram this function should extract.
+    :type min_len: int
+    :param max_len: The maximum order of n-gram this function should extract.
+    :type max_len: int
+    :param beta: the parameter to assign more importance to recall over precision
+    :type beta: float
+    :param ignore_whitespace: ignore whitespace characters in scoring
+    :type ignore_whitespace: bool
+    :return: the sentence level CHRF score.
+    :rtype: float
+    """
+
+    assert len(references) == len(
+        hypotheses
+    ), "The number of hypotheses and their references should be the same"
+    num_sents = len(hypotheses)
+
+    # Keep f-scores for each n-gram order separate
+    ngram_fscores = defaultdict(list)
+
+    # Iterate through each hypothesis and their corresponding references.
+    for reference, hypothesis in zip(references, hypotheses):
+        # preprocess both reference and hypothesis
+        reference = _preprocess(reference, ignore_whitespace)
+        hypothesis = _preprocess(hypothesis, ignore_whitespace)
+
+        # Calculate f-scores for each sentence and for each n-gram order
+        # separately.
+        for n in range(min_len, max_len + 1):
+            # Compute the precision, recall, fscore and support.
+            prec, rec, fscore, tp = chrf_precision_recall_fscore_support(
+                reference, hypothesis, n, beta=beta
+            )
+            ngram_fscores[n].append(fscore)
+
+    # how many n-gram sizes
+    num_ngram_sizes = len(ngram_fscores)
+
+    # sum of f-scores over all sentences for each n-gram order
+    total_scores = [sum(fscores) for n, fscores in ngram_fscores.items()]
+
+    # macro-average over n-gram orders and over all sentences
+    return (sum(total_scores) / num_ngram_sizes) / num_sents
--- a/backend/venv/Lib/site-packages/nltk/translate/gale_church.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/gale_church.py
@@ -0,0 +1,263 @@
+# Natural Language Toolkit: Gale-Church Aligner
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Torsten Marek <marek@ifi.uzh.ch>
+# Contributor: Cassidy Laidlaw, Liling Tan
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+
+A port of the Gale-Church Aligner.
+
+Gale & Church (1993), A Program for Aligning Sentences in Bilingual Corpora.
+https://aclweb.org/anthology/J93-1004.pdf
+
+"""
+
+import math
+
+# NOTE(review): ``norm`` is not a standard-library module, so unless a
+# top-level module of that name happens to be installed the first import
+# raises ImportError and the pure-Python fallback below is what gets used,
+# even when scipy is available -- confirm whether ``scipy.stats.norm.logsf``
+# was the intended source of ``norm_logsf``.
+try:
+    from norm import logsf as norm_logsf
+    from scipy.stats import norm
+except ImportError:
+
+    def erfcc(x):
+        """Complementary error function."""
+        # Polynomial-in-t approximation evaluated on |x|; the coefficients
+        # look like those of the Numerical Recipes ``erfcc`` routine
+        # -- TODO confirm the source and its error bound.
+        z = abs(x)
+        t = 1 / (1 + 0.5 * z)
+        r = t * math.exp(
+            -z * z
+            - 1.26551223
+            + t
+            * (
+                1.00002368
+                + t
+                * (
+                    0.37409196
+                    + t
+                    * (
+                        0.09678418
+                        + t
+                        * (
+                            -0.18628806
+                            + t
+                            * (
+                                0.27886807
+                                + t
+                                * (
+                                    -1.13520398
+                                    + t
+                                    * (1.48851587 + t * (-0.82215223 + t * 0.17087277))
+                                )
+                            )
+                        )
+                    )
+                )
+            )
+        )
+        # r was computed for |x|; negative arguments are mapped through
+        # 2 - r.
+        if x >= 0.0:
+            return r
+        else:
+            return 2.0 - r
+
+    def norm_cdf(x):
+        """Return the area under the normal distribution from M{-∞..x}."""
+        return 1 - 0.5 * erfcc(x / math.sqrt(2))
+
+    def norm_logsf(x):
+        """Return the log of C{1 - norm_cdf(x)}, or -inf when the log is undefined."""
+        try:
+            return math.log(1 - norm_cdf(x))
+        except ValueError:
+            # math.log rejects a non-positive argument, which happens once
+            # norm_cdf(x) has reached 1.
+            return float("-inf")
+
+
+# Natural logarithm of 2; a constant term of the cost computed in align_log_prob.
+LOG2 = math.log(2)
+
+
+class LanguageIndependent:
+    """Default parameter set for the aligner; used as a namespace of constants."""
+
+    # These are the language-independent probabilities and parameters
+    # given in Gale & Church
+
+    # Prior probability of each alignment type, keyed by
+    # (number of source sentences, number of target sentences).
+    # align_blocks tries the types in this insertion order and keeps the first
+    # one that reaches the minimum cost, so the order matters for ties.
+    # for the computation, l_1 is always the language with less characters
+    PRIORS = {
+        (1, 0): 0.0099,
+        (0, 1): 0.0099,
+        (1, 1): 0.89,
+        (2, 1): 0.089,
+        (1, 2): 0.089,
+        (2, 2): 0.011,
+    }
+
+    # Factor that align_log_prob applies to the source length before comparing
+    # it with the target length.
+    AVERAGE_CHARACTERS = 1
+    # Multiplied with the mean length under the square root in align_log_prob.
+    VARIANCE_CHARACTERS = 6.8
+
+
+def trace(backlinks, source_sents_lens, target_sents_lens):
+    """
+    Traverse the alignment cost from the tracebacks and retrieves
+    appropriate sentence pairs.
+
+    :param backlinks: A dictionary where the key is the alignment points and value is the cost (referencing the LanguageIndependent.PRIORS)
+    :type backlinks: dict
+    :param source_sents_lens: A list of target sentences' lengths
+    :type source_sents_lens: list(int)
+    :param target_sents_lens: A list of target sentences' lengths
+    :type target_sents_lens: list(int)
+    """
+    links = []
+    position = (len(source_sents_lens), len(target_sents_lens))
+    while position != (0, 0) and all(p >= 0 for p in position):
+        try:
+            s, t = backlinks[position]
+        except TypeError:
+            position = (position[0] - 1, position[1] - 1)
+            continue
+        for i in range(s):
+            for j in range(t):
+                links.append((position[0] - i - 1, position[1] - j - 1))
+        position = (position[0] - s, position[1] - t)
+
+    return links[::-1]
+
+
+def align_log_prob(i, j, source_sents, target_sents, alignment, params):
+    """Returns the log probability of the two sentences C{source_sents[i]}, C{target_sents[j]}
+    being aligned with a specific C{alignment}.
+
+    @param i: The offset of the source sentence.
+    @param j: The offset of the target sentence.
+    @param source_sents: The list of source sentence lengths.
+    @param target_sents: The list of target sentence lengths.
+    @param alignment: The alignment type, a tuple of two integers.
+    @param params: The sentence alignment parameters.
+
+    @returns: The log probability of a specific alignment between the two sentences, given the parameters.
+    """
+    l_s = sum(source_sents[i - offset - 1] for offset in range(alignment[0]))
+    l_t = sum(target_sents[j - offset - 1] for offset in range(alignment[1]))
+    try:
+        # actually, the paper says l_s * params.VARIANCE_CHARACTERS, this is based on the C
+        # reference implementation. With l_s in the denominator, insertions are impossible.
+        m = (l_s + l_t / params.AVERAGE_CHARACTERS) / 2
+        delta = (l_s * params.AVERAGE_CHARACTERS - l_t) / math.sqrt(
+            m * params.VARIANCE_CHARACTERS
+        )
+    except ZeroDivisionError:
+        return float("-inf")
+
+    return -(LOG2 + norm_logsf(abs(delta)) + math.log(params.PRIORS[alignment]))
+
+
+def align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependent):
+    """Return the sentence alignment of two text blocks (usually paragraphs).
+
+        >>> align_blocks([5,5,5], [7,7,7])
+        [(0, 0), (1, 1), (2, 2)]
+        >>> align_blocks([10,5,5], [12,20])
+        [(0, 0), (1, 1), (2, 1)]
+        >>> align_blocks([12,20], [10,5,5])
+        [(0, 0), (1, 1), (1, 2)]
+        >>> align_blocks([10,2,10,10,2,10], [12,3,20,3,12])
+        [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]
+
+    The alignment is found by dynamic programming over all prefixes of the two
+    length lists: every cell (i, j) keeps the smallest accumulated cost, as
+    computed with C{align_log_prob}, and a backlink to the alignment type that
+    produced it. The backlinks are finally turned into index pairs by C{trace}.
+
+    @param source_sents_lens: The list of source sentence lengths.
+    @param target_sents_lens: The list of target sentence lengths.
+    @param params: the sentence alignment parameters.
+    @return: The sentence alignments, a list of index pairs.
+    """
+
+    # Order matters: on equal cost the first type in this list is kept.
+    alignment_types = list(params.PRIORS.keys())
+
+    # D holds the cost rows that are still needed: D[-1] is the row being
+    # filled for the current i, the rows before it belong to i-1 and i-2.
+    # At most three rows are kept in the history (with the last of them being filled)
+    D = [[]]
+
+    # Maps (i, j) to the alignment type chosen for that cell, or to None when
+    # no alignment type was chosen.
+    backlinks = {}
+
+    for i in range(len(source_sents_lens) + 1):
+        for j in range(len(target_sents_lens) + 1):
+            min_dist = float("inf")
+            min_align = None
+            for a in alignment_types:
+                # Row (as a negative index into D, relative to the current
+                # row) and column of the cell this alignment type extends.
+                prev_i = -1 - a[0]
+                prev_j = j - a[1]
+                # Skip types whose predecessor cell is not available: the row
+                # is not in D or the column lies before the block start.
+                if prev_i < -len(D) or prev_j < 0:
+                    continue
+                p = D[prev_i][prev_j] + align_log_prob(
+                    i, j, source_sents_lens, target_sents_lens, a, params
+                )
+                # Strict comparison: a later type only wins if it is cheaper.
+                if p < min_dist:
+                    min_dist = p
+                    min_align = a
+
+            # No alignment type lowered the cost below infinity (this is
+            # always the case for the origin cell (0, 0), where none is
+            # applicable): the accumulated cost starts over at 0.
+            if min_dist == float("inf"):
+                min_dist = 0
+
+            backlinks[(i, j)] = min_align
+            D[-1].append(min_dist)
+
+        # Keep at most two completed rows, then open a fresh row for the next i.
+        if len(D) > 2:
+            D.pop(0)
+        D.append([])
+
+    return trace(backlinks, source_sents_lens, target_sents_lens)
+
+
+def align_texts(source_blocks, target_blocks, params=LanguageIndependent):
+    """Creates the sentence alignment of two texts.
+
+    Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
+    alignment links.
+
+    Each block consists of a list that contains the lengths (in characters) of the sentences
+    in this block.
+
+    @param source_blocks: The list of blocks in the source text.
+    @param target_blocks: The list of blocks in the target text.
+    @param params: the sentence alignment parameters.
+
+    @returns: A list of sentence alignment lists
+    """
+    if len(source_blocks) != len(target_blocks):
+        raise ValueError(
+            "Source and target texts do not have the same number of blocks."
+        )
+
+    return [
+        align_blocks(source_block, target_block, params)
+        for source_block, target_block in zip(source_blocks, target_blocks)
+    ]
+
+
+# File I/O functions; may belong in a corpus reader
+
+
+def split_at(it, split_value):
+    """Splits an iterator C{it} at values of C{split_value}.
+
+    Each instance of C{split_value} is swallowed. The iterator produces
+    subiterators which need to be consumed fully before the next subiterator
+    can be used.
+    """
+
+    def _chunk_iterator(first):
+        v = first
+        while v != split_value:
+            yield v
+            v = it.next()
+
+    while True:
+        yield _chunk_iterator(it.next())
+
+
+def parse_token_stream(stream, soft_delimiter, hard_delimiter):
+    """Parses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens)
+    and blocks (using C{hard_delimiter} tokens) for use with the L{align_texts} function.
+    """
+    return [
+        [
+            sum(len(token) for token in sentence_it)
+            for sentence_it in split_at(block_it, soft_delimiter)
+        ]
+        for block_it in split_at(stream, hard_delimiter)
+    ]
--- a/backend/venv/Lib/site-packages/nltk/translate/gdfa.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/gdfa.py
@@ -0,0 +1,138 @@
+# Natural Language Toolkit: GDFA word alignment symmetrization
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors: Liling Tan
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+from collections import defaultdict
+
+
+def grow_diag_final_and(srclen, trglen, e2f, f2e):
+    """
+    Symmetrizes the source-to-target and target-to-source word alignment
+    outputs with the grow-diag-final-and (GDFA) heuristic (Koehn et al., 2005).
+
+    Step 1: Find the intersection of the bidirectional alignment.
+
+    Step 2: Search for additional neighbor alignment points to be added, given
+            these criteria: (i) neighbor alignments points are not in the
+            intersection and (ii) neighbor alignments are in the union.
+
+    Step 3: Add all other alignment points that are not in the intersection, not in
+            the neighboring alignments that met the criteria but in the original
+            forward/backward alignment outputs.
+
+        >>> forw = ('0-0 2-1 9-2 21-3 10-4 7-5 11-6 9-7 12-8 1-9 3-10 '
+        ...         '4-11 17-12 17-13 25-14 13-15 24-16 11-17 28-18')
+        >>> back = ('0-0 1-9 2-9 3-10 4-11 5-12 6-6 7-5 8-6 9-7 10-4 '
+        ...         '11-6 12-8 13-12 15-12 17-13 18-13 19-12 20-13 '
+        ...         '21-3 22-12 23-14 24-17 25-15 26-17 27-18 28-18')
+        >>> srctext = ("この よう な ハロー 白色 わい 星 の Ｌ 関数 "
+        ...            "は Ｌ と 共 に 不連続 に 増加 する こと が "
+        ...            "期待 さ れる こと を 示し た 。")
+        >>> trgtext = ("Therefore , we expect that the luminosity function "
+        ...            "of such halo white dwarfs increases discontinuously "
+        ...            "with the luminosity .")
+        >>> srclen = len(srctext.split())
+        >>> trglen = len(trgtext.split())
+        >>>
+        >>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
+        >>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
+        ...         (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7), (20,
+        ...         13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14), (7, 5),
+        ...         (25, 14), (1, 9), (17, 13), (4, 11), (11, 17), (9, 2), (22,
+        ...         12), (27, 18), (24, 16), (21, 3), (19, 12), (17, 12), (5,
+        ...         12), (11, 6), (12, 8)]))
+        True
+
+    References:
+    Koehn, P., A. Axelrod, A. Birch, C. Callison, M. Osborne, and D. Talbot.
+    2005. Edinburgh System Description for the 2005 IWSLT Speech
+    Translation Evaluation. In MT Eval Workshop.
+
+    :type srclen: int
+    :param srclen: the number of tokens in the source language
+    :type trglen: int
+    :param trglen: the number of tokens in the target language
+    :type e2f: str
+    :param e2f: the forward word alignment outputs from source-to-target
+                language (in pharaoh output format)
+    :type f2e: str
+    :param f2e: the backward word alignment outputs from target-to-source
+                language (in pharaoh output format)
+    :rtype: list(tuple(int))
+    :return: the symmetrized alignment points from the GDFA algorithm, as a
+             sorted list of (source index, target index) pairs
+    """
+
+    # Converts pharaoh text format ("i-j i-j ...") into list of int tuples.
+    e2f = [tuple(map(int, a.split("-"))) for a in e2f.split()]
+    f2e = [tuple(map(int, a.split("-"))) for a in f2e.split()]
+
+    # Offsets of the eight cells around a point: the four direct neighbors
+    # first, then the four diagonal ones.
+    neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1), (-1, -1), (-1, 1), (1, -1), (1, 1)]
+    alignment = set(e2f).intersection(set(f2e))  # Find the intersection.
+    union = set(e2f).union(set(f2e))
+
+    # *aligned* is used to check if neighbors are aligned in grow_diag()
+    # It maps "e" to the set of first indices and "f" to the set of second
+    # indices of the points added to *alignment* so far.
+    aligned = defaultdict(set)
+    for i, j in alignment:
+        aligned["e"].add(i)
+        aligned["f"].add(j)
+
+    def grow_diag():
+        """
+        Search for the neighbor points and add them to the intersected
+        alignment points if criteria are met.
+        """
+        # Start one below the current size so the loop body runs at least once.
+        prev_len = len(alignment) - 1
+        # iterate until no new points added
+        while prev_len < len(alignment):
+            no_new_points = True
+            # for english word e = 0 ... en
+            for e in range(srclen):
+                # for foreign word f = 0 ... fn
+                for f in range(trglen):
+                    # if ( e aligned with f)
+                    if (e, f) in alignment:
+                        # for each neighboring point (e-new, f-new)
+                        for neighbor in neighbors:
+                            neighbor = tuple(i + j for i, j in zip((e, f), neighbor))
+                            e_new, f_new = neighbor
+                            # if ( ( e-new not aligned and f-new not aligned)
+                            # and (e-new, f-new in union(e2f, f2e) )
+                            # NOTE(review): *aligned* is a dict keyed by the
+                            # strings "e" and "f", so the two ``not in`` tests
+                            # look at its keys, not at the index sets stored
+                            # under them. Confirm whether aligned["e"] and
+                            # aligned["f"] were intended; the doctest above
+                            # reflects the current behavior.
+                            if (
+                                e_new not in aligned and f_new not in aligned
+                            ) and neighbor in union:
+                                alignment.add(neighbor)
+                                aligned["e"].add(e_new)
+                                aligned["f"].add(f_new)
+                                # Counted for every neighbor that passes the
+                                # test, whether or not it was already present
+                                # in *alignment*.
+                                prev_len += 1
+                                no_new_points = False
+            # iterate until no new points added
+            if no_new_points:
+                break
+
+    def final_and(a):
+        """
+        Adds remaining points that are not in the intersection, not in the
+        neighboring alignments but in the original *e2f* and *f2e* alignments
+
+        NOTE(review): the argument *a* is not referenced in the body; the
+        candidate points are taken from *union* on every call.
+        """
+        # for english word e = 0 ... en
+        for e_new in range(srclen):
+            # for foreign word f = 0 ... fn
+            for f_new in range(trglen):
+                # if ( ( e-new not aligned and f-new not aligned)
+                # and (e-new, f-new in union(e2f, f2e) )
+                # Same dict-key membership test as in grow_diag().
+                if (
+                    e_new not in aligned
+                    and f_new not in aligned
+                    and (e_new, f_new) in union
+                ):
+                    alignment.add((e_new, f_new))
+                    aligned["e"].add(e_new)
+                    aligned["f"].add(f_new)
+
+    grow_diag()
+    final_and(e2f)
+    final_and(f2e)
+    # The set is returned as a sorted list of tuples.
+    return sorted(alignment)
--- a/backend/venv/Lib/site-packages/nltk/translate/gleu_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/gleu_score.py
@@ -0,0 +1,190 @@
+# Natural Language Toolkit: GLEU Score
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors:
+# Contributors: Mike Schuster, Michael Wayne Goodman, Liling Tan
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+""" GLEU score implementation. """
+
+from collections import Counter
+
+from nltk.util import everygrams, ngrams
+
+
+def sentence_gleu(references, hypothesis, min_len=1, max_len=4):
+    """
+    Compute the sentence level GLEU (Google-BLEU) score of one hypothesis.
+
+    The sentence is scored by wrapping it as a one-sentence corpus and
+    delegating to ``corpus_gleu()``. The metric is described in
+
+        Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
+        Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
+        Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
+        Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
+        George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith,
+        Jason Riesa, Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes,
+        Jeffrey Dean. (2016) Google’s Neural Machine Translation System:
+        Bridging the Gap between Human and Machine Translation.
+        eprint arXiv:1609.08144. https://arxiv.org/pdf/1609.08144v2.pdf
+        Retrieved on 27 Oct 2016.
+
+    From Wu et al. (2016):
+        "The BLEU score has some undesirable properties when used for single
+         sentences, as it was designed to be a corpus measure. We therefore
+         use a slightly different score for our RL experiments which we call
+         the 'GLEU score'. For the GLEU score, we record all sub-sequences of
+         1, 2, 3 or 4 tokens in output and target sequence (n-grams). We then
+         compute a recall, which is the ratio of the number of matching n-grams
+         to the number of total n-grams in the target (ground truth) sequence,
+         and a precision, which is the ratio of the number of matching n-grams
+         to the number of total n-grams in the generated output sequence. Then
+         GLEU score is simply the minimum of recall and precision. This GLEU
+         score's range is always between 0 (no matches) and 1 (all match) and
+         it is symmetrical when switching output and target. According to
+         our experiments, GLEU score correlates quite well with the BLEU
+         metric on a corpus level but does not have its drawbacks for our per
+         sentence reward objective."
+
+    Note: A list of references is required, even when there is only one
+          reference (consistent with bleu_score.sentence_bleu()). The initial
+          implementation only allowed a single reference.
+
+    The infamous "the the the ... " example
+
+        >>> ref = 'the cat is on the mat'.split()
+        >>> hyp = 'the the the the the the the'.split()
+        >>> sentence_gleu([ref], hyp)  # doctest: +ELLIPSIS
+        0.0909...
+
+    An example to evaluate normal machine translation outputs
+
+        >>> ref1 = str('It is a guide to action that ensures that the military '
+        ...            'will forever heed Party commands').split()
+        >>> hyp1 = str('It is a guide to action which ensures that the military '
+        ...            'always obeys the commands of the party').split()
+        >>> hyp2 = str('It is to insure the troops forever hearing the activity '
+        ...            'guidebook that party direct').split()
+        >>> sentence_gleu([ref1], hyp1) # doctest: +ELLIPSIS
+        0.4393...
+        >>> sentence_gleu([ref1], hyp2) # doctest: +ELLIPSIS
+        0.1206...
+
+    :param references: a list of reference sentences
+    :type references: list(list(str))
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param min_len: The minimum order of n-gram this function should extract.
+    :type min_len: int
+    :param max_len: The maximum order of n-gram this function should extract.
+    :type max_len: int
+    :return: the sentence level GLEU score.
+    :rtype: float
+    """
+    # A corpus holding exactly this one sentence and its references.
+    one_sentence_references = [references]
+    one_sentence_hypotheses = [hypothesis]
+    return corpus_gleu(
+        one_sentence_references,
+        one_sentence_hypotheses,
+        min_len=min_len,
+        max_len=max_len,
+    )
+
+
+def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4):
+    """
+    Compute one corpus-level GLEU score (aka. system-level GLEU) over all
+    hypotheses and their respective references.
+
+    The score is not the mean of the sentence level GLEU scores (i.e. not a
+    macro-average). Following Wu et al. (2016), the number of matching
+    n-grams and the larger of the hypothesis / reference n-gram totals are
+    summed over all sentences, and the score is the ratio of the two sums.
+    When a hypothesis has several references, the reference giving the
+    highest ratio is the one that contributes to the sums.
+
+    From Mike Schuster (via email):
+        "For the corpus, we just add up the two statistics n_match and
+         n_all = max(n_all_output, n_all_target) for all sentences, then
+         calculate gleu_score = n_match / n_all, so it is not just a mean of
+         the sentence gleu scores (in our case, longer sentences count more,
+         which I think makes sense as they are more difficult to translate)."
+
+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...         'ensures', 'that', 'the', 'military', 'always',
+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...          'heed', 'Party', 'commands']
+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...          'guarantees', 'the', 'military', 'forces', 'always',
+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...          'of', 'the', 'party']
+
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
+    ...         'interested', 'in', 'world', 'history']
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
+    ...          'because', 'he', 'read', 'the', 'book']
+
+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+    >>> hypotheses = [hyp1, hyp2]
+    >>> corpus_gleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
+    0.5673...
+
+    The example below show that corpus_gleu() is different from averaging
+    sentence_gleu() for hypotheses
+
+    >>> score1 = sentence_gleu([ref1a], hyp1)
+    >>> score2 = sentence_gleu([ref2a], hyp2)
+    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
+    0.6144...
+
+    :param list_of_references: a list of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param min_len: The minimum order of n-gram this function should extract.
+    :type min_len: int
+    :param max_len: The maximum order of n-gram this function should extract.
+    :type max_len: int
+    :return: The corpus-level GLEU score; 0.0 when nothing could be counted.
+    :rtype: float
+    """
+    # sanity check
+    assert len(list_of_references) == len(
+        hypotheses
+    ), "The number of hypotheses and their reference(s) should be the same"
+
+    # Running totals over the whole corpus.
+    total_match = 0
+    total_all = 0
+
+    for sentence_refs, sentence_hyp in zip(list_of_references, hypotheses):
+        hyp_counter = Counter(everygrams(sentence_hyp, min_len, max_len))
+        hyp_total = sum(hyp_counter.values())  # True positives + False positives.
+
+        # One (matches, denominator) pair per usable reference.
+        candidates = []
+        for ref in sentence_refs:
+            ref_counter = Counter(everygrams(ref, min_len, max_len))
+            ref_total = sum(ref_counter.values())  # True positives + False negatives.
+
+            # Counter intersection keeps the smaller count of each shared n-gram.
+            matched = sum((ref_counter & hyp_counter).values())  # True positives.
+
+            # precision = matched / hyp_total and recall = matched / ref_total
+            # share their numerator, so min(precision, recall) equals
+            # matched / max(hyp_total, ref_total); one division is enough.
+            denominator = max(hyp_total, ref_total)
+            if denominator > 0:
+                candidates.append((matched, denominator))
+
+        if not candidates:
+            continue
+
+        # use the reference yielding the highest score
+        best_match, best_all = max(candidates, key=lambda pair: pair[0] / pair[1])
+        total_match += best_match
+        total_all += best_all
+
+    # corner case: empty corpus or empty references---don't divide by zero!
+    return total_match / total_all if total_all else 0.0
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm1.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm1.py
@@ -0,0 +1,251 @@
+# Natural Language Toolkit: IBM Model 1
+#
+# Copyright (C) 2001-2013 NLTK Project
+# Author: Chin Yee Lee <c.lee32@student.unimelb.edu.au>
+#         Hengfeng Li <hengfeng12345@gmail.com>
+#         Ruxin Hou <r.hou@student.unimelb.edu.au>
+#         Calvin Tanujaya Lim <c.tanujayalim@gmail.com>
+# Based on earlier version by:
+#         Will Zhang <wilzzha@gmail.com>
+#         Guan Gui <ggui@student.unimelb.edu.au>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Lexical translation model that ignores word order.
+
+In IBM Model 1, word order is ignored for simplicity. As long as the
+word alignments are equivalent, it doesn't matter where the word occurs
+in the source or target sentence. Thus, the following three alignments
+are equally likely::
+
+    Source: je mange du jambon
+    Target: i eat some ham
+    Alignment: (0,0) (1,1) (2,2) (3,3)
+
+    Source: je mange du jambon
+    Target: some ham eat i
+    Alignment: (0,2) (1,3) (2,1) (3,0)
+
+    Source: du jambon je mange
+    Target: eat i some ham
+    Alignment: (0,3) (1,2) (2,0) (3,1)
+
+Note that an alignment is represented here as
+(word_index_in_target, word_index_in_source).
+
+The EM algorithm used in Model 1 is:
+
+:E step: In the training data, count how many times a source language
+         word is translated into a target language word, weighted by
+         the prior probability of the translation.
+
+:M step: Estimate the new probability of translation based on the
+         counts from the Expectation step.
+
+Notations
+---------
+
+:i: Position in the source sentence
+     Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+:j: Position in the target sentence
+     Valid values are 1, 2, ..., length of target sentence
+:s: A word in the source language
+:t: A word in the target language
+
+References
+----------
+
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+import warnings
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, Alignment, IBMModel
+from nltk.translate.ibm_model import Counts
+
+
+class IBMModel1(IBMModel):
+    """
+    Lexical translation model that ignores word order
+
+    >>> bitext = []
+    >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus', 'ist', 'ja', 'groß'], ['the', 'house', 'is', 'big']))
+    >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house']))
+    >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book']))
+    >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book']))
+
+    >>> ibm1 = IBMModel1(bitext, 5)
+
+    >>> print(round(ibm1.translation_table['buch']['book'], 3))
+    0.889
+    >>> print(round(ibm1.translation_table['das']['book'], 3))
+    0.062
+    >>> print(round(ibm1.translation_table['buch'][None], 3))
+    0.113
+    >>> print(round(ibm1.translation_table['ja'][None], 3))
+    0.073
+
+    >>> test_sentence = bitext[2]
+    >>> test_sentence.words
+    ['das', 'buch', 'ist', 'ja', 'klein']
+    >>> test_sentence.mots
+    ['the', 'book', 'is', 'small']
+    >>> test_sentence.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)])
+
+    """
+
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
+        """
+        Build a lexical translation model by training on
+        ``sentence_aligned_corpus``, then align every sentence pair in it.
+
+        Translation direction is from ``AlignedSent.mots`` to
+        ``AlignedSent.words``.
+
+        :param sentence_aligned_corpus: Sentence-aligned parallel corpus
+        :type sentence_aligned_corpus: list(AlignedSent)
+
+        :param iterations: Number of iterations to run training algorithm
+        :type iterations: int
+
+        :param probability_tables: Optional. Use this to pass in custom
+            probability values. If not specified, probabilities will be
+            set to a uniform distribution, or some other sensible value.
+            If specified, the following entry must be present:
+            ``translation_table``.
+            See ``IBMModel`` for the type and purpose of this table.
+        :type probability_tables: dict[str]: object
+        """
+        super().__init__(sentence_aligned_corpus)
+
+        if probability_tables is not None:
+            # Start from the probabilities supplied by the caller
+            self.translation_table = probability_tables["translation_table"]
+        else:
+            self.set_uniform_probabilities(sentence_aligned_corpus)
+
+        for _ in range(iterations):
+            self.train(sentence_aligned_corpus)
+
+        self.align_all(sentence_aligned_corpus)
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        """
+        Give every target word a translation table whose entries all
+        default to 1 / (size of the target vocabulary).
+        """
+        vocab_size = len(self.trg_vocab)
+        initial_prob = 1 / vocab_size
+        if initial_prob < IBMModel.MIN_PROB:
+            message = (
+                "Target language vocabulary is too large ("
+                + str(vocab_size)
+                + " words). "
+                "Results may be less accurate."
+            )
+            warnings.warn(message)
+
+        for trg_word in self.trg_vocab:
+            self.translation_table[trg_word] = defaultdict(lambda: initial_prob)
+
+    def train(self, parallel_corpus):
+        """
+        Run one EM iteration over ``parallel_corpus`` and update the
+        lexical translation probabilities.
+        """
+        counts = Counts()
+        for sentence_pair in parallel_corpus:
+            trg_sentence = sentence_pair.words
+            # Position 0 of the source side is the NULL token
+            src_sentence = [None] + sentence_pair.mots
+
+            # E step (a): Compute normalization factors to weigh counts
+            total_count = self.prob_all_alignments(src_sentence, trg_sentence)
+
+            # E step (b): Collect counts
+            for t in trg_sentence:
+                normalizer = total_count[t]
+                for s in src_sentence:
+                    normalized_count = self.prob_alignment_point(s, t) / normalizer
+                    counts.t_given_s[t][s] += normalized_count
+                    counts.any_t_given_s[s] += normalized_count
+
+        # M step: Update probabilities with maximum likelihood estimate
+        self.maximize_lexical_translation_probabilities(counts)
+
+    def prob_all_alignments(self, src_sentence, trg_sentence):
+        """
+        Computes the probability of all possible word alignments,
+        expressed as a marginal distribution over target words t
+
+        Each entry in the return value represents the contribution to
+        the total alignment probability by the target word t.
+
+        To obtain probability(alignment | src_sentence, trg_sentence),
+        simply sum the entries in the return value.
+
+        :return: Probability of t for all s in ``src_sentence``
+        :rtype: dict(str): float
+        """
+        marginals = defaultdict(float)
+        for trg_word in trg_sentence:
+            for src_word in src_sentence:
+                marginals[trg_word] += self.prob_alignment_point(src_word, trg_word)
+        return marginals
+
+    def prob_alignment_point(self, s, t):
+        """
+        Probability that word ``t`` in the target sentence is aligned to
+        word ``s`` in the source sentence, read from the translation table
+        """
+        table_for_t = self.translation_table[t]
+        return table_for_t[s]
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence, never lower than ``IBMModel.MIN_PROB``
+        """
+        trg_sentence = alignment_info.trg_sentence
+        src_sentence = alignment_info.src_sentence
+
+        prob = 1.0
+        for j, i in enumerate(alignment_info.alignment):
+            if j > 0:  # position 0 is the dummy zeroeth element
+                prob *= self.translation_table[trg_sentence[j]][src_sentence[i]]
+
+        return max(prob, IBMModel.MIN_PROB)
+
+    def align_all(self, parallel_corpus):
+        """
+        Set the best alignment on every sentence pair of ``parallel_corpus``.
+        """
+        for sentence_pair in parallel_corpus:
+            self.align(sentence_pair)
+
+    def align(self, sentence_pair):
+        """
+        Determines the best word alignment for one sentence pair from
+        the corpus that the model was trained on.
+
+        The best alignment will be set in ``sentence_pair`` when the
+        method returns. In contrast with the internal implementation of
+        IBM models, the word indices in the ``Alignment`` are zero-
+        indexed, not one-indexed.
+
+        :param sentence_pair: A sentence in the source language and its
+            counterpart sentence in the target language
+        :type sentence_pair: AlignedSent
+        """
+        links = []
+
+        for j, trg_word in enumerate(sentence_pair.words):
+            probs_for_trg_word = self.translation_table[trg_word]
+            # Start out aligned with the NULL token (alignment point None)
+            best_prob = max(probs_for_trg_word[None], IBMModel.MIN_PROB)
+            best_point = None
+            for i, src_word in enumerate(sentence_pair.mots):
+                candidate_prob = probs_for_trg_word[src_word]
+                if candidate_prob >= best_prob:  # prefer newer word in case of tie
+                    best_prob, best_point = candidate_prob, i
+
+            links.append((j, best_point))
+
+        sentence_pair.alignment = Alignment(links)
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm2.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm2.py
@@ -0,0 +1,319 @@
+# Natural Language Toolkit: IBM Model 2
+#
+# Copyright (C) 2001-2013 NLTK Project
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Lexical translation model that considers word order.
+
+IBM Model 2 improves on Model 1 by accounting for word order.
+An alignment probability is introduced, a(i | j,l,m), which predicts
+a source word position, given its aligned target word's position.
+
+The EM algorithm used in Model 2 is:
+
+:E step: In the training data, collect counts, weighted by prior
+         probabilities.
+
+         - (a) count how many times a source language word is translated
+               into a target language word
+         - (b) count how many times a particular position in the source
+               sentence is aligned to a particular position in the target
+               sentence
+
+:M step: Estimate new probabilities based on the counts from the E step
+
+Notations
+---------
+
+:i: Position in the source sentence
+     Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+:j: Position in the target sentence
+     Valid values are 1, 2, ..., length of target sentence
+:l: Number of words in the source sentence, excluding NULL
+:m: Number of words in the target sentence
+:s: A word in the source language
+:t: A word in the target language
+
+References
+----------
+
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+import warnings
+from collections import defaultdict
+
+from nltk.translate import AlignedSent, Alignment, IBMModel, IBMModel1
+from nltk.translate.ibm_model import Counts
+
+
+class IBMModel2(IBMModel):
+    """
+    Lexical translation model that considers word order
+
+    >>> bitext = []
+    >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus', 'ist', 'ja', 'groß'], ['the', 'house', 'is', 'big']))
+    >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house']))
+    >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book']))
+    >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book']))
+
+    >>> ibm2 = IBMModel2(bitext, 5)
+
+    >>> print(round(ibm2.translation_table['buch']['book'], 3))
+    1.0
+    >>> print(round(ibm2.translation_table['das']['book'], 3))
+    0.0
+    >>> print(round(ibm2.translation_table['buch'][None], 3))
+    0.0
+    >>> print(round(ibm2.translation_table['ja'][None], 3))
+    0.0
+
+    >>> print(round(ibm2.alignment_table[1][1][2][2], 3))
+    0.939
+    >>> print(round(ibm2.alignment_table[1][2][2][2], 3))
+    0.0
+    >>> print(round(ibm2.alignment_table[2][2][4][5], 3))
+    1.0
+
+    >>> test_sentence = bitext[2]
+    >>> test_sentence.words
+    ['das', 'buch', 'ist', 'ja', 'klein']
+    >>> test_sentence.mots
+    ['the', 'book', 'is', 'small']
+    >>> test_sentence.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, 2), (4, 3)])
+
+    """
+
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
+        """
+        Train on ``sentence_aligned_corpus`` and create a lexical
+        translation model and an alignment model.
+
+        Translation direction is from ``AlignedSent.mots`` to
+        ``AlignedSent.words``.
+
+        :param sentence_aligned_corpus: Sentence-aligned parallel corpus
+        :type sentence_aligned_corpus: list(AlignedSent)
+
+        :param iterations: Number of iterations to run training algorithm
+        :type iterations: int
+
+        :param probability_tables: Optional. Use this to pass in custom
+            probability values. If not specified, probabilities will be
+            set to a uniform distribution, or some other sensible value.
+            If specified, all the following entries must be present:
+            ``translation_table``, ``alignment_table``.
+            See ``IBMModel`` for the type and purpose of these tables.
+        :type probability_tables: dict[str]: object
+        """
+        super().__init__(sentence_aligned_corpus)
+
+        if probability_tables is None:
+            # Get translation probabilities from IBM Model 1
+            # Run more iterations of training for Model 1, since it is
+            # faster than Model 2
+            ibm1 = IBMModel1(sentence_aligned_corpus, 2 * iterations)
+            self.translation_table = ibm1.translation_table
+            self.set_uniform_probabilities(sentence_aligned_corpus)
+        else:
+            # Set user-defined probabilities
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+
+        for n in range(0, iterations):
+            self.train(sentence_aligned_corpus)
+
+        self.align_all(sentence_aligned_corpus)
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        # a(i | j,l,m) = 1 / (l+1) for all i, j, l, m
+        l_m_combinations = set()
+        for aligned_sentence in sentence_aligned_corpus:
+            l = len(aligned_sentence.mots)
+            m = len(aligned_sentence.words)
+            if (l, m) not in l_m_combinations:
+                l_m_combinations.add((l, m))
+                initial_prob = 1 / (l + 1)
+                if initial_prob < IBMModel.MIN_PROB:
+                    warnings.warn(
+                        "A source sentence is too long ("
+                        + str(l)
+                        + " words). Results may be less accurate."
+                    )
+
+                for i in range(0, l + 1):
+                    for j in range(1, m + 1):
+                        self.alignment_table[i][j][l][m] = initial_prob
+
+    def train(self, parallel_corpus):
+        """
+        Run one EM iteration over ``parallel_corpus``
+
+        Expected counts for lexical translation and for alignment are
+        accumulated over every sentence pair, after which both
+        probability tables are re-estimated from those counts.
+
+        :param parallel_corpus: Sentence pairs providing ``mots``
+            (source side) and ``words`` (target side)
+        """
+        counts = Model2Counts()
+        for sentence_pair in parallel_corpus:
+            # Position 0 of the source is the NULL token; position 0 of
+            # the target is a placeholder so that real words are 1-indexed
+            src_sentence = [None] + sentence_pair.mots
+            trg_sentence = ["UNUSED"] + sentence_pair.words
+            l = len(sentence_pair.mots)
+            m = len(sentence_pair.words)
+
+            # E step (a): normalization factor for each target word
+            normalizers = self.prob_all_alignments(src_sentence, trg_sentence)
+
+            # E step (b): accumulate normalized counts
+            for j in range(1, m + 1):
+                t = trg_sentence[j]
+                for i, s in enumerate(src_sentence):
+                    point_prob = self.prob_alignment_point(
+                        i, j, src_sentence, trg_sentence
+                    )
+                    weight = point_prob / normalizers[t]
+
+                    counts.update_lexical_translation(weight, s, t)
+                    counts.update_alignment(weight, i, j, l, m)
+
+        # M step: maximum likelihood estimates from the collected counts
+        self.maximize_lexical_translation_probabilities(counts)
+        self.maximize_alignment_probabilities(counts)
+
+    def maximize_alignment_probabilities(self, counts):
+        """
+        M step for the alignment model
+
+        Every a(i | j,l,m) that received a count becomes
+        count(i,j,l,m) / count(any i,j,l,m), floored at
+        ``IBMModel.MIN_PROB``.
+
+        :param counts: Object exposing ``alignment`` and
+            ``alignment_for_any_i`` as filled in during the E step
+        """
+        floor = IBMModel.MIN_PROB
+        for i, by_j in counts.alignment.items():
+            for j, by_l in by_j.items():
+                for l, by_m in by_l.items():
+                    for m, count in by_m.items():
+                        total = counts.alignment_for_any_i[j][l][m]
+                        self.alignment_table[i][j][l][m] = max(count / total, floor)
+
+    def prob_all_alignments(self, src_sentence, trg_sentence):
+        """
+        Computes the probability of all possible word alignments,
+        expressed as a marginal distribution over target words t
+
+        For every target word t, the returned mapping holds the sum of
+        ``prob_alignment_point`` over all source positions (NULL
+        included) and over every target position where t occurs.
+        Position 0 of ``trg_sentence`` is a placeholder and is skipped.
+
+        To obtain probability(alignment | src_sentence, trg_sentence),
+        simply sum the entries in the return value.
+
+        :return: Probability of t for all s in ``src_sentence``
+        :rtype: dict(str): float
+        """
+        marginal_for_t = defaultdict(float)
+        src_positions = range(len(src_sentence))
+        for j, t in enumerate(trg_sentence):
+            if j == 0:
+                continue  # placeholder entry, not a real target word
+            for i in src_positions:
+                marginal_for_t[t] += self.prob_alignment_point(
+                    i, j, src_sentence, trg_sentence
+                )
+        return marginal_for_t
+
+    def prob_alignment_point(self, i, j, src_sentence, trg_sentence):
+        """
+        Probability that position j in ``trg_sentence`` is aligned to
+        position i in the ``src_sentence``
+
+        Computed as t(t | s) * a(i | j,l,m), where both sentences carry
+        an extra entry at index 0, so l and m are their lengths minus
+        one.
+        """
+        l, m = len(src_sentence) - 1, len(trg_sentence) - 1
+        s, t = src_sentence[i], trg_sentence[j]
+        lexical_prob = self.translation_table[t][s]
+        position_prob = self.alignment_table[i][j][l][m]
+        return lexical_prob * position_prob
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence
+
+        The value is the product, over every target position j >= 1,
+        of t(trg_word | src_word) * a(i | j,l,m), where i is the source
+        position that j is aligned to. The result is floored at
+        ``IBMModel.MIN_PROB``.
+
+        :param alignment_info: Object exposing ``src_sentence``,
+            ``trg_sentence`` and ``alignment``
+        """
+        l = len(alignment_info.src_sentence) - 1
+        m = len(alignment_info.trg_sentence) - 1
+
+        likelihood = 1.0
+        for j, i in enumerate(alignment_info.alignment):
+            # Entry 0 of the alignment is a dummy, so only j >= 1 counts
+            if j != 0:
+                trg_word = alignment_info.trg_sentence[j]
+                src_word = alignment_info.src_sentence[i]
+                lexical_prob = self.translation_table[trg_word][src_word]
+                likelihood *= lexical_prob * self.alignment_table[i][j][l][m]
+
+        return max(likelihood, IBMModel.MIN_PROB)
+
+    def align_all(self, parallel_corpus):
+        """
+        Call ``align`` on every sentence pair of ``parallel_corpus``,
+        in iteration order
+        """
+        for pair in parallel_corpus:
+            self.align(pair)
+
+    def align(self, sentence_pair):
+        """
+        Determines the best word alignment for one sentence pair from
+        the corpus that the model was trained on.
+
+        Each target word is considered independently: it starts out
+        aligned to NULL and is re-assigned to any source word whose
+        t(t | s) * a(i | j,l,m) is at least as high as the best value
+        seen so far.
+
+        The best alignment will be set in ``sentence_pair`` when the
+        method returns. In contrast with the internal implementation of
+        IBM models, the word indices in the ``Alignment`` are zero-
+        indexed, not one-indexed.
+
+        :param sentence_pair: A sentence in the source language and its
+            counterpart sentence in the target language
+        :type sentence_pair: AlignedSent
+        """
+        l = len(sentence_pair.mots)
+        m = len(sentence_pair.words)
+        floor = IBMModel.MIN_PROB
+
+        alignment_points = []
+        # i and j are one-indexed to match the probability tables; the
+        # recorded alignment points are converted back to zero-indexed
+        for j, trg_word in enumerate(sentence_pair.words, start=1):
+            # Baseline: trg_word aligned with the NULL token
+            null_prob = (
+                self.translation_table[trg_word][None]
+                * self.alignment_table[0][j][l][m]
+            )
+            top_prob = max(null_prob, floor)
+            top_src_index = None
+
+            for i, src_word in enumerate(sentence_pair.mots, start=1):
+                candidate_prob = (
+                    self.translation_table[trg_word][src_word]
+                    * self.alignment_table[i][j][l][m]
+                )
+                # ">=" lets a later source word win ties
+                if candidate_prob >= top_prob:
+                    top_prob = candidate_prob
+                    top_src_index = i - 1
+
+            alignment_points.append((j - 1, top_src_index))
+
+        sentence_pair.alignment = Alignment(alignment_points)
+
+
+class Model2Counts(Counts):
+    """
+    Data object to store counts of various parameters during training.
+    Includes counts for alignment.
+
+    ``alignment`` is indexed as ``alignment[i][j][l][m]`` and
+    ``alignment_for_any_i`` as ``alignment_for_any_i[j][l][m]``. Both
+    create missing entries on access, starting at 0.0.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        # Factories for the nested tables, innermost level first
+        def counts_by_m():
+            return defaultdict(float)
+
+        def counts_by_l():
+            return defaultdict(counts_by_m)
+
+        def counts_by_j():
+            return defaultdict(counts_by_l)
+
+        self.alignment = defaultdict(counts_by_j)
+        self.alignment_for_any_i = defaultdict(counts_by_l)
+
+    def update_lexical_translation(self, count, s, t):
+        """
+        Add ``count`` to the tally for target word ``t`` given source
+        word ``s``, and to the total tally for ``s``
+        """
+        self.t_given_s[t][s] += count
+        self.any_t_given_s[s] += count
+
+    def update_alignment(self, count, i, j, l, m):
+        """
+        Add ``count`` to the tally for source position ``i`` given
+        (j, l, m), and to the total tally for (j, l, m)
+        """
+        self.alignment[i][j][l][m] += count
+        self.alignment_for_any_i[j][l][m] += count
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm3.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm3.py
@@ -0,0 +1,346 @@
+# Natural Language Toolkit: IBM Model 3
+#
+# Copyright (C) 2001-2013 NLTK Project
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Translation model that considers how a word can be aligned to
+multiple words in another language.
+
+IBM Model 3 improves on Model 2 by directly modeling the phenomenon
+where a word in one language may be translated into zero or more words
+in another. This is expressed by the fertility probability,
+n(phi | source word).
+
+If a source word translates into more than one word, it is possible to
+generate sentences that have the same alignment in multiple ways. This
+is modeled by a distortion step. The distortion probability, d(j|i,l,m),
+predicts a target word position, given its aligned source word's
+position. The distortion probability replaces the alignment probability
+of Model 2.
+
+The fertility probability is not applicable for NULL. Target words that
+align to NULL are assumed to be distributed uniformly in the target
+sentence. The existence of these words is modeled by p1, the probability
+that a target word produced by a real source word requires another
+target word that is produced by NULL.
+
+The EM algorithm used in Model 3 is:
+
+:E step: In the training data, collect counts, weighted by prior
+         probabilities.
+
+         - (a) count how many times a source language word is translated
+               into a target language word
+         - (b) count how many times a particular position in the target
+               sentence is aligned to a particular position in the source
+               sentence
+         - (c) count how many times a source word is aligned to phi number
+               of target words
+         - (d) count how many times NULL is aligned to a target word
+
+:M step: Estimate new probabilities based on the counts from the E step
+
+Because there are too many possible alignments, only the most probable
+ones are considered. First, the best alignment is determined using prior
+probabilities. Then, a hill climbing approach is used to find other good
+candidates.
+
+Notations
+---------
+
+:i: Position in the source sentence
+     Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+:j: Position in the target sentence
+     Valid values are 1, 2, ..., length of target sentence
+:l: Number of words in the source sentence, excluding NULL
+:m: Number of words in the target sentence
+:s: A word in the source language
+:t: A word in the target language
+:phi: Fertility, the number of target words produced by a source word
+:p1: Probability that a target word produced by a source word is
+     accompanied by another target word that is aligned to NULL
+:p0: 1 - p1
+
+References
+----------
+
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+import warnings
+from collections import defaultdict
+from math import factorial
+
+from nltk.translate import AlignedSent, Alignment, IBMModel, IBMModel2
+from nltk.translate.ibm_model import Counts
+
+
+class IBMModel3(IBMModel):
+    """
+    Translation model that considers how a word can be aligned to
+    multiple words in another language
+
+    >>> bitext = []
+    >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big']))
+    >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house']))
+    >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book']))
+    >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book']))
+    >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book']))
+    >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize']))
+
+    >>> ibm3 = IBMModel3(bitext, 5)
+
+    >>> print(round(ibm3.translation_table['buch']['book'], 3))
+    1.0
+    >>> print(round(ibm3.translation_table['das']['book'], 3))
+    0.0
+    >>> print(round(ibm3.translation_table['ja'][None], 3))
+    1.0
+
+    >>> print(round(ibm3.distortion_table[1][1][2][2], 3))
+    1.0
+    >>> print(round(ibm3.distortion_table[1][2][2][2], 3))
+    0.0
+    >>> print(round(ibm3.distortion_table[2][2][4][5], 3))
+    0.75
+
+    >>> print(round(ibm3.fertility_table[2]['summarize'], 3))
+    1.0
+    >>> print(round(ibm3.fertility_table[1]['book'], 3))
+    1.0
+
+    >>> print(round(ibm3.p1, 3))
+    0.054
+
+    >>> test_sentence = bitext[2]
+    >>> test_sentence.words
+    ['das', 'buch', 'ist', 'ja', 'klein']
+    >>> test_sentence.mots
+    ['the', 'book', 'is', 'small']
+    >>> test_sentence.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)])
+
+    """
+
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
+        """
+        Train on ``sentence_aligned_corpus`` and create a lexical
+        translation model, a distortion model, a fertility model, and a
+        model for generating NULL-aligned words.
+
+        Translation direction is from ``AlignedSent.mots`` to
+        ``AlignedSent.words``.
+
+        :param sentence_aligned_corpus: Sentence-aligned parallel corpus
+        :type sentence_aligned_corpus: list(AlignedSent)
+
+        :param iterations: Number of iterations to run training algorithm
+        :type iterations: int
+
+        :param probability_tables: Optional. Use this to pass in custom
+            probability values. If not specified, probabilities will be
+            set to a uniform distribution, or some other sensible value.
+            If specified, all the following entries must be present:
+            ``translation_table``, ``alignment_table``,
+            ``fertility_table``, ``p1``, ``distortion_table``.
+            See ``IBMModel`` for the type and purpose of these tables.
+        :type probability_tables: dict[str]: object
+        """
+        super().__init__(sentence_aligned_corpus)
+        self.reset_probabilities()
+
+        if probability_tables is None:
+            # Get translation and alignment probabilities from IBM Model 2
+            ibm2 = IBMModel2(sentence_aligned_corpus, iterations)
+            self.translation_table = ibm2.translation_table
+            self.alignment_table = ibm2.alignment_table
+            self.set_uniform_probabilities(sentence_aligned_corpus)
+        else:
+            # Set user-defined probabilities
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+            self.fertility_table = probability_tables["fertility_table"]
+            self.p1 = probability_tables["p1"]
+            self.distortion_table = probability_tables["distortion_table"]
+
+        for n in range(0, iterations):
+            self.train(sentence_aligned_corpus)
+
+    def reset_probabilities(self):
+        """
+        Reset the tables handled by the parent class, then replace
+        ``distortion_table`` with an empty table whose missing entries
+        evaluate to ``MIN_PROB``
+        """
+        super().reset_probabilities()
+        self.distortion_table = defaultdict(
+            lambda: defaultdict(
+                lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+            )
+        )
+        """
+        dict[int][int][int][int]: float. Probability(j | i,l,m).
+        Values accessed as ``distortion_table[j][i][l][m]``.
+        """
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        """
+        Set the initial distortion, fertility and NULL-generation
+        probabilities
+
+        Distortion is uniform: d(j | i,l,m) = 1 / m for all i, j, l, m,
+        initialized once per distinct (l, m) combination found in
+        ``sentence_aligned_corpus``. Sentence pairs with an empty target
+        side (m = 0) have no position j to initialize and are skipped,
+        which also avoids dividing by zero.
+
+        Fertility probabilities get fixed starting values for
+        phi = 0..9, and ``p1`` starts at 0.5.
+
+        :param sentence_aligned_corpus: Sentence pairs providing
+            ``mots`` (source side) and ``words`` (target side)
+        """
+        l_m_combinations = set()
+        for aligned_sentence in sentence_aligned_corpus:
+            l = len(aligned_sentence.mots)
+            m = len(aligned_sentence.words)
+            if (l, m) not in l_m_combinations:
+                l_m_combinations.add((l, m))
+                if m == 0:
+                    # No target positions exist, so there is nothing to
+                    # initialize; 1 / m would raise ZeroDivisionError
+                    continue
+                initial_prob = 1 / m
+                if initial_prob < IBMModel.MIN_PROB:
+                    warnings.warn(
+                        "A target sentence is too long ("
+                        + str(m)
+                        + " words). Results may be less accurate."
+                    )
+                for j in range(1, m + 1):
+                    for i in range(0, l + 1):
+                        self.distortion_table[j][i][l][m] = initial_prob
+
+        # simple initialization, taken from GIZA++
+        self.fertility_table[0] = defaultdict(lambda: 0.2)
+        self.fertility_table[1] = defaultdict(lambda: 0.65)
+        self.fertility_table[2] = defaultdict(lambda: 0.1)
+        self.fertility_table[3] = defaultdict(lambda: 0.04)
+        MAX_FERTILITY = 10
+        # The remaining 0.01 of probability mass is shared equally by
+        # phi = 4 .. MAX_FERTILITY - 1
+        initial_fert_prob = 0.01 / (MAX_FERTILITY - 4)
+        for phi in range(4, MAX_FERTILITY):
+            self.fertility_table[phi] = defaultdict(lambda: initial_fert_prob)
+
+        self.p1 = 0.5
+
+    def train(self, parallel_corpus):
+        """
+        Run one EM iteration over ``parallel_corpus``
+
+        For every sentence pair a set of alignments is sampled, the
+        best one is stored on the pair as its ``alignment``, and each
+        sampled alignment contributes counts weighted by its share of
+        the sampled probability mass. All tables except
+        ``alignment_table`` are then reset and re-estimated.
+
+        :param parallel_corpus: Sentence pairs providing ``mots``
+            (source side) and ``words`` (target side)
+        """
+        counts = Model3Counts()
+        for aligned_sentence in parallel_corpus:
+            l = len(aligned_sentence.mots)
+            m = len(aligned_sentence.words)
+
+            # Sample the alignment space
+            sampled_alignments, best_alignment = self.sample(aligned_sentence)
+            # Record the most probable alignment
+            aligned_sentence.alignment = Alignment(
+                best_alignment.zero_indexed_alignment()
+            )
+
+            # E step (a): Compute normalization factors to weigh counts
+            total_count = self.prob_of_alignments(sampled_alignments)
+
+            # E step (b): Collect counts
+            for alignment_info in sampled_alignments:
+                count = self.prob_t_a_given_s(alignment_info)
+                normalized_count = count / total_count
+
+                for j in range(1, m + 1):
+                    counts.update_lexical_translation(
+                        normalized_count, alignment_info, j
+                    )
+                    counts.update_distortion(normalized_count, alignment_info, j, l, m)
+
+                counts.update_null_generation(normalized_count, alignment_info)
+                counts.update_fertility(normalized_count, alignment_info)
+
+        # M step: Update probabilities with maximum likelihood estimates
+        # If any probability is less than MIN_PROB, clamp it to MIN_PROB
+        existing_alignment_table = self.alignment_table
+        self.reset_probabilities()
+        self.alignment_table = existing_alignment_table  # don't retrain
+
+        self.maximize_lexical_translation_probabilities(counts)
+        self.maximize_distortion_probabilities(counts)
+        self.maximize_fertility_probabilities(counts)
+        self.maximize_null_generation_probabilities(counts)
+
+    def maximize_distortion_probabilities(self, counts):
+        """
+        M step for the distortion model
+
+        Every d(j | i,l,m) that received a count becomes
+        count(j,i,l,m) / count(any j,i,l,m), floored at
+        ``IBMModel.MIN_PROB``.
+
+        :param counts: Object exposing ``distortion`` and
+            ``distortion_for_any_j`` as filled in during the E step
+        """
+        MIN_PROB = IBMModel.MIN_PROB
+        for j, i_s in counts.distortion.items():
+            for i, src_sentence_lengths in i_s.items():
+                for l, trg_sentence_lengths in src_sentence_lengths.items():
+                    for m in trg_sentence_lengths:
+                        estimate = (
+                            counts.distortion[j][i][l][m]
+                            / counts.distortion_for_any_j[i][l][m]
+                        )
+                        self.distortion_table[j][i][l][m] = max(estimate, MIN_PROB)
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence
+
+        The NULL insertion, fertility, lexical translation and
+        distortion terms are multiplied together in that order. As soon
+        as the running product falls below ``IBMModel.MIN_PROB``, the
+        method returns ``MIN_PROB`` without evaluating the rest.
+
+        :param alignment_info: Object exposing ``src_sentence``,
+            ``trg_sentence``, ``alignment`` and ``fertility_of_i``
+        """
+        src_sentence = alignment_info.src_sentence
+        trg_sentence = alignment_info.trg_sentence
+        l = len(src_sentence) - 1  # exclude NULL
+        m = len(trg_sentence) - 1
+        p1 = self.p1
+        p0 = 1 - p1
+
+        probability = 1.0
+        MIN_PROB = IBMModel.MIN_PROB
+
+        # Combine NULL insertion probability
+        null_fertility = alignment_info.fertility_of_i(0)
+        probability *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
+        if probability < MIN_PROB:
+            return MIN_PROB
+
+        # Compute combination (m - null_fertility) choose null_fertility
+        for i in range(1, null_fertility + 1):
+            probability *= (m - null_fertility - i + 1) / i
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        # Combine fertility probabilities
+        for i in range(1, l + 1):
+            fertility = alignment_info.fertility_of_i(i)
+            probability *= (
+                factorial(fertility) * self.fertility_table[fertility][src_sentence[i]]
+            )
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        # Combine lexical and distortion probabilities
+        for j in range(1, m + 1):
+            t = trg_sentence[j]
+            i = alignment_info.alignment[j]
+            s = src_sentence[i]
+
+            probability *= (
+                self.translation_table[t][s] * self.distortion_table[j][i][l][m]
+            )
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        return probability
+
+
+class Model3Counts(Counts):
+    """
+    Data object to store counts of various parameters during training.
+    Includes counts for distortion.
+
+    ``distortion`` is indexed as ``distortion[j][i][l][m]`` and
+    ``distortion_for_any_j`` as ``distortion_for_any_j[i][l][m]``. Both
+    create missing entries on access, starting at 0.0.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        # Factories for the nested tables, innermost level first
+        def counts_by_m():
+            return defaultdict(float)
+
+        def counts_by_l():
+            return defaultdict(counts_by_m)
+
+        def counts_by_i():
+            return defaultdict(counts_by_l)
+
+        self.distortion = defaultdict(counts_by_i)
+        self.distortion_for_any_j = defaultdict(counts_by_l)
+
+    def update_distortion(self, count, alignment_info, j, l, m):
+        """
+        Add ``count`` to the tally for target position ``j`` given the
+        source position it is aligned to in ``alignment_info``, and to
+        the total tally for that source position
+        """
+        src_position = alignment_info.alignment[j]
+        self.distortion[j][src_position][l][m] += count
+        self.distortion_for_any_j[src_position][l][m] += count
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm4.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm4.py
@@ -0,0 +1,490 @@
+# Natural Language Toolkit: IBM Model 4
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Translation model that reorders output words based on their type and
+distance from other related words in the output sentence.
+
+IBM Model 4 improves the distortion model of Model 3, motivated by the
+observation that certain words tend to be re-ordered in a predictable
+way relative to one another. For example, <adjective><noun> in English
+usually has its order flipped as <noun><adjective> in French.
+
+Model 4 requires words in the source and target vocabularies to be
+categorized into classes. This can be linguistically driven, like parts
+of speech (adjective, nouns, prepositions, etc). Word classes can also
+be obtained by statistical methods. The original IBM Model 4 uses an
+information theoretic approach to group words into 50 classes for each
+vocabulary.
+
+Terminology
+-----------
+
+:Cept:
+    A source word with non-zero fertility i.e. aligned to one or more
+    target words.
+:Tablet:
+    The set of target word(s) aligned to a cept.
+:Head of cept:
+    The first word of the tablet of that cept.
+:Center of cept:
+    The average position of the words in that cept's tablet. If the
+    value is not an integer, the ceiling is taken.
+    For example, for a tablet with words in positions 2, 5, 6 in the
+    target sentence, the center of the corresponding cept is
+    ceil((2 + 5 + 6) / 3) = 5
+:Displacement:
+    For a head word, defined as (position of head word - position of
+    previous cept's center). Can be positive or negative.
+    For a non-head word, defined as (position of non-head word -
+    position of previous word in the same tablet). Always positive,
+    because successive words in a tablet are assumed to appear to the
+    right of the previous word.
+
+In contrast to Model 3 which reorders words in a tablet independently of
+other words, Model 4 distinguishes between three cases.
+
+1. Words generated by NULL are distributed uniformly.
+2. For a head word t, its position is modeled by the probability
+   d_head(displacement | word_class_s(s),word_class_t(t)),
+   where s is the previous cept, and word_class_s and word_class_t maps
+   s and t to a source and target language word class respectively.
+3. For a non-head word t, its position is modeled by the probability
+   d_non_head(displacement | word_class_t(t))
+
+The EM algorithm used in Model 4 is:
+
+:E step: In the training data, collect counts, weighted by prior
+         probabilities.
+
+         - (a) count how many times a source language word is translated
+               into a target language word
+         - (b) for a particular word class, count how many times a head
+               word is located at a particular displacement from the
+               previous cept's center
+         - (c) for a particular word class, count how many times a
+               non-head word is located at a particular displacement from
+               the previous target word
+         - (d) count how many times a source word is aligned to phi number
+               of target words
+         - (e) count how many times NULL is aligned to a target word
+
+:M step: Estimate new probabilities based on the counts from the E step
+
+Like Model 3, there are too many possible alignments to consider. Thus,
+a hill climbing approach is used to sample good candidates.
+
+Notations
+---------
+
+:i: Position in the source sentence
+     Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+:j: Position in the target sentence
+     Valid values are 1, 2, ..., length of target sentence
+:l: Number of words in the source sentence, excluding NULL
+:m: Number of words in the target sentence
+:s: A word in the source language
+:t: A word in the target language
+:phi: Fertility, the number of target words produced by a source word
+:p1: Probability that a target word produced by a source word is
+     accompanied by another target word that is aligned to NULL
+:p0: 1 - p1
+:dj: Displacement, Δj
+
+References
+----------
+
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+import warnings
+from collections import defaultdict
+from math import factorial
+
+from nltk.translate import AlignedSent, Alignment, IBMModel, IBMModel3
+from nltk.translate.ibm_model import Counts, longest_target_sentence_length
+
+
+class IBMModel4(IBMModel):
+    """
+    Translation model that reorders output words based on their type and
+    their distance from other related words in the output sentence
+
+    >>> bitext = []
+    >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big']))
+    >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house']))
+    >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book']))
+    >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book']))
+    >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book']))
+    >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize']))
+    >>> src_classes = {'the': 0, 'a': 0, 'small': 1, 'big': 1, 'house': 2, 'book': 2, 'is': 3, 'was': 3, 'i': 4, 'summarize': 5 }
+    >>> trg_classes = {'das': 0, 'ein': 0, 'haus': 1, 'buch': 1, 'klein': 2, 'groß': 2, 'ist': 3, 'war': 3, 'ja': 4, 'ich': 5, 'fasse': 6, 'zusammen': 6 }
+
+    >>> ibm4 = IBMModel4(bitext, 5, src_classes, trg_classes)
+
+    >>> print(round(ibm4.translation_table['buch']['book'], 3))
+    1.0
+    >>> print(round(ibm4.translation_table['das']['book'], 3))
+    0.0
+    >>> print(round(ibm4.translation_table['ja'][None], 3))
+    1.0
+
+    >>> print(round(ibm4.head_distortion_table[1][0][1], 3))
+    1.0
+    >>> print(round(ibm4.head_distortion_table[2][0][1], 3))
+    0.0
+    >>> print(round(ibm4.non_head_distortion_table[3][6], 3))
+    0.5
+
+    >>> print(round(ibm4.fertility_table[2]['summarize'], 3))
+    1.0
+    >>> print(round(ibm4.fertility_table[1]['book'], 3))
+    1.0
+
+    >>> print(round(ibm4.p1, 3))
+    0.033
+
+    >>> test_sentence = bitext[2]
+    >>> test_sentence.words
+    ['das', 'buch', 'ist', 'ja', 'klein']
+    >>> test_sentence.mots
+    ['the', 'book', 'is', 'small']
+    >>> test_sentence.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)])
+
+    """
+
+    def __init__(
+        self,
+        sentence_aligned_corpus,
+        iterations,
+        source_word_classes,
+        target_word_classes,
+        probability_tables=None,
+    ):
+        """
+        Train on ``sentence_aligned_corpus`` and create a lexical
+        translation model, distortion models, a fertility model, and a
+        model for generating NULL-aligned words.
+
+        Translation direction is from ``AlignedSent.mots`` to
+        ``AlignedSent.words``.
+
+        When ``probability_tables`` is omitted, an ``IBMModel3`` is
+        trained first with the same number of iterations and supplies
+        the starting translation, alignment, fertility and p1 values;
+        the distortion tables start out uniform.
+
+        :param sentence_aligned_corpus: Sentence-aligned parallel corpus
+        :type sentence_aligned_corpus: list(AlignedSent)
+
+        :param iterations: Number of iterations to run training algorithm
+        :type iterations: int
+
+        :param source_word_classes: Lookup table that maps a source word
+            to its word class, the latter represented by an integer id
+        :type source_word_classes: dict[str]: int
+
+        :param target_word_classes: Lookup table that maps a target word
+            to its word class, the latter represented by an integer id
+        :type target_word_classes: dict[str]: int
+
+        :param probability_tables: Optional. Use this to pass in custom
+            probability values. If not specified, probabilities will be
+            set to a uniform distribution, or some other sensible value.
+            If specified, all the following entries must be present:
+            ``translation_table``, ``alignment_table``,
+            ``fertility_table``, ``p1``, ``head_distortion_table``,
+            ``non_head_distortion_table``. See ``IBMModel`` and
+            ``IBMModel4`` for the type and purpose of these tables.
+        :type probability_tables: dict[str]: object
+        """
+        super().__init__(sentence_aligned_corpus)
+        self.reset_probabilities()
+        self.src_classes = source_word_classes
+        self.trg_classes = target_word_classes
+
+        if probability_tables is not None:
+            # Start from the caller-supplied probabilities
+            tables = probability_tables
+            self.translation_table = tables["translation_table"]
+            self.alignment_table = tables["alignment_table"]
+            self.fertility_table = tables["fertility_table"]
+            self.p1 = tables["p1"]
+            self.head_distortion_table = tables["head_distortion_table"]
+            self.non_head_distortion_table = tables["non_head_distortion_table"]
+        else:
+            # Bootstrap from a freshly trained IBM Model 3
+            bootstrap_model = IBMModel3(sentence_aligned_corpus, iterations)
+            self.translation_table = bootstrap_model.translation_table
+            self.alignment_table = bootstrap_model.alignment_table
+            self.fertility_table = bootstrap_model.fertility_table
+            self.p1 = bootstrap_model.p1
+            self.set_uniform_probabilities(sentence_aligned_corpus)
+
+        for _ in range(iterations):
+            self.train(sentence_aligned_corpus)
+
+    def reset_probabilities(self):
+        """
+        Reset the tables handled by the parent class, then replace both
+        distortion tables with empty tables whose missing entries
+        evaluate to ``MIN_PROB``
+        """
+        super().reset_probabilities()
+        self.head_distortion_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
+        """
+        dict[int][int][int]: float. Probability(displacement of head
+        word | word class of previous cept,target word class).
+        Values accessed as
+        ``head_distortion_table[dj][src_class][trg_class]``.
+        """
+
+        self.non_head_distortion_table = defaultdict(
+            lambda: defaultdict(lambda: self.MIN_PROB)
+        )
+        """
+        dict[int][int]: float. Probability(displacement of non-head
+        word | target word class).
+        Values accessed as ``non_head_distortion_table[dj][trg_class]``.
+        """
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        """
+        Set distortion probabilities uniformly to
+        1 / cardinality of displacement values
+
+        Both distortion tables receive an entry for every displacement
+        dj in 1 .. max_m - 1 and its negative, where max_m is the
+        longest target sentence length in ``sentence_aligned_corpus``.
+        """
+        max_m = longest_target_sentence_length(sentence_aligned_corpus)
+
+        # A word in the last position m placed after a word in the first
+        # position gives the largest displacement, m-1; the opposite
+        # case gives the smallest, -(m-1). Zero is not a valid
+        # displacement, which leaves 2 * (m-1) possible values.
+        # With max_m <= 1 there are none, so fall back to MIN_PROB.
+        uniform_prob = (
+            IBMModel.MIN_PROB if max_m <= 1 else 1 / (2 * (max_m - 1))
+        )
+        if uniform_prob < IBMModel.MIN_PROB:
+            warnings.warn(
+                "A target sentence is too long ("
+                + str(max_m)
+                + " words). Results may be less accurate."
+            )
+
+        def uniform():
+            return uniform_prob
+
+        def uniform_by_trg_class():
+            return defaultdict(uniform)
+
+        for dj in range(1, max_m):
+            for displacement in (dj, -dj):
+                self.head_distortion_table[displacement] = defaultdict(
+                    uniform_by_trg_class
+                )
+            for displacement in (dj, -dj):
+                self.non_head_distortion_table[displacement] = defaultdict(uniform)
+
+    def train(self, parallel_corpus):
+        counts = Model4Counts()
+        for aligned_sentence in parallel_corpus:
+            m = len(aligned_sentence.words)
+
+            # Sample the alignment space
+            sampled_alignments, best_alignment = self.sample(aligned_sentence)
+            # Record the most probable alignment
+            aligned_sentence.alignment = Alignment(
+                best_alignment.zero_indexed_alignment()
+            )
+
+            # E step (a): Compute normalization factors to weigh counts
+            total_count = self.prob_of_alignments(sampled_alignments)
+
+            # E step (b): Collect counts
+            for alignment_info in sampled_alignments:
+                count = self.prob_t_a_given_s(alignment_info)
+                normalized_count = count / total_count
+
+                for j in range(1, m + 1):
+                    counts.update_lexical_translation(
+                        normalized_count, alignment_info, j
+                    )
+                    counts.update_distortion(
+                        normalized_count,
+                        alignment_info,
+                        j,
+                        self.src_classes,
+                        self.trg_classes,
+                    )
+
+                counts.update_null_generation(normalized_count, alignment_info)
+                counts.update_fertility(normalized_count, alignment_info)
+
+        # M step: Update probabilities with maximum likelihood estimates
+        # If any probability is less than MIN_PROB, clamp it to MIN_PROB
+        existing_alignment_table = self.alignment_table
+        self.reset_probabilities()
+        self.alignment_table = existing_alignment_table  # don't retrain
+
+        self.maximize_lexical_translation_probabilities(counts)
+        self.maximize_distortion_probabilities(counts)
+        self.maximize_fertility_probabilities(counts)
+        self.maximize_null_generation_probabilities(counts)
+
+    def maximize_distortion_probabilities(self, counts):
+        head_d_table = self.head_distortion_table
+        for dj, src_classes in counts.head_distortion.items():
+            for s_cls, trg_classes in src_classes.items():
+                for t_cls in trg_classes:
+                    estimate = (
+                        counts.head_distortion[dj][s_cls][t_cls]
+                        / counts.head_distortion_for_any_dj[s_cls][t_cls]
+                    )
+                    head_d_table[dj][s_cls][t_cls] = max(estimate, IBMModel.MIN_PROB)
+
+        non_head_d_table = self.non_head_distortion_table
+        for dj, trg_classes in counts.non_head_distortion.items():
+            for t_cls in trg_classes:
+                estimate = (
+                    counts.non_head_distortion[dj][t_cls]
+                    / counts.non_head_distortion_for_any_dj[t_cls]
+                )
+                non_head_d_table[dj][t_cls] = max(estimate, IBMModel.MIN_PROB)
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence
+        """
+        return IBMModel4.model4_prob_t_a_given_s(alignment_info, self)
+
+    @staticmethod  # exposed for Model 5 to use
+    def model4_prob_t_a_given_s(alignment_info, ibm_model):
+        probability = 1.0
+        MIN_PROB = IBMModel.MIN_PROB
+
+        def null_generation_term():
+            # Binomial distribution: B(m - null_fertility, p1)
+            value = 1.0
+            p1 = ibm_model.p1
+            p0 = 1 - p1
+            null_fertility = alignment_info.fertility_of_i(0)
+            m = len(alignment_info.trg_sentence) - 1
+            value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
+            if value < MIN_PROB:
+                return MIN_PROB
+
+            # Combination: (m - null_fertility) choose null_fertility
+            for i in range(1, null_fertility + 1):
+                value *= (m - null_fertility - i + 1) / i
+            return value
+
+        def fertility_term():
+            value = 1.0
+            src_sentence = alignment_info.src_sentence
+            for i in range(1, len(src_sentence)):
+                fertility = alignment_info.fertility_of_i(i)
+                value *= (
+                    factorial(fertility)
+                    * ibm_model.fertility_table[fertility][src_sentence[i]]
+                )
+                if value < MIN_PROB:
+                    return MIN_PROB
+            return value
+
+        def lexical_translation_term(j):
+            t = alignment_info.trg_sentence[j]
+            i = alignment_info.alignment[j]
+            s = alignment_info.src_sentence[i]
+            return ibm_model.translation_table[t][s]
+
+        def distortion_term(j):
+            t = alignment_info.trg_sentence[j]
+            i = alignment_info.alignment[j]
+            if i == 0:
+                # case 1: t is aligned to NULL
+                return 1.0
+            if alignment_info.is_head_word(j):
+                # case 2: t is the first word of a tablet
+                previous_cept = alignment_info.previous_cept(j)
+                src_class = None
+                if previous_cept is not None:
+                    previous_s = alignment_info.src_sentence[previous_cept]
+                    src_class = ibm_model.src_classes[previous_s]
+                trg_class = ibm_model.trg_classes[t]
+                dj = j - alignment_info.center_of_cept(previous_cept)
+                return ibm_model.head_distortion_table[dj][src_class][trg_class]
+
+            # case 3: t is a subsequent word of a tablet
+            previous_position = alignment_info.previous_in_tablet(j)
+            trg_class = ibm_model.trg_classes[t]
+            dj = j - previous_position
+            return ibm_model.non_head_distortion_table[dj][trg_class]
+
+        # end nested functions
+
+        # Abort computation whenever probability falls below MIN_PROB at
+        # any point, since MIN_PROB can be considered as zero
+        probability *= null_generation_term()
+        if probability < MIN_PROB:
+            return MIN_PROB
+
+        probability *= fertility_term()
+        if probability < MIN_PROB:
+            return MIN_PROB
+
+        for j in range(1, len(alignment_info.trg_sentence)):
+            probability *= lexical_translation_term(j)
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+            probability *= distortion_term(j)
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        return probability
+
+
+class Model4Counts(Counts):
+    """
+    Data object to store counts of various parameters during training.
+    Includes counts for distortion.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.head_distortion = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        self.head_distortion_for_any_dj = defaultdict(lambda: defaultdict(float))
+        self.non_head_distortion = defaultdict(lambda: defaultdict(float))
+        self.non_head_distortion_for_any_dj = defaultdict(float)
+
+    def update_distortion(self, count, alignment_info, j, src_classes, trg_classes):
+        i = alignment_info.alignment[j]
+        t = alignment_info.trg_sentence[j]
+        if i == 0:
+            # case 1: t is aligned to NULL
+            pass
+        elif alignment_info.is_head_word(j):
+            # case 2: t is the first word of a tablet
+            previous_cept = alignment_info.previous_cept(j)
+            if previous_cept is not None:
+                previous_src_word = alignment_info.src_sentence[previous_cept]
+                src_class = src_classes[previous_src_word]
+            else:
+                src_class = None
+            trg_class = trg_classes[t]
+            dj = j - alignment_info.center_of_cept(previous_cept)
+            self.head_distortion[dj][src_class][trg_class] += count
+            self.head_distortion_for_any_dj[src_class][trg_class] += count
+        else:
+            # case 3: t is a subsequent word of a tablet
+            previous_j = alignment_info.previous_in_tablet(j)
+            trg_class = trg_classes[t]
+            dj = j - previous_j
+            self.non_head_distortion[dj][trg_class] += count
+            self.non_head_distortion_for_any_dj[trg_class] += count
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm5.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm5.py
@@ -0,0 +1,661 @@
+# Natural Language Toolkit: IBM Model 5
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Translation model that keeps track of vacant positions in the target
+sentence to decide where to place translated words.
+
+Translation can be viewed as a process where each word in the source
+sentence is stepped through sequentially, generating translated words
+for each source word. The target sentence can be viewed as being made
+up of ``m`` empty slots initially, which gradually fill up as generated
+words are placed in them.
+
+Models 3 and 4 use distortion probabilities to decide how to place
+translated words. For simplicity, these models ignore the history of
+which slots have already been occupied with translated words.
+Consider the placement of the last translated word: there is only one
+empty slot left in the target sentence, so the distortion probability
+should be 1.0 for that position and 0.0 everywhere else. However, the
+distortion probabilities for Models 3 and 4 are set up such that all
+positions are under consideration.
+
+IBM Model 5 fixes this deficiency by accounting for occupied slots
+during translation. It introduces the vacancy function v(j), the number
+of vacancies up to, and including, position j in the target sentence.
+
+Terminology
+-----------
+
+:Maximum vacancy:
+    The number of valid slots that a word can be placed in.
+    This is not necessarily the same as the number of vacant slots.
+    For example, if a tablet contains more than one word, the head word
+    cannot be placed at the last vacant slot because there will be no
+    space for the other words in the tablet. The number of valid slots
+    has to take into account the length of the tablet.
+    Non-head words cannot be placed before the head word, so vacancies
+    to the left of the head word are ignored.
+:Vacancy difference:
+    For a head word: (v(j) - v(center of previous cept))
+    Can be positive or negative.
+    For a non-head word: (v(j) - v(position of previously placed word))
+    Always positive, because successive words in a tablet are assumed to
+    appear to the right of the previous word.
+
+Positioning of target words falls under three cases:
+
+1. Words generated by NULL are distributed uniformly
+2. For a head word t, its position is modeled by the probability
+   v_head(dv | max_v,word_class_t(t))
+3. For a non-head word t, its position is modeled by the probability
+   v_non_head(dv | max_v,word_class_t(t))
+
+dv and max_v are defined differently for head and non-head words.
+
+The EM algorithm used in Model 5 is:
+
+:E step: In the training data, collect counts, weighted by prior
+         probabilities.
+
+         - (a) count how many times a source language word is translated
+               into a target language word
+         - (b) for a particular word class and maximum vacancy, count how
+               many times a head word and the previous cept's center have
+               a particular difference in number of vacancies
+         - (c) for a particular word class and maximum vacancy, count how
+               many times a non-head word and the previous target word
+               have a particular difference in number of vacancies
+         - (d) count how many times a source word is aligned to phi number
+               of target words
+         - (e) count how many times NULL is aligned to a target word
+
+:M step: Estimate new probabilities based on the counts from the E step
+
+Like Model 4, there are too many possible alignments to consider. Thus,
+a hill climbing approach is used to sample good candidates. In addition,
+pruning is used to weed out unlikely alignments based on Model 4 scores.
+
+Notations
+---------
+
+:i: Position in the source sentence
+     Valid values are 0 (for NULL), 1, 2, ..., length of source sentence
+:j: Position in the target sentence
+     Valid values are 1, 2, ..., length of target sentence
+:l: Number of words in the source sentence, excluding NULL
+:m: Number of words in the target sentence
+:s: A word in the source language
+:t: A word in the target language
+:phi: Fertility, the number of target words produced by a source word
+:p1: Probability that a target word produced by a source word is
+     accompanied by another target word that is aligned to NULL
+:p0: 1 - p1
+:max_v: Maximum vacancy
+:dv: Vacancy difference, Δv
+
+The definition of v_head here differs from GIZA++, section 4.7 of
+[Brown et al., 1993], and [Koehn, 2010]. In the latter cases, v_head is
+v_head(v(j) | v(center of previous cept),max_v,word_class(t)).
+
+Here, we follow appendix B of [Brown et al., 1993] and combine v(j) with
+v(center of previous cept) to obtain dv:
+v_head(v(j) - v(center of previous cept) | max_v,word_class(t)).
+
+References
+----------
+
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+import warnings
+from collections import defaultdict
+from math import factorial
+
+from nltk.translate import AlignedSent, Alignment, IBMModel, IBMModel4
+from nltk.translate.ibm_model import Counts, longest_target_sentence_length
+
+
+class IBMModel5(IBMModel):
+    """
+    Translation model that keeps track of vacant positions in the target
+    sentence to decide where to place translated words
+
+    >>> bitext = []
+    >>> bitext.append(AlignedSent(['klein', 'ist', 'das', 'haus'], ['the', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus', 'war', 'ja', 'groß'], ['the', 'house', 'was', 'big']))
+    >>> bitext.append(AlignedSent(['das', 'buch', 'ist', 'ja', 'klein'], ['the', 'book', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['ein', 'haus', 'ist', 'klein'], ['a', 'house', 'is', 'small']))
+    >>> bitext.append(AlignedSent(['das', 'haus'], ['the', 'house']))
+    >>> bitext.append(AlignedSent(['das', 'buch'], ['the', 'book']))
+    >>> bitext.append(AlignedSent(['ein', 'buch'], ['a', 'book']))
+    >>> bitext.append(AlignedSent(['ich', 'fasse', 'das', 'buch', 'zusammen'], ['i', 'summarize', 'the', 'book']))
+    >>> bitext.append(AlignedSent(['fasse', 'zusammen'], ['summarize']))
+    >>> src_classes = {'the': 0, 'a': 0, 'small': 1, 'big': 1, 'house': 2, 'book': 2, 'is': 3, 'was': 3, 'i': 4, 'summarize': 5 }
+    >>> trg_classes = {'das': 0, 'ein': 0, 'haus': 1, 'buch': 1, 'klein': 2, 'groß': 2, 'ist': 3, 'war': 3, 'ja': 4, 'ich': 5, 'fasse': 6, 'zusammen': 6 }
+
+    >>> ibm5 = IBMModel5(bitext, 5, src_classes, trg_classes)
+
+    >>> print(round(ibm5.head_vacancy_table[1][1][1], 3))
+    1.0
+    >>> print(round(ibm5.head_vacancy_table[2][1][1], 3))
+    0.0
+    >>> print(round(ibm5.non_head_vacancy_table[3][3][6], 3))
+    1.0
+
+    >>> print(round(ibm5.fertility_table[2]['summarize'], 3))
+    1.0
+    >>> print(round(ibm5.fertility_table[1]['book'], 3))
+    1.0
+
+    >>> print(round(ibm5.p1, 3))
+    0.033
+
+    >>> test_sentence = bitext[2]
+    >>> test_sentence.words
+    ['das', 'buch', 'ist', 'ja', 'klein']
+    >>> test_sentence.mots
+    ['the', 'book', 'is', 'small']
+    >>> test_sentence.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)])
+
+    """
+
+    MIN_SCORE_FACTOR = 0.2
+    """
+    Alignments with scores below this factor are pruned during sampling
+    """
+
+    def __init__(
+        self,
+        sentence_aligned_corpus,
+        iterations,
+        source_word_classes,
+        target_word_classes,
+        probability_tables=None,
+    ):
+        """
+        Train on ``sentence_aligned_corpus`` and create a lexical
+        translation model, vacancy models, a fertility model, and a
+        model for generating NULL-aligned words.
+
+        Translation direction is from ``AlignedSent.mots`` to
+        ``AlignedSent.words``.
+
+        :param sentence_aligned_corpus: Sentence-aligned parallel corpus
+        :type sentence_aligned_corpus: list(AlignedSent)
+
+        :param iterations: Number of iterations to run training algorithm
+        :type iterations: int
+
+        :param source_word_classes: Lookup table that maps a source word
+            to its word class, the latter represented by an integer id
+        :type source_word_classes: dict[str]: int
+
+        :param target_word_classes: Lookup table that maps a target word
+            to its word class, the latter represented by an integer id
+        :type target_word_classes: dict[str]: int
+
+        :param probability_tables: Optional. Use this to pass in custom
+            probability values. If not specified, probabilities will be
+            set to a uniform distribution, or some other sensible value.
+            If specified, all the following entries must be present:
+            ``translation_table``, ``alignment_table``,
+            ``fertility_table``, ``p1``, ``head_distortion_table``,
+            ``non_head_distortion_table``, ``head_vacancy_table``,
+            ``non_head_vacancy_table``. See ``IBMModel``, ``IBMModel4``,
+            and ``IBMModel5`` for the type and purpose of these tables.
+        :type probability_tables: dict[str]: object
+        """
+        super().__init__(sentence_aligned_corpus)
+        self.reset_probabilities()
+        self.src_classes = source_word_classes
+        self.trg_classes = target_word_classes
+
+        if probability_tables is None:
+            # Get probabilities from IBM model 4
+            ibm4 = IBMModel4(
+                sentence_aligned_corpus,
+                iterations,
+                source_word_classes,
+                target_word_classes,
+            )
+            self.translation_table = ibm4.translation_table
+            self.alignment_table = ibm4.alignment_table
+            self.fertility_table = ibm4.fertility_table
+            self.p1 = ibm4.p1
+            self.head_distortion_table = ibm4.head_distortion_table
+            self.non_head_distortion_table = ibm4.non_head_distortion_table
+            self.set_uniform_probabilities(sentence_aligned_corpus)
+        else:
+            # Set user-defined probabilities
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+            self.fertility_table = probability_tables["fertility_table"]
+            self.p1 = probability_tables["p1"]
+            self.head_distortion_table = probability_tables["head_distortion_table"]
+            self.non_head_distortion_table = probability_tables[
+                "non_head_distortion_table"
+            ]
+            self.head_vacancy_table = probability_tables["head_vacancy_table"]
+            self.non_head_vacancy_table = probability_tables["non_head_vacancy_table"]
+
+        for n in range(0, iterations):
+            self.train(sentence_aligned_corpus)
+
+    def reset_probabilities(self):
+        super().reset_probabilities()
+        self.head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
+        """
+        dict[int][int][int]: float. Probability(vacancy difference |
+        number of remaining valid positions,target word class).
+        Values accessed as ``head_vacancy_table[dv][v_max][trg_class]``.
+        """
+
+        self.non_head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
+        """
+        dict[int][int][int]: float. Probability(vacancy difference |
+        number of remaining valid positions,target word class).
+        Values accessed as ``non_head_vacancy_table[dv][v_max][trg_class]``.
+        """
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        """
+        Set vacancy probabilities uniformly to
+        1 / cardinality of vacancy difference values
+        """
+        max_m = longest_target_sentence_length(sentence_aligned_corpus)
+
+        # The maximum vacancy difference occurs when a word is placed in
+        # the last available position m of the target sentence and the
+        # previous word position has no vacancies.
+        # The minimum is 1-max_v, when a word is placed in the first
+        # available position and the previous word is placed beyond the
+        # last available position.
+        # Thus, the number of possible vacancy difference values is
+        # (max_v) - (1-max_v) + 1 = 2 * max_v.
+        if max_m > 0 and (1 / (2 * max_m)) < IBMModel.MIN_PROB:
+            warnings.warn(
+                "A target sentence is too long ("
+                + str(max_m)
+                + " words). Results may be less accurate."
+            )
+
+        for max_v in range(1, max_m + 1):
+            for dv in range(1, max_m + 1):
+                initial_prob = 1 / (2 * max_v)
+                self.head_vacancy_table[dv][max_v] = defaultdict(lambda: initial_prob)
+                self.head_vacancy_table[-(dv - 1)][max_v] = defaultdict(
+                    lambda: initial_prob
+                )
+                self.non_head_vacancy_table[dv][max_v] = defaultdict(
+                    lambda: initial_prob
+                )
+                self.non_head_vacancy_table[-(dv - 1)][max_v] = defaultdict(
+                    lambda: initial_prob
+                )
+
+    def train(self, parallel_corpus):
+        counts = Model5Counts()
+        for aligned_sentence in parallel_corpus:
+            l = len(aligned_sentence.mots)
+            m = len(aligned_sentence.words)
+
+            # Sample the alignment space
+            sampled_alignments, best_alignment = self.sample(aligned_sentence)
+            # Record the most probable alignment
+            aligned_sentence.alignment = Alignment(
+                best_alignment.zero_indexed_alignment()
+            )
+
+            # E step (a): Compute normalization factors to weigh counts
+            total_count = self.prob_of_alignments(sampled_alignments)
+
+            # E step (b): Collect counts
+            for alignment_info in sampled_alignments:
+                count = self.prob_t_a_given_s(alignment_info)
+                normalized_count = count / total_count
+
+                for j in range(1, m + 1):
+                    counts.update_lexical_translation(
+                        normalized_count, alignment_info, j
+                    )
+
+                slots = Slots(m)
+                for i in range(1, l + 1):
+                    counts.update_vacancy(
+                        normalized_count, alignment_info, i, self.trg_classes, slots
+                    )
+
+                counts.update_null_generation(normalized_count, alignment_info)
+                counts.update_fertility(normalized_count, alignment_info)
+
+        # M step: Update probabilities with maximum likelihood estimates
+        # If any probability is less than MIN_PROB, clamp it to MIN_PROB
+        existing_alignment_table = self.alignment_table
+        self.reset_probabilities()
+        self.alignment_table = existing_alignment_table  # don't retrain
+
+        self.maximize_lexical_translation_probabilities(counts)
+        self.maximize_vacancy_probabilities(counts)
+        self.maximize_fertility_probabilities(counts)
+        self.maximize_null_generation_probabilities(counts)
+
+    def sample(self, sentence_pair):
+        """
+        Sample the most probable alignments from the entire alignment
+        space according to Model 4
+
+        Note that Model 4 scoring is used instead of Model 5 because the
+        latter is too expensive to compute.
+
+        First, determine the best alignment according to IBM Model 2.
+        With this initial alignment, use hill climbing to determine the
+        best alignment according to a IBM Model 4. Add this
+        alignment and its neighbors to the sample set. Repeat this
+        process with other initial alignments obtained by pegging an
+        alignment point. Finally, prune alignments that have
+        substantially lower Model 4 scores than the best alignment.
+
+        :param sentence_pair: Source and target language sentence pair
+            to generate a sample of alignments from
+        :type sentence_pair: AlignedSent
+
+        :return: A set of best alignments represented by their ``AlignmentInfo``
+            and the best alignment of the set for convenience
+        :rtype: set(AlignmentInfo), AlignmentInfo
+        """
+        sampled_alignments, best_alignment = super().sample(sentence_pair)
+        return self.prune(sampled_alignments), best_alignment
+
+    def prune(self, alignment_infos):
+        """
+        Removes alignments from ``alignment_infos`` that have
+        substantially lower Model 4 scores than the best alignment
+
+        :return: Pruned alignments
+        :rtype: set(AlignmentInfo)
+        """
+        alignments = []
+        best_score = 0
+
+        for alignment_info in alignment_infos:
+            score = IBMModel4.model4_prob_t_a_given_s(alignment_info, self)
+            best_score = max(score, best_score)
+            alignments.append((alignment_info, score))
+
+        threshold = IBMModel5.MIN_SCORE_FACTOR * best_score
+        alignments = [a[0] for a in alignments if a[1] > threshold]
+        return set(alignments)
+
+    def hillclimb(self, alignment_info, j_pegged=None):
+        """
+        Starting from the alignment in ``alignment_info``, look at
+        neighboring alignments iteratively for the best one, according
+        to Model 4
+
+        Note that Model 4 scoring is used instead of Model 5 because the
+        latter is too expensive to compute.
+
+        There is no guarantee that the best alignment in the alignment
+        space will be found, because the algorithm might be stuck in a
+        local maximum.
+
+        :param j_pegged: If specified, the search will be constrained to
+            alignments where ``j_pegged`` remains unchanged
+        :type j_pegged: int
+
+        :return: The best alignment found from hill climbing
+        :rtype: AlignmentInfo
+        """
+        alignment = alignment_info  # alias with shorter name
+        max_probability = IBMModel4.model4_prob_t_a_given_s(alignment, self)
+
+        while True:
+            old_alignment = alignment
+            for neighbor_alignment in self.neighboring(alignment, j_pegged):
+                neighbor_probability = IBMModel4.model4_prob_t_a_given_s(
+                    neighbor_alignment, self
+                )
+
+                if neighbor_probability > max_probability:
+                    alignment = neighbor_alignment
+                    max_probability = neighbor_probability
+
+            if alignment == old_alignment:
+                # Until there are no better alignments
+                break
+
+        alignment.score = max_probability
+        return alignment
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence
+
+        The result is the product of a NULL generation term, a
+        fertility term, one lexical translation term per target
+        position, and one vacancy term per non-NULL source position.
+        Whenever the running product falls below ``MIN_PROB``,
+        ``MIN_PROB`` is returned immediately.
+
+        :param alignment_info: Alignment to score
+        :return: The probability, clamped from below at ``MIN_PROB``
+        """
+        probability = 1.0
+        MIN_PROB = IBMModel.MIN_PROB
+        # Slot tracker shared by all vacancy_term calls below; sized as
+        # len(trg_sentence) - 1 (presumably because index 0 of
+        # trg_sentence is a placeholder - confirm against AlignmentInfo)
+        slots = Slots(len(alignment_info.trg_sentence) - 1)
+
+        def null_generation_term():
+            # Binomial distribution: B(m - null_fertility, p1)
+            value = 1.0
+            p1 = self.p1
+            p0 = 1 - p1
+            null_fertility = alignment_info.fertility_of_i(0)
+            m = len(alignment_info.trg_sentence) - 1
+            value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
+            if value < MIN_PROB:
+                return MIN_PROB
+
+            # Combination: (m - null_fertility) choose null_fertility
+            for i in range(1, null_fertility + 1):
+                value *= (m - null_fertility - i + 1) / i
+            return value
+
+        def fertility_term():
+            # Product over source positions 1.. of
+            # fertility! * fertility_table[fertility][source word]
+            value = 1.0
+            src_sentence = alignment_info.src_sentence
+            for i in range(1, len(src_sentence)):
+                fertility = alignment_info.fertility_of_i(i)
+                value *= (
+                    factorial(fertility)
+                    * self.fertility_table[fertility][src_sentence[i]]
+                )
+                if value < MIN_PROB:
+                    return MIN_PROB
+            return value
+
+        def lexical_translation_term(j):
+            # translation_table entry for the target word at j and the
+            # source word it is aligned to
+            t = alignment_info.trg_sentence[j]
+            i = alignment_info.alignment[j]
+            s = alignment_info.src_sentence[i]
+            return self.translation_table[t][s]
+
+        def vacancy_term(i):
+            # Vacancy probability for placing the tablet of source
+            # position i. Marks the tablet's positions as occupied in
+            # ``slots``, so the result depends on the calls made for
+            # earlier source positions.
+            value = 1.0
+            tablet = alignment_info.cepts[i]
+            tablet_length = len(tablet)
+            total_vacancies = slots.vacancies_at(len(slots))
+
+            # case 1: the tablet is empty, so there is nothing to place
+            if tablet_length == 0:
+                return value
+
+            # case 2: head word
+            j = tablet[0]
+            previous_cept = alignment_info.previous_cept(j)
+            previous_center = alignment_info.center_of_cept(previous_cept)
+            dv = slots.vacancies_at(j) - slots.vacancies_at(previous_center)
+            # Room must be left for the other words of the tablet
+            max_v = total_vacancies - tablet_length + 1
+            trg_class = self.trg_classes[alignment_info.trg_sentence[j]]
+            value *= self.head_vacancy_table[dv][max_v][trg_class]
+            slots.occupy(j)  # mark position as occupied
+            total_vacancies -= 1
+            if value < MIN_PROB:
+                return MIN_PROB
+
+            # case 3: non-head words
+            for k in range(1, tablet_length):
+                previous_position = tablet[k - 1]
+                previous_vacancies = slots.vacancies_at(previous_position)
+                j = tablet[k]
+                dv = slots.vacancies_at(j) - previous_vacancies
+                # Vacancies counted up to the previous word of the
+                # tablet are subtracted from the available total
+                max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies
+                trg_class = self.trg_classes[alignment_info.trg_sentence[j]]
+                value *= self.non_head_vacancy_table[dv][max_v][trg_class]
+                slots.occupy(j)  # mark position as occupied
+                total_vacancies -= 1
+                if value < MIN_PROB:
+                    return MIN_PROB
+
+            return value
+
+        # end nested functions
+
+        # Abort computation whenever probability falls below MIN_PROB at
+        # any point, since MIN_PROB can be considered as zero
+        probability *= null_generation_term()
+        if probability < MIN_PROB:
+            return MIN_PROB
+
+        probability *= fertility_term()
+        if probability < MIN_PROB:
+            return MIN_PROB
+
+        for j in range(1, len(alignment_info.trg_sentence)):
+            probability *= lexical_translation_term(j)
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        # Source positions are visited in increasing order; each call
+        # updates the shared ``slots`` before the next one reads it
+        for i in range(1, len(alignment_info.src_sentence)):
+            probability *= vacancy_term(i)
+            if probability < MIN_PROB:
+                return MIN_PROB
+
+        return probability
+
+    def maximize_vacancy_probabilities(self, counts):
+        MIN_PROB = IBMModel.MIN_PROB
+        head_vacancy_table = self.head_vacancy_table
+        for dv, max_vs in counts.head_vacancy.items():
+            for max_v, trg_classes in max_vs.items():
+                for t_cls in trg_classes:
+                    estimate = (
+                        counts.head_vacancy[dv][max_v][t_cls]
+                        / counts.head_vacancy_for_any_dv[max_v][t_cls]
+                    )
+                    head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB)
+
+        non_head_vacancy_table = self.non_head_vacancy_table
+        for dv, max_vs in counts.non_head_vacancy.items():
+            for max_v, trg_classes in max_vs.items():
+                for t_cls in trg_classes:
+                    estimate = (
+                        counts.non_head_vacancy[dv][max_v][t_cls]
+                        / counts.non_head_vacancy_for_any_dv[max_v][t_cls]
+                    )
+                    non_head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB)
+
+
+class Model5Counts(Counts):
+    """
+    Data object to store counts of various parameters during training.
+    Includes counts for vacancies.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.head_vacancy = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        self.head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(float))
+        self.non_head_vacancy = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        self.non_head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(float))
+
+    def update_vacancy(self, count, alignment_info, i, trg_classes, slots):
+        """
+        :param count: Value to add to the vacancy counts
+        :param alignment_info: Alignment under consideration
+        :param i: Source word position under consideration
+        :param trg_classes: Target word classes
+        :param slots: Vacancy states of the slots in the target sentence.
+            Output parameter that will be modified as new words are placed
+            in the target sentence.
+        """
+        tablet = alignment_info.cepts[i]
+        tablet_length = len(tablet)
+        total_vacancies = slots.vacancies_at(len(slots))
+
+        # case 1: NULL aligned words
+        if tablet_length == 0:
+            return  # ignore zero fertility words
+
+        # case 2: head word
+        j = tablet[0]
+        previous_cept = alignment_info.previous_cept(j)
+        previous_center = alignment_info.center_of_cept(previous_cept)
+        dv = slots.vacancies_at(j) - slots.vacancies_at(previous_center)
+        max_v = total_vacancies - tablet_length + 1
+        trg_class = trg_classes[alignment_info.trg_sentence[j]]
+        self.head_vacancy[dv][max_v][trg_class] += count
+        self.head_vacancy_for_any_dv[max_v][trg_class] += count
+        slots.occupy(j)  # mark position as occupied
+        total_vacancies -= 1
+
+        # case 3: non-head words
+        for k in range(1, tablet_length):
+            previous_position = tablet[k - 1]
+            previous_vacancies = slots.vacancies_at(previous_position)
+            j = tablet[k]
+            dv = slots.vacancies_at(j) - previous_vacancies
+            max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies
+            trg_class = trg_classes[alignment_info.trg_sentence[j]]
+            self.non_head_vacancy[dv][max_v][trg_class] += count
+            self.non_head_vacancy_for_any_dv[max_v][trg_class] += count
+            slots.occupy(j)  # mark position as occupied
+            total_vacancies -= 1
+
+
+class Slots:
+    """
+    Represents positions in a target sentence. Used to keep track of
+    which slot (position) is occupied.
+    """
+
+    def __init__(self, target_sentence_length):
+        self._slots = [False] * (target_sentence_length + 1)  # 1-indexed
+
+    def occupy(self, position):
+        """
+        :return: Mark slot at ``position`` as occupied
+        """
+        self._slots[position] = True
+
+    def vacancies_at(self, position):
+        """
+        :return: Number of vacant slots up to, and including, ``position``
+        """
+        vacancies = 0
+        for k in range(1, position + 1):
+            if not self._slots[k]:
+                vacancies += 1
+        return vacancies
+
+    def __len__(self):
+        return len(self._slots) - 1  # exclude dummy zeroeth element
--- a/backend/venv/Lib/site-packages/nltk/translate/ibm_model.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ibm_model.py
@@ -0,0 +1,549 @@
+# Natural Language Toolkit: IBM Model Core
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Common methods and classes for all IBM models. See ``IBMModel1``,
+``IBMModel2``, ``IBMModel3``, ``IBMModel4``, and ``IBMModel5``
+for specific implementations.
+
+The IBM models are a series of generative models that learn lexical
+translation probabilities, p(target language word|source language word),
+given a sentence-aligned parallel corpus.
+
+The models increase in sophistication from model 1 to 5. Typically, the
+output of lower models is used to seed the higher models. All models
+use the Expectation-Maximization (EM) algorithm to learn various
+probability tables.
+
+Words in a sentence are one-indexed. The first word of a sentence has
+position 1, not 0. Index 0 is reserved in the source sentence for the
+NULL token. The concept of position does not apply to NULL, but it is
+indexed at 0 by convention.
+
+Each target word is aligned to exactly one source word or the NULL
+token.
+
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+
+Peter F. Brown, Stephen A. Della Pietra, Vincent J. Della Pietra, and
+Robert L. Mercer. 1993. The Mathematics of Statistical Machine
+Translation: Parameter Estimation. Computational Linguistics, 19 (2),
+263-311.
+"""
+
+from bisect import insort_left
+from collections import defaultdict
+from copy import deepcopy
+from math import ceil
+
+
+def longest_target_sentence_length(sentence_aligned_corpus):
+    """
+    :param sentence_aligned_corpus: Parallel corpus under consideration
+    :type sentence_aligned_corpus: list(AlignedSent)
+    :return: Number of words in the longest target language sentence
+        of ``sentence_aligned_corpus``
+    """
+    max_m = 0
+    for aligned_sentence in sentence_aligned_corpus:
+        m = len(aligned_sentence.words)
+        max_m = max(m, max_m)
+    return max_m
+
+
+class IBMModel:
+    """
+    Abstract base class for all IBM models
+    """
+
+    # Avoid division by zero and precision errors by imposing a minimum
+    # value for probabilities. Note that this approach is theoretically
+    # incorrect, since it may create probabilities that sum to more
+    # than 1. In practice, the contribution of probabilities with MIN_PROB
+    # is tiny enough that the value of MIN_PROB can be treated as zero.
+    MIN_PROB = 1.0e-12  # GIZA++ is more liberal and uses 1.0e-7
+
+    def __init__(self, sentence_aligned_corpus):
+        self.init_vocab(sentence_aligned_corpus)
+        self.reset_probabilities()
+
+    def reset_probabilities(self):
+        self.translation_table = defaultdict(
+            lambda: defaultdict(lambda: IBMModel.MIN_PROB)
+        )
+        """
+        dict[str][str]: float. Probability(target word | source word).
+        Values accessed as ``translation_table[target_word][source_word]``.
+        """
+
+        self.alignment_table = defaultdict(
+            lambda: defaultdict(
+                lambda: defaultdict(lambda: defaultdict(lambda: IBMModel.MIN_PROB))
+            )
+        )
+        """
+        dict[int][int][int][int]: float. Probability(i | j,l,m).
+        Values accessed as ``alignment_table[i][j][l][m]``.
+        Used in model 2 and hill climbing in models 3 and above
+        """
+
+        self.fertility_table = defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        """
+        dict[int][str]: float. Probability(fertility | source word).
+        Values accessed as ``fertility_table[fertility][source_word]``.
+        Used in model 3 and higher.
+        """
+
+        self.p1 = 0.5
+        """
+        Probability that a generated word requires another target word
+        that is aligned to NULL.
+        Used in model 3 and higher.
+        """
+
+    def set_uniform_probabilities(self, sentence_aligned_corpus):
+        """
+        Initialize probability tables to a uniform distribution
+
+        Derived classes should implement this accordingly.
+        """
+        pass
+
+    def init_vocab(self, sentence_aligned_corpus):
+        src_vocab = set()
+        trg_vocab = set()
+        for aligned_sentence in sentence_aligned_corpus:
+            trg_vocab.update(aligned_sentence.words)
+            src_vocab.update(aligned_sentence.mots)
+        # Add the NULL token
+        src_vocab.add(None)
+
+        self.src_vocab = src_vocab
+        """
+        set(str): All source language words used in training
+        """
+
+        self.trg_vocab = trg_vocab
+        """
+        set(str): All target language words used in training
+        """
+
+    def sample(self, sentence_pair):
+        """
+        Sample the most probable alignments from the entire alignment
+        space
+
+        First, determine the best alignment according to IBM Model 2.
+        With this initial alignment, use hill climbing to determine the
+        best alignment according to a higher IBM Model. Add this
+        alignment and its neighbors to the sample set. Repeat this
+        process with other initial alignments obtained by pegging an
+        alignment point.
+
+        Hill climbing may be stuck in a local maxima, hence the pegging
+        and trying out of different alignments.
+
+        :param sentence_pair: Source and target language sentence pair
+            to generate a sample of alignments from
+        :type sentence_pair: AlignedSent
+
+        :return: A set of best alignments represented by their ``AlignmentInfo``
+            and the best alignment of the set for convenience
+        :rtype: set(AlignmentInfo), AlignmentInfo
+        """
+        sampled_alignments = set()
+        l = len(sentence_pair.mots)
+        m = len(sentence_pair.words)
+
+        # Start from the best model 2 alignment
+        initial_alignment = self.best_model2_alignment(sentence_pair)
+        potential_alignment = self.hillclimb(initial_alignment)
+        sampled_alignments.update(self.neighboring(potential_alignment))
+        best_alignment = potential_alignment
+
+        # Start from other model 2 alignments,
+        # with the constraint that j is aligned (pegged) to i
+        for j in range(1, m + 1):
+            for i in range(0, l + 1):
+                initial_alignment = self.best_model2_alignment(sentence_pair, j, i)
+                potential_alignment = self.hillclimb(initial_alignment, j)
+                neighbors = self.neighboring(potential_alignment, j)
+                sampled_alignments.update(neighbors)
+                if potential_alignment.score > best_alignment.score:
+                    best_alignment = potential_alignment
+
+        return sampled_alignments, best_alignment
+
+    def best_model2_alignment(self, sentence_pair, j_pegged=None, i_pegged=0):
+        """
+        Finds the best alignment according to IBM Model 2
+
+        Used as a starting point for hill climbing in Models 3 and
+        above, because it is easier to compute than the best alignments
+        in higher models
+
+        :param sentence_pair: Source and target language sentence pair
+            to be word-aligned
+        :type sentence_pair: AlignedSent
+
+        :param j_pegged: If specified, the alignment point of j_pegged
+            will be fixed to i_pegged
+        :type j_pegged: int
+
+        :param i_pegged: Alignment point to j_pegged
+        :type i_pegged: int
+        """
+        src_sentence = [None] + sentence_pair.mots
+        trg_sentence = ["UNUSED"] + sentence_pair.words  # 1-indexed
+
+        l = len(src_sentence) - 1  # exclude NULL
+        m = len(trg_sentence) - 1
+
+        alignment = [0] * (m + 1)  # init all alignments to NULL
+        cepts = [[] for i in range(l + 1)]  # init all cepts to empty list
+
+        for j in range(1, m + 1):
+            if j == j_pegged:
+                # use the pegged alignment instead of searching for best one
+                best_i = i_pegged
+            else:
+                best_i = 0
+                max_alignment_prob = IBMModel.MIN_PROB
+                t = trg_sentence[j]
+
+                for i in range(0, l + 1):
+                    s = src_sentence[i]
+                    alignment_prob = (
+                        self.translation_table[t][s] * self.alignment_table[i][j][l][m]
+                    )
+
+                    if alignment_prob >= max_alignment_prob:
+                        max_alignment_prob = alignment_prob
+                        best_i = i
+
+            alignment[j] = best_i
+            cepts[best_i].append(j)
+
+        return AlignmentInfo(
+            tuple(alignment), tuple(src_sentence), tuple(trg_sentence), cepts
+        )
+
+    def hillclimb(self, alignment_info, j_pegged=None):
+        """
+        Starting from the alignment in ``alignment_info``, look at
+        neighboring alignments iteratively for the best one
+
+        There is no guarantee that the best alignment in the alignment
+        space will be found, because the algorithm might be stuck in a
+        local maximum.
+
+        :param j_pegged: If specified, the search will be constrained to
+            alignments where ``j_pegged`` remains unchanged
+        :type j_pegged: int
+
+        :return: The best alignment found from hill climbing
+        :rtype: AlignmentInfo
+        """
+        alignment = alignment_info  # alias with shorter name
+        max_probability = self.prob_t_a_given_s(alignment)
+
+        while True:
+            old_alignment = alignment
+            for neighbor_alignment in self.neighboring(alignment, j_pegged):
+                neighbor_probability = self.prob_t_a_given_s(neighbor_alignment)
+
+                if neighbor_probability > max_probability:
+                    alignment = neighbor_alignment
+                    max_probability = neighbor_probability
+
+            if alignment == old_alignment:
+                # Until there are no better alignments
+                break
+
+        alignment.score = max_probability
+        return alignment
+
+    def neighboring(self, alignment_info, j_pegged=None):
+        """
+        Determine the neighbors of ``alignment_info``, obtained by
+        moving or swapping one alignment point
+
+        :param j_pegged: If specified, neighbors that have a different
+            alignment point from j_pegged will not be considered
+        :type j_pegged: int
+
+        :return: A set neighboring alignments represented by their
+            ``AlignmentInfo``
+        :rtype: set(AlignmentInfo)
+        """
+        neighbors = set()
+
+        l = len(alignment_info.src_sentence) - 1  # exclude NULL
+        m = len(alignment_info.trg_sentence) - 1
+        original_alignment = alignment_info.alignment
+        original_cepts = alignment_info.cepts
+
+        for j in range(1, m + 1):
+            if j != j_pegged:
+                # Add alignments that differ by one alignment point
+                for i in range(0, l + 1):
+                    new_alignment = list(original_alignment)
+                    new_cepts = deepcopy(original_cepts)
+                    old_i = original_alignment[j]
+
+                    # update alignment
+                    new_alignment[j] = i
+
+                    # update cepts
+                    insort_left(new_cepts[i], j)
+                    new_cepts[old_i].remove(j)
+
+                    new_alignment_info = AlignmentInfo(
+                        tuple(new_alignment),
+                        alignment_info.src_sentence,
+                        alignment_info.trg_sentence,
+                        new_cepts,
+                    )
+                    neighbors.add(new_alignment_info)
+
+        for j in range(1, m + 1):
+            if j != j_pegged:
+                # Add alignments that have two alignment points swapped
+                for other_j in range(1, m + 1):
+                    if other_j != j_pegged and other_j != j:
+                        new_alignment = list(original_alignment)
+                        new_cepts = deepcopy(original_cepts)
+                        other_i = original_alignment[other_j]
+                        i = original_alignment[j]
+
+                        # update alignments
+                        new_alignment[j] = other_i
+                        new_alignment[other_j] = i
+
+                        # update cepts
+                        new_cepts[other_i].remove(other_j)
+                        insort_left(new_cepts[other_i], j)
+                        new_cepts[i].remove(j)
+                        insort_left(new_cepts[i], other_j)
+
+                        new_alignment_info = AlignmentInfo(
+                            tuple(new_alignment),
+                            alignment_info.src_sentence,
+                            alignment_info.trg_sentence,
+                            new_cepts,
+                        )
+                        neighbors.add(new_alignment_info)
+
+        return neighbors
+
+    def maximize_lexical_translation_probabilities(self, counts):
+        for t, src_words in counts.t_given_s.items():
+            for s in src_words:
+                estimate = counts.t_given_s[t][s] / counts.any_t_given_s[s]
+                self.translation_table[t][s] = max(estimate, IBMModel.MIN_PROB)
+
+    def maximize_fertility_probabilities(self, counts):
+        for phi, src_words in counts.fertility.items():
+            for s in src_words:
+                estimate = counts.fertility[phi][s] / counts.fertility_for_any_phi[s]
+                self.fertility_table[phi][s] = max(estimate, IBMModel.MIN_PROB)
+
+    def maximize_null_generation_probabilities(self, counts):
+        p1_estimate = counts.p1 / (counts.p1 + counts.p0)
+        p1_estimate = max(p1_estimate, IBMModel.MIN_PROB)
+        # Clip p1 if it is too large, because p0 = 1 - p1 should not be
+        # smaller than MIN_PROB
+        self.p1 = min(p1_estimate, 1 - IBMModel.MIN_PROB)
+
+    def prob_of_alignments(self, alignments):
+        probability = 0
+        for alignment_info in alignments:
+            probability += self.prob_t_a_given_s(alignment_info)
+        return probability
+
+    def prob_t_a_given_s(self, alignment_info):
+        """
+        Probability of target sentence and an alignment given the
+        source sentence
+
+        All required information is assumed to be in ``alignment_info``
+        and self.
+
+        Derived classes should override this method
+        """
+        return 0.0
+
+
+class AlignmentInfo:
+    """
+    Helper data object for training IBM Models 3 and up
+
+    Read-only. For a source sentence and its counterpart in the target
+    language, this class holds information about the sentence pair's
+    alignment, cepts, and fertility.
+
+    Warning: Alignments are one-indexed here, in contrast to
+    nltk.translate.Alignment and AlignedSent, which are zero-indexed
+    This class is not meant to be used outside of IBM models.
+    """
+
+    def __init__(self, alignment, src_sentence, trg_sentence, cepts):
+        if not isinstance(alignment, tuple):
+            raise TypeError(
+                "The alignment must be a tuple because it is used "
+                "to uniquely identify AlignmentInfo objects."
+            )
+
+        self.alignment = alignment
+        """
+        tuple(int): Alignment function. ``alignment[j]`` is the position
+        in the source sentence that is aligned to the position j in the
+        target sentence.
+        """
+
+        self.src_sentence = src_sentence
+        """
+        tuple(str): Source sentence referred to by this object.
+        Should include NULL token (None) in index 0.
+        """
+
+        self.trg_sentence = trg_sentence
+        """
+        tuple(str): Target sentence referred to by this object.
+        Should have a dummy element in index 0 so that the first word
+        starts from index 1.
+        """
+
+        self.cepts = cepts
+        """
+        list(list(int)): The positions of the target words, in
+        ascending order, aligned to a source word position. For example,
+        cepts[4] = (2, 3, 7) means that words in positions 2, 3 and 7
+        of the target sentence are aligned to the word in position 4 of
+        the source sentence
+        """
+
+        self.score = None
+        """
+        float: Optional. Probability of alignment, as defined by the
+        IBM model that assesses this alignment
+        """
+
+    def fertility_of_i(self, i):
+        """
+        Fertility of word in position ``i`` of the source sentence
+        """
+        return len(self.cepts[i])
+
+    def is_head_word(self, j):
+        """
+        :return: Whether the word in position ``j`` of the target
+            sentence is a head word
+        """
+        i = self.alignment[j]
+        return self.cepts[i][0] == j
+
+    def center_of_cept(self, i):
+        """
+        :return: The ceiling of the average positions of the words in
+            the tablet of cept ``i``, or 0 if ``i`` is None
+        """
+        if i is None:
+            return 0
+
+        average_position = sum(self.cepts[i]) / len(self.cepts[i])
+        return int(ceil(average_position))
+
+    def previous_cept(self, j):
+        """
+        :return: The previous cept of ``j``, or None if ``j`` belongs to
+            the first cept
+        """
+        i = self.alignment[j]
+        if i == 0:
+            raise ValueError(
+                "Words aligned to NULL cannot have a previous "
+                "cept because NULL has no position"
+            )
+        previous_cept = i - 1
+        while previous_cept > 0 and self.fertility_of_i(previous_cept) == 0:
+            previous_cept -= 1
+
+        if previous_cept <= 0:
+            previous_cept = None
+        return previous_cept
+
+    def previous_in_tablet(self, j):
+        """
+        :return: The position of the previous word that is in the same
+            tablet as ``j``, or None if ``j`` is the first word of the
+            tablet
+        """
+        i = self.alignment[j]
+        tablet_position = self.cepts[i].index(j)
+        if tablet_position == 0:
+            return None
+        return self.cepts[i][tablet_position - 1]
+
+    def zero_indexed_alignment(self):
+        """
+        :return: Zero-indexed alignment, suitable for use in external
+            ``nltk.translate`` modules like ``nltk.translate.Alignment``
+        :rtype: list(tuple)
+        """
+        zero_indexed_alignment = []
+        for j in range(1, len(self.trg_sentence)):
+            i = self.alignment[j] - 1
+            if i < 0:
+                i = None  # alignment to NULL token
+            zero_indexed_alignment.append((j - 1, i))
+        return zero_indexed_alignment
+
+    def __eq__(self, other):
+        return self.alignment == other.alignment
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        return hash(self.alignment)
+
+
+class Counts:
+    """
+    Data object to store counts of various parameters during training
+    """
+
+    def __init__(self):
+        self.t_given_s = defaultdict(lambda: defaultdict(float))
+        self.any_t_given_s = defaultdict(float)
+        self.p0 = 0.0
+        self.p1 = 0.0
+        self.fertility = defaultdict(lambda: defaultdict(float))
+        self.fertility_for_any_phi = defaultdict(float)
+
+    def update_lexical_translation(self, count, alignment_info, j):
+        i = alignment_info.alignment[j]
+        t = alignment_info.trg_sentence[j]
+        s = alignment_info.src_sentence[i]
+        self.t_given_s[t][s] += count
+        self.any_t_given_s[s] += count
+
+    def update_null_generation(self, count, alignment_info):
+        m = len(alignment_info.trg_sentence) - 1
+        fertility_of_null = alignment_info.fertility_of_i(0)
+        self.p1 += fertility_of_null * count
+        self.p0 += (m - 2 * fertility_of_null) * count
+
+    def update_fertility(self, count, alignment_info):
+        for i in range(0, len(alignment_info.src_sentence)):
+            s = alignment_info.src_sentence[i]
+            phi = alignment_info.fertility_of_i(i)
+            self.fertility[phi][s] += count
+            self.fertility_for_any_phi[s] += count
--- a/backend/venv/Lib/site-packages/nltk/translate/lepor.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/lepor.py
@@ -0,0 +1,332 @@
+# Natural Language Toolkit: LEPOR Score
+#
+# Copyright (C) 2001-2023 NLTK Project
+# Author: Ikram Ul Haq (ulhaqi12)
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""LEPOR score implementation."""
+
+import math
+import re
+import sys
+from typing import Callable, List
+
+import nltk
+
+
+def length_penalty(reference: List[str], hypothesis: List[str]) -> float:
+    """
+    This function calculates the length penalty(LP) for the LEPOR metric, which is defined to embrace the penaltyvfor
+    both longer and shorter hypothesis compared with the reference translations.
+    Refer from Eq (2) on https://aclanthology.org/C12-2044
+
+    :param reference: Reference sentence
+    :type reference: str
+    :param hypothesis: Hypothesis sentence
+    :type hypothesis: str
+
+    :return: Penalty of difference in length in reference and hypothesis sentence.
+    :rtype: float
+    """
+
+    ref_len = len(reference)
+    hyp_len = len(hypothesis)
+
+    if ref_len == hyp_len:
+        return 1
+    elif ref_len < hyp_len:
+        return math.exp(1 - (ref_len / hyp_len))
+    else:  # i.e. r_len > hyp_len
+        return math.exp(1 - (hyp_len / ref_len))
+
+
+def alignment(ref_tokens: List[str], hyp_tokens: List[str]):
+    """
+    This function computes the context-dependent n-gram word alignment tasks that
+    takes into account the surrounding context (neighbouring words) of the potential
+    word to select a better matching pairs between the output and the reference.
+
+    This alignment task is used to compute the ngram positional difference penalty
+    component of the LEPOR score. Generally, the function finds the matching tokens
+    between the reference and hypothesis, then find the indices of longest matching
+    n-grams by checking the left and right unigram window of the matching tokens.
+
+    :param ref_tokens: A list of tokens in reference sentence.
+    :type ref_tokens: List[str]
+    :param hyp_tokens: A list of tokens in hypothesis sentence.
+    :type hyp_tokens: List[str]
+    """
+    alignments = []
+
+    # Store the reference and hypothesis tokens length.
+    hyp_len = len(hyp_tokens)
+    ref_len = len(ref_tokens)
+
+    for hyp_index, hyp_token in enumerate(hyp_tokens):
+        # If no match.
+        if ref_tokens.count(hyp_token) == 0:
+            alignments.append(-1)
+        # If only one match.
+        elif ref_tokens.count(hyp_token) == 1:
+            alignments.append(ref_tokens.index(hyp_token))
+        # Otherwise, compute the multiple possibilities.
+        else:
+            # Keeps an index of where the hypothesis token matches the reference.
+            ref_indexes = [
+                i for i, ref_token in enumerate(ref_tokens) if ref_token == hyp_token
+            ]
+
+            # Iterate through the matched tokens, and check if
+            # the one token to the left/right also matches.
+            is_matched = []
+            for ind, ref_index in enumerate(ref_indexes):
+                # The one to the left token also matches.
+                if (
+                    0 < ref_index - 1 < ref_len
+                    and 0 < hyp_index - 1 < hyp_len
+                    and ref_tokens[ref_index - 1] == hyp_tokens[hyp_index - 1]
+                ):
+                    is_matched[ind] = True
+                # The one to the right token also matches.
+                elif (
+                    0 < ref_index + 1 < ref_len
+                    and 0 < hyp_index + 1 < hyp_len
+                    and ref_tokens[ref_index + 1] == hyp_tokens[hyp_index + 1]
+                ):
+                    is_matched[ind] = True
+                # If the left and right tokens don't match.
+                else:
+                    is_matched[ind] = False
+
+            # Stores the alignments that have matching phrases.
+            # If there's only a single matched alignment.
+            if is_matched.count(True) == 1:
+                alignments.append(ref_indexes[is_matched.index(True)])
+            # If there's multiple matched alignments that have matching
+            # tokens in the left/right window, we shift the index of the
+            # alignment to the right most matching token.
+            elif is_matched.count(True) > 1:
+                min_distance = 0
+                min_index = 0
+                for match, ref_index in zip(is_matched, ref_indexes):
+                    if match:
+                        distance = abs(hyp_index - ref_index)
+                        if distance > min_distance:
+                            min_distance = distance
+                            min_index = ref_index
+                alignments.append(min_index)
+            # If there's no matched alignments,
+            # we still keep indexes of the matching tokens
+            # without explicitly checking for the left/right window.
+            else:
+                min_distance = 0
+                min_index = 0
+                for ref_index in ref_indexes:
+                    distance = abs(hyp_index - ref_index)
+                    if distance > min_distance:
+                        min_distance = distance
+                        min_index = ref_index
+                alignments.append(min_index)
+
+                for ref_index in ref_indexes:
+                    distance = abs(hyp_index - ref_index)
+                    if distance > min_distance:
+                        min_distance = distance
+                        min_index = ref_index
+                alignments.append(min_index)
+
+    # The alignments are one indexed to keep track of the ending slice pointer of the matching ngrams.
+    alignments = [a + 1 for a in alignments if a != -1]
+    return alignments
+
+
+def ngram_positional_penalty(
+    ref_tokens: List[str], hyp_tokens: List[str]
+) -> (float, int):
+    """
+    This function calculates the n-gram position difference penalty (NPosPenal) described in the LEPOR paper.
+    The NPosPenal is an exponential of the length normalized n-gram matches between the reference and the hypothesis.
+
+    :param ref_tokens: A list of words in reference sentence.
+    :type ref_tokens: List[str]
+    :param hyp_tokens: A list of words in hypothesis sentence.
+    :type hyp_tokens: List[str]
+
+    :return: A tuple containing two elements:
+             - NPosPenal: N-gram positional penalty.
+             - match_count: Count of matched n-grams.
+    :rtype: tuple
+    """
+    # NOTE: hyp_tokens must be non-empty; len(hyp_tokens) is used as a divisor below.
+    alignments = alignment(ref_tokens, hyp_tokens)
+    match_count = len(alignments)
+
+    # Stores the n-gram position values (difference values) of aligned words
+    # between output and reference sentences,
+    # aka |PD| of eq (4) in https://aclanthology.org/C12-2044
+    pd = []
+    for i, a in enumerate(alignments):
+        pd.append(abs((i + 1) / len(hyp_tokens) - a / len(ref_tokens)))
+    # NPD is the summed |PD| normalised by the hypothesis length; NPosPenal = exp(-NPD).
+    npd = sum(pd) / len(hyp_tokens)
+    return math.exp(-npd), match_count
+
+
+def harmonic(
+    match_count: int,
+    reference_length: int,
+    hypothesis_length: int,
+    alpha: float,
+    beta: float,
+) -> float:
+    """
+    Calculate the precision and recall of the matched words and combine them into a weighted harmonic mean
+    using the alpha and beta parameters.
+
+    :param match_count: Number of words in hypothesis aligned with reference.
+    :type match_count: int
+    :param reference_length: Length of the reference sentence
+    :type reference_length: int
+    :param hypothesis_length: Length of the hypothesis sentence
+    :type hypothesis_length: int
+    :param alpha: A parameter to set weight for recall.
+    :type alpha: float
+    :param beta: A parameter to set weight for precision.
+    :type beta: float
+
+    :return: Harmonic mean.
+    :rtype: float
+    """
+    # epsilon keeps the two inner divisions finite when recall or precision is 0.
+    epsilon = sys.float_info.epsilon
+
+    precision = match_count / hypothesis_length
+    recall = match_count / reference_length
+
+    harmonic_score = (alpha + beta) / (
+        (alpha / (recall + epsilon)) + (beta / (precision + epsilon))
+    )
+
+    return harmonic_score
+
+
+def sentence_lepor(
+    references: List[str],
+    hypothesis: str,
+    alpha: float = 1.0,
+    beta: float = 1.0,
+    tokenizer: Callable[[str], List[str]] = None,
+) -> List[float]:
+    """
+    Calculate LEPOR score for a sentence from Han, A. L.-F. (2017).
+    LEPOR: An Augmented Machine Translation Evaluation Metric. https://arxiv.org/abs/1703.08748v2
+
+    >>> hypothesis = 'a bird is on a stone.'
+
+    >>> reference1 = 'a bird behind the stone.'
+    >>> reference2 = 'a bird is on the rock.'
+
+    >>> sentence_lepor([reference1, reference2], hypothesis)
+    [0.7824248013113159, 0.7739937377760259]
+
+    :param references: Reference sentences
+    :type references: list(str)
+    :param hypothesis: Hypothesis sentence
+    :type hypothesis: str
+    :param alpha: A parameter to set weight for recall.
+    :type alpha: float
+    :param beta: A parameter to set weight for precision.
+    :type beta: float
+    :param tokenizer: A callable tokenizer that will accept a string and returns a list of tokens.
+    :type tokenizer: Callable[[str], List[str]]
+
+    :return: The list of Lepor scores for a hypothesis with all references.
+    :rtype: list(float)
+    :raises ValueError: If the hypothesis or one of the references has no
+        tokens after tokenization.
+
+    """
+
+    lepor_scores = list()
+
+    # Tokenize sentences. If tokenizer is not provided, use the one in NLTK.
+    tokenize = tokenizer if tokenizer else nltk.word_tokenize
+    hypothesis = tokenize(hypothesis)
+
+    # Build a NEW list of token lists: writing the tokens back into
+    # ``references`` would silently change the caller's list, and a second
+    # call with that same list would then try to tokenize token lists.
+    references = [tokenize(reference) for reference in references]
+
+    for reference in references:
+        if len(reference) == 0 or len(hypothesis) == 0:
+            raise ValueError("One of the sentence is empty. Exit.")
+
+        # Calculate the length penalty due to the difference in the length of reference and hypothesis.
+        lp = length_penalty(reference, hypothesis)
+
+        # Calculate the penalty on different positions of same word in translation.
+        npd, match_count = ngram_positional_penalty(reference, hypothesis)
+
+        harmonic_score = harmonic(
+            match_count, len(reference), len(hypothesis), alpha, beta
+        )
+
+        lepor_scores.append(lp * npd * harmonic_score)
+
+    return lepor_scores
+
+
+def corpus_lepor(
+    references: List[List[str]],
+    hypothesis: List[str],
+    alpha: float = 1.0,
+    beta: float = 1.0,
+    tokenizer: Callable[[str], List[str]] = None,
+) -> List[List[float]]:
+    """
+    Calculate LEPOR score for list of sentences from Han, A. L.-F. (2017).
+    LEPOR: An Augmented Machine Translation Evaluation Metric. https://arxiv.org/abs/1703.08748v2
+
+    >>> hypothesis = ['a bird is on a stone.', 'scary crow was not bad.']
+
+    >>> references = [['a bird behind the stone.', 'a bird is on the rock'],
+    ...              ['scary cow was good.', 'scary crow was elegant.']]
+
+    >>> corpus_lepor(references, hypothesis)
+    [[0.7824248013113159, 0.7931427828105261], [0.5639427891892225, 0.7860963170056643]]
+
+
+    :param references: One list of reference sentences per hypothesis
+    :type references: list(list(str))
+    :param hypothesis: Hypothesis sentences
+    :type hypothesis: list(str)
+    :param alpha: A parameter to set weight for recall.
+    :type alpha: float
+    :param beta: A parameter to set weight for precision.
+    :type beta: float
+    :param tokenizer: A callable tokenizer that will accept a string and returns a list of tokens.
+    :type tokenizer: Callable[[str], List[str]]
+
+    :return: One list per hypothesis, holding its LEPOR score against each of its references.
+    :rtype: list(list(float))
+
+    """
+
+    if len(references) == 0 or len(hypothesis) == 0:
+        raise ValueError("There is an Empty list. Exit.")
+    # NOTE(review): the length check below is an ``assert``, so it is skipped under ``python -O``.
+    assert len(references) == len(hypothesis), (
+        "The number of hypothesis and their reference(s) should be the " "same "
+    )
+
+    lepor_scores = list()
+
+    for reference_sen, hypothesis_sen in zip(references, hypothesis):
+        # Calculate Lepor for each sentence separately and append in a list.
+        lepor_scores.append(
+            sentence_lepor(reference_sen, hypothesis_sen, alpha, beta, tokenizer)
+        )
+
+    return lepor_scores
--- a/backend/venv/Lib/site-packages/nltk/translate/meteor_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/meteor_score.py
@@ -0,0 +1,409 @@
+# Natural Language Toolkit: Machine Translation
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Uday Krishna <udaykrishna5@gmail.com>
+# Contributor: Tom Aarsen
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+from itertools import chain, product
+from typing import Callable, Iterable, List, Tuple
+
+from nltk.corpus import WordNetCorpusReader, wordnet
+from nltk.stem.api import StemmerI
+from nltk.stem.porter import PorterStemmer
+
+
+def _generate_enums(
+    hypothesis: Iterable[str],
+    reference: Iterable[str],
+    preprocess: Callable[[str], str] = str.lower,
+) -> Tuple[List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Apply ``preprocess`` to every token and enumerate hypothesis and reference.
+
+    :param hypothesis: pre-tokenized hypothesis
+    :param reference: pre-tokenized reference
+    :param preprocess: preprocessing method (default str.lower)
+    :return: enumerated hypothesis word list, enumerated reference word list
+    :raises TypeError: if hypothesis or reference is a plain string
+    """
+    if isinstance(hypothesis, str):
+        raise TypeError(
+            f'"hypothesis" expects pre-tokenized hypothesis (Iterable[str]): {hypothesis}'
+        )
+
+    if isinstance(reference, str):
+        raise TypeError(
+            f'"reference" expects pre-tokenized reference (Iterable[str]): {reference}'
+        )
+
+    enum_hypothesis_list = list(enumerate(map(preprocess, hypothesis)))
+    enum_reference_list = list(enumerate(map(preprocess, reference)))
+    return enum_hypothesis_list, enum_reference_list
+
+
+def exact_match(
+    hypothesis: Iterable[str], reference: Iterable[str]
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Matches identical words in hypothesis and reference (compared after the
+    default ``str.lower`` preprocessing of ``_generate_enums``) and returns a
+    word mapping based on the enumerated word id between the two.
+
+    :param hypothesis: pre-tokenized hypothesis
+    :param reference: pre-tokenized reference
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _match_enums(enum_hypothesis_list, enum_reference_list)
+
+
+def _match_enums(
+    enum_hypothesis_list: List[Tuple[int, str]],
+    enum_reference_list: List[Tuple[int, str]],
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Matches identical words in hypothesis and reference and returns the
+    (hypothesis id, reference id) pairs. Matched entries are popped from both
+    input lists IN PLACE; the same (now shorter) list objects are returned.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    """
+    word_match = []
+    for i in range(len(enum_hypothesis_list))[::-1]:
+        for j in range(len(enum_reference_list))[::-1]:
+            if enum_hypothesis_list[i][1] == enum_reference_list[j][1]:
+                word_match.append(
+                    (enum_hypothesis_list[i][0], enum_reference_list[j][0])
+                )
+                enum_hypothesis_list.pop(i)
+                enum_reference_list.pop(j)
+                break
+    return word_match, enum_hypothesis_list, enum_reference_list
+
+
+def _enum_stem_match(
+    enum_hypothesis_list: List[Tuple[int, str]],
+    enum_reference_list: List[Tuple[int, str]],
+    stemmer: StemmerI = PorterStemmer(),
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Stems each word and matches the stems in hypothesis and reference,
+    returning a word mapping based on the enumerated word id. The unmatched
+    lists that are returned are new lists holding the STEMMED words; the
+    input lists themselves are left untouched.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    """
+    stemmed_enum_hypothesis_list = [
+        (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_hypothesis_list
+    ]
+
+    stemmed_enum_reference_list = [
+        (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_reference_list
+    ]
+
+    return _match_enums(stemmed_enum_hypothesis_list, stemmed_enum_reference_list)
+
+
+def stem_match(
+    hypothesis: Iterable[str],
+    reference: Iterable[str],
+    stemmer: StemmerI = PorterStemmer(),
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Stems each (lowercased) word and matches the stems in hypothesis and
+    reference, returning a word mapping between hypothesis and reference
+
+    :param hypothesis: pre-tokenized hypothesis
+    :param reference: pre-tokenized reference
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_stem_match(enum_hypothesis_list, enum_reference_list, stemmer=stemmer)
+
+
+def _enum_wordnetsyn_match(
+    enum_hypothesis_list: List[Tuple[int, str]],
+    enum_reference_list: List[Tuple[int, str]],
+    wordnet: WordNetCorpusReader = wordnet,
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Matches a reference word to a hypothesis word if it equals that word or one of
+    its underscore-free WordNet lemma names; matched entries are popped in place.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :return: enumerated matched tuples, unmatched hypothesis tuples, unmatched reference tuples
+    """
+    word_match = []
+    for i in range(len(enum_hypothesis_list))[::-1]:
+        hypothesis_syns = set(
+            chain.from_iterable(
+                (
+                    lemma.name()
+                    for lemma in synset.lemmas()
+                    if lemma.name().find("_") < 0
+                )
+                for synset in wordnet.synsets(enum_hypothesis_list[i][1])
+            )
+        ).union({enum_hypothesis_list[i][1]})
+        for j in range(len(enum_reference_list))[::-1]:
+            if enum_reference_list[j][1] in hypothesis_syns:
+                word_match.append(
+                    (enum_hypothesis_list[i][0], enum_reference_list[j][0])
+                )
+                enum_hypothesis_list.pop(i)
+                enum_reference_list.pop(j)
+                break
+    return word_match, enum_hypothesis_list, enum_reference_list
+
+
+def wordnetsyn_match(
+    hypothesis: Iterable[str],
+    reference: Iterable[str],
+    wordnet: WordNetCorpusReader = wordnet,
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Matches each word in reference to a word in hypothesis if any synonym
+    of a hypothesis word is the exact match to the reference word.
+
+    :param hypothesis: pre-tokenized hypothesis
+    :param reference: pre-tokenized reference
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, enumerated unmatched reference tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_wordnetsyn_match(
+        enum_hypothesis_list, enum_reference_list, wordnet=wordnet
+    )
+
+
+def _enum_align_words(
+    enum_hypothesis_list: List[Tuple[int, str]],
+    enum_reference_list: List[Tuple[int, str]],
+    stemmer: StemmerI = PorterStemmer(),
+    wordnet: WordNetCorpusReader = wordnet,
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Aligns/matches words in the hypothesis to reference by sequentially
+    applying exact match, stemmed match and wordnet based synonym match.
+    Each stage only sees the words left unmatched by the previous one.
+    NOTE(review): no crossing-minimisation step is visible here; each stage
+    keeps the first match it finds. Takes enumerated lists as input.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :return: sorted list of matched tuples, unmatched hypothesis list,
+             unmatched reference list
+    """
+    exact_matches, enum_hypothesis_list, enum_reference_list = _match_enums(
+        enum_hypothesis_list, enum_reference_list
+    )
+
+    stem_matches, enum_hypothesis_list, enum_reference_list = _enum_stem_match(
+        enum_hypothesis_list, enum_reference_list, stemmer=stemmer
+    )
+
+    wns_matches, enum_hypothesis_list, enum_reference_list = _enum_wordnetsyn_match(
+        enum_hypothesis_list, enum_reference_list, wordnet=wordnet
+    )
+
+    return (
+        sorted(
+            exact_matches + stem_matches + wns_matches, key=lambda wordpair: wordpair[0]
+        ),
+        enum_hypothesis_list,
+        enum_reference_list,
+    )
+
+
+def align_words(
+    hypothesis: Iterable[str],
+    reference: Iterable[str],
+    stemmer: StemmerI = PorterStemmer(),
+    wordnet: WordNetCorpusReader = wordnet,
+) -> Tuple[List[Tuple[int, int]], List[Tuple[int, str]], List[Tuple[int, str]]]:
+    """
+    Aligns/matches words in the hypothesis to reference by sequentially
+    applying exact match, stemmed match and wordnet based synonym match.
+    NOTE(review): no crossing-minimisation step is visible in this module;
+    each stage keeps the first match it finds.
+
+    :param hypothesis: pre-tokenized hypothesis
+    :param reference: pre-tokenized reference
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :return: sorted list of matched tuples, unmatched hypothesis list, unmatched reference list
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_align_words(
+        enum_hypothesis_list, enum_reference_list, stemmer=stemmer, wordnet=wordnet
+    )
+
+
+def _count_chunks(matches: List[Tuple[int, int]]) -> int:
+    """
+    Counts the fewest possible number of chunks such that matched unigrams
+    of each chunk are adjacent to each other. This is used to calculate the
+    fragmentation part of the metric.
+
+    :param matches: list containing a mapping of matched words (output of align_words)
+    :return: Number of chunks a sentence is divided into post alignment
+             (0 when there are no matches)
+    """
+    if not matches:
+        # No matched unigrams means no chunks at all, not one.
+        return 0
+    chunks = 1
+    for (hyp_prev, ref_prev), (hyp_next, ref_next) in zip(matches, matches[1:]):
+        # A new chunk starts whenever the next pair is not adjacent to the
+        # previous one in BOTH the hypothesis and the reference.
+        if hyp_next != hyp_prev + 1 or ref_next != ref_prev + 1:
+            chunks += 1
+    return chunks
+
+
+def single_meteor_score(
+    reference: Iterable[str],
+    hypothesis: Iterable[str],
+    preprocess: Callable[[str], str] = str.lower,
+    stemmer: StemmerI = PorterStemmer(),
+    wordnet: WordNetCorpusReader = wordnet,
+    alpha: float = 0.9,
+    beta: float = 3.0,
+    gamma: float = 0.5,
+) -> float:
+    """
+    Calculates METEOR score for single hypothesis and reference as per
+    "Meteor: An Automatic Metric for MT Evaluation with High Levels of
+    Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal,
+    in Proceedings of ACL.
+    https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+
+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', 'that', 'the', 'military', 'will', 'forever', 'heed', 'Party', 'commands']
+
+
+    >>> round(single_meteor_score(reference1, hypothesis1),4)
+    0.6944
+
+        If no words match during the alignment (or an input is empty) the
+        method returns the score as 0. We can safely return a zero instead of
+        raising a division by zero error as no match usually implies a bad translation.
+
+    >>> round(single_meteor_score(['this', 'is', 'a', 'cat'], ['non', 'matching', 'hypothesis']),4)
+    0.0
+
+    :param reference: pre-tokenized reference
+    :param hypothesis: pre-tokenized hypothesis
+    :param preprocess: preprocessing function (default str.lower)
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :param alpha: parameter for controlling relative weights of precision and recall.
+    :param beta: parameter for controlling shape of penalty as a
+                 function of fragmentation.
+    :param gamma: relative weight assigned to fragmentation penalty.
+    :return: The sentence-level METEOR score.
+    """
+    enum_hypothesis, enum_reference = _generate_enums(
+        hypothesis, reference, preprocess=preprocess
+    )
+    translation_length = len(enum_hypothesis)
+    reference_length = len(enum_reference)
+    matches, _, _ = _enum_align_words(
+        enum_hypothesis, enum_reference, stemmer=stemmer, wordnet=wordnet
+    )
+    matches_count = len(matches)
+    try:
+        precision = float(matches_count) / translation_length
+        recall = float(matches_count) / reference_length
+        fmean = (precision * recall) / (alpha * precision + (1 - alpha) * recall)
+        chunk_count = float(_count_chunks(matches))
+        frag_frac = chunk_count / matches_count
+    except ZeroDivisionError:
+        return 0.0
+    penalty = gamma * frag_frac**beta
+    return (1 - penalty) * fmean
+
+
+def meteor_score(
+    references: Iterable[Iterable[str]],
+    hypothesis: Iterable[str],
+    preprocess: Callable[[str], str] = str.lower,
+    stemmer: StemmerI = PorterStemmer(),
+    wordnet: WordNetCorpusReader = wordnet,
+    alpha: float = 0.9,
+    beta: float = 3.0,
+    gamma: float = 0.5,
+) -> float:
+    """
+    Calculates METEOR score for hypothesis with multiple references as
+    described in "Meteor: An Automatic Metric for MT Evaluation with
+    High Levels of Correlation with Human Judgments" by Alon Lavie and
+    Abhaya Agarwal, in Proceedings of ACL.
+    https://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+    In case of multiple references the best score is chosen. This method
+    iterates over single_meteor_score and picks the best pair among all
+    the references for a given hypothesis
+
+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 'that', 'the', 'military', 'always', 'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops', 'forever', 'hearing', 'the', 'activity', 'guidebook', 'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', 'that', 'the', 'military', 'will', 'forever', 'heed', 'Party', 'commands']
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which', 'guarantees', 'the', 'military', 'forces', 'always', 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', 'army', 'always', 'to', 'heed', 'the', 'directions', 'of', 'the', 'party']
+
+    >>> round(meteor_score([reference1, reference2, reference3], hypothesis1),4)
+    0.6944
+
+        If there is no words match during the alignment the method returns the
+        score as 0. We can safely  return a zero instead of raising a
+        division by zero error as no match usually implies a bad translation.
+
+    >>> round(meteor_score([['this', 'is', 'a', 'cat']], ['non', 'matching', 'hypothesis']),4)
+    0.0
+
+    :param references: pre-tokenized reference sentences
+    :param hypothesis: a pre-tokenized hypothesis sentence
+    :param preprocess: preprocessing function (default str.lower)
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :param alpha: parameter for controlling relative weights of precision and recall.
+    :param beta: parameter for controlling shape of penalty as a function
+                 of fragmentation.
+    :param gamma: relative weight assigned to fragmentation penalty.
+    :return: The sentence-level METEOR score.
+    :raises ValueError: if ``references`` is empty (``max`` of an empty sequence).
+    """
+    return max(
+        single_meteor_score(
+            reference,
+            hypothesis,
+            preprocess=preprocess,
+            stemmer=stemmer,
+            wordnet=wordnet,
+            alpha=alpha,
+            beta=beta,
+            gamma=gamma,
+        )
+        for reference in references
+    )
--- a/backend/venv/Lib/site-packages/nltk/translate/metrics.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/metrics.py
@@ -0,0 +1,41 @@
+# Natural Language Toolkit: Translation metrics
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Will Zhang <wilzzha@gmail.com>
+#         Guan Gui <ggui@student.unimelb.edu.au>
+#         Steven Bird <stevenbird1@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+def alignment_error_rate(reference, hypothesis, possible=None):
+    """
+    Return the Alignment Error Rate (AER) of an alignment
+    with respect to a "gold standard" reference alignment.
+    Return an error rate between 0.0 (perfect alignment) and 1.0 (no alignment).
+
+        >>> from nltk.translate import Alignment
+        >>> ref = Alignment([(0, 0), (1, 1), (2, 2)])
+        >>> test = Alignment([(0, 0), (1, 2), (2, 1)])
+        >>> alignment_error_rate(ref, test) # doctest: +ELLIPSIS
+        0.6666666666666667
+
+    :type reference: Alignment
+    :param reference: A gold standard alignment (sure alignments)
+    :type hypothesis: Alignment
+    :param hypothesis: A hypothesis alignment (aka. candidate alignments)
+    :type possible: Alignment or None
+    :param possible: A gold standard reference of possible alignments
+        (defaults to *reference* if None)
+    :rtype: float
+    :raises ZeroDivisionError: if both hypothesis and reference are empty
+    """
+    # AER = 1 - (|A & S| + |A & P|) / (|A| + |S|), with A=hypothesis, S=reference, P=possible.
+    if possible is None:
+        possible = reference
+    else:
+        assert reference.issubset(possible)  # sanity check
+
+    return 1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) / float(
+        len(hypothesis) + len(reference)
+    )
--- a/backend/venv/Lib/site-packages/nltk/translate/nist_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/nist_score.py
@@ -0,0 +1,195 @@
+# Natural Language Toolkit: NIST Score
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors:
+# Contributors:
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""NIST score implementation."""
+
+import fractions
+import math
+from collections import Counter
+
+from nltk.util import ngrams
+
+
+def sentence_nist(references, hypothesis, n=5):
+    """
+    Calculate the NIST score of one hypothesis (via a one-item ``corpus_nist`` call), from
+    George Doddington. 2002. "Automatic evaluation of machine translation quality
+    using n-gram co-occurrence statistics." Proceedings of HLT.
+    Morgan Kaufmann Publishers Inc. https://dl.acm.org/citation.cfm?id=1289189.1289273
+
+    DARPA commissioned NIST to develop an MT evaluation facility based on the BLEU
+    score. The official script used by NIST to compute BLEU and NIST score is
+    mteval-14.pl. The main differences are:
+
+     - BLEU uses geometric mean of the ngram overlaps, NIST uses arithmetic mean.
+     - NIST has a different brevity penalty
+     - NIST score from mteval-14.pl has a self-contained tokenizer
+
+    Note: The mteval-14.pl includes a smoothing function for BLEU score that is NOT
+          used in the NIST score computation.
+
+    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...               'ensures', 'that', 'the', 'military', 'always',
+    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
+    ...               'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...               'heed', 'Party', 'commands']
+
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+
+    >>> sentence_nist([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
+    3.3709...
+
+    >>> sentence_nist([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
+    1.4619...
+
+    :param references: reference sentences
+    :type references: list(list(str))
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param n: highest n-gram order
+    :type n: int
+    """
+    return corpus_nist([references], [hypothesis], n)
+
+
+def corpus_nist(list_of_references, hypotheses, n=5):
+    """
+    Calculate a single corpus-level NIST score (aka. system-level NIST) for all
+    the hypotheses and their respective references.
+
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param n: highest n-gram order
+    :type n: int
+    """
+    # Before proceeding to compute NIST, perform sanity checks.
+    assert len(list_of_references) == len(
+        hypotheses
+    ), "The number of hypotheses and their reference(s) should be the same"
+
+    # Collect the ngram counts from the reference sentences.
+    ngram_freq = Counter()
+    total_reference_words = 0
+    for (
+        references
+    ) in list_of_references:  # For each source sent, there's a list of reference sents.
+        for reference in references:
+            # For each order of ngram, count the ngram occurrences.
+            for i in range(1, n + 1):
+                ngram_freq.update(ngrams(reference, i))
+            total_reference_words += len(reference)
+
+    # Compute the information weights based on the reference sentences.
+    # Eqn 2 in Doddington (2002):
+    # Info(w_1 ... w_n) = log_2 [ (# of occurrences of w_1 ... w_n-1) / (# of occurrences of w_1 ... w_n) ]
+    information_weights = {}
+    for _ngram in ngram_freq:  # w_1 ... w_n
+        _mgram = _ngram[:-1]  #  w_1 ... w_n-1
+        # From https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v13a.pl#L546
+        # it's computed as such:
+        #     denominator = ngram_freq[_mgram] if _mgram and _mgram in ngram_freq else denominator = total_reference_words
+        #     information_weights[_ngram] = -1 * math.log(ngram_freq[_ngram]/denominator) / math.log(2)
+        #
+        # Mathematically, it's equivalent to our implementation:
+        if _mgram and _mgram in ngram_freq:
+            numerator = ngram_freq[_mgram]
+        else:
+            numerator = total_reference_words
+        information_weights[_ngram] = math.log(numerator / ngram_freq[_ngram], 2)
+
+    # Micro-average.
+    nist_precision_numerator_per_ngram = Counter()
+    nist_precision_denominator_per_ngram = Counter()
+    l_ref, l_sys = 0, 0
+    # For each order of ngram.
+    for i in range(1, n + 1):
+        # Iterate through each hypothesis and their corresponding references.
+        for references, hypothesis in zip(list_of_references, hypotheses):
+            hyp_len = len(hypothesis)
+
+            # Find reference with the best NIST score.
+            nist_score_per_ref = []
+            for reference in references:
+                _ref_len = len(reference)
+                # Counter of ngrams in hypothesis.
+                hyp_ngrams = (
+                    Counter(ngrams(hypothesis, i))
+                    if len(hypothesis) >= i
+                    else Counter()
+                )
+                ref_ngrams = (
+                    Counter(ngrams(reference, i)) if len(reference) >= i else Counter()
+                )
+                ngram_overlaps = hyp_ngrams & ref_ngrams
+                # Precision part of the score in Eqn 3
+                _numerator = sum(
+                    information_weights[_ngram] * count
+                    for _ngram, count in ngram_overlaps.items()
+                )
+                _denominator = sum(hyp_ngrams.values())
+                _precision = 0 if _denominator == 0 else _numerator / _denominator
+                nist_score_per_ref.append(
+                    (_precision, _numerator, _denominator, _ref_len)
+                )
+            # Best reference.
+            precision, numerator, denominator, ref_len = max(nist_score_per_ref)
+            nist_precision_numerator_per_ngram[i] += numerator
+            nist_precision_denominator_per_ngram[i] += denominator
+            l_ref += ref_len
+            l_sys += hyp_len
+
+    # Final NIST micro-average mean aggregation.
+    nist_precision = 0
+    for i in nist_precision_numerator_per_ngram:
+        denominator = nist_precision_denominator_per_ngram[i]
+        # An order with no hypothesis n-grams (every hypothesis shorter than i)
+        # contributes nothing instead of raising ZeroDivisionError on 0 / 0.
+        if denominator:
+            nist_precision += nist_precision_numerator_per_ngram[i] / denominator
+    # Eqn 3 in Doddington(2002)
+    return nist_precision * nist_length_penalty(l_ref, l_sys)
+
+
+def nist_length_penalty(ref_len, hyp_len):
+    """
+    Calculates the NIST length penalty, from Eq. 3 in Doddington (2002)
+
+        penalty = exp( beta * log( min( len(hyp)/len(ref) , 1.0 )) ** 2 )
+
+    where,
+
+        `beta` is chosen to make the brevity penalty factor = 0.5 when the
+        no. of words in the system output (hyp) is 2/3 of the average
+        no. of words in the reference translation (ref)
+
+    The NIST penalty differs from BLEU's in that it minimizes the impact on the
+    score of small variations in the length of a translation (see Fig. 4 in
+    Doddington (2002)). ``ref_len`` must be non-zero: it is used as a divisor.
+    """
+    ratio = hyp_len / ref_len
+    if 0 < ratio < 1:
+        ratio_x, score_x = 1.5, 0.5
+        beta = math.log(score_x) / math.log(ratio_x) ** 2
+        return math.exp(beta * math.log(ratio) ** 2)
+    else:  # ratio <= 0 or ratio >= 1
+        return max(min(ratio, 1.0), 0.0)
--- a/backend/venv/Lib/site-packages/nltk/translate/phrase_based.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/phrase_based.py
@@ -0,0 +1,193 @@
+# Natural Language Toolkit: Phrase Extraction Algorithm
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Authors: Liling Tan, Fredrik Hedman, Petra Barancikova
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+def extract(
+    f_start,
+    f_end,
+    e_start,
+    e_end,
+    alignment,
+    f_aligned,
+    srctext,
+    trgtext,
+    srclen,
+    trglen,
+    max_phrase_length,
+):
+    """
+    Validate one candidate phrase pair against the word alignment and, when
+    it is consistent, return it together with the variants obtained by
+    absorbing neighbouring unaligned foreign words.
+
+    A phrase pair (e, f) is consistent with an alignment A if and only if:
+
+    (i) no English word in the pair is aligned to a word outside it;
+    (ii) no foreign word in the pair is aligned to a word outside it;
+    (iii) the pair contains at least one alignment point.
+
+    This function rejects the candidate when ``f_end`` is negative or when an
+    alignment point whose foreign index lies in ``[f_start, f_end]`` has a
+    source index outside ``[e_start, e_end]``.
+
+    :type f_start: int
+    :param f_start: First index (inclusive) of the candidate foreign phrase
+    :type f_end: int
+    :param f_end: Last index (inclusive) of the candidate foreign phrase
+    :type e_start: int
+    :param e_start: First index (inclusive) of the source language phrase
+    :type e_end: int
+    :param e_end: Last index (inclusive) of the source language phrase
+    :type alignment: list(tuple)
+    :param alignment: Word alignment as (source index, foreign index) pairs.
+    :param f_aligned: Container of the foreign indices that are aligned; only
+        membership tests are performed on it.
+    :type srctext: list
+    :param srctext: The source language tokens, a list of string.
+    :type trgtext: list
+    :param trgtext: The target language tokens, a list of string.
+    :type srclen: int
+    :param srclen: The number of source tokens (not used by this function).
+    :type trglen: int
+    :param trglen: The number of tokens in the target language tokens.
+    :type max_phrase_length: int
+    :param max_phrase_length: Caps the initial end of the foreign span at
+        ``f_start + max_phrase_length - 1``.
+    :return: A set of ``((e_start, e_end + 1), (fs, fe + 1), src_phrase,
+        trg_phrase)`` tuples, or an empty dict when the candidate is rejected.
+    """
+
+    # No alignment point was found for the source span (0-based indexing).
+    if f_end < 0:
+        return {}
+
+    # A foreign word inside the span must not link to a source word outside.
+    breaks_consistency = any(
+        (f_start <= f <= f_end) and not (e_start <= e <= e_end)
+        for e, f in alignment
+    )
+    if breaks_consistency:
+        return {}
+
+    # Everything about the source side is fixed for the whole enumeration.
+    src_span = (e_start, e_end + 1)
+    src_phrase = " ".join(srctext[e_start : e_end + 1])
+    initial_fe = min(f_end, f_start + max_phrase_length - 1)
+
+    collected = set()
+    fs = f_start
+    grow_left = True
+    while grow_left:
+        fe = initial_fe
+        grow_right = True
+        while grow_right:
+            # Slices and recorded spans are end-exclusive, hence the +1.
+            trg_phrase = " ".join(trgtext[fs : fe + 1])
+            collected.add((src_span, (fs, fe + 1), src_phrase, trg_phrase))
+            fe += 1
+            # Stop at the next aligned foreign word or the sentence end.
+            grow_right = not (fe in f_aligned or fe >= trglen)
+        fs -= 1
+        # Stop at the previous aligned foreign word or the sentence start.
+        grow_left = not (fs in f_aligned or fs < 0)
+    return collected
+
+
+def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0):
+    """
+    Phrase extraction algorithm extracts all consistent phrase pairs from
+    a word-aligned sentence pair.
+
+    The idea is to loop over all possible source language (e) phrases and find
+    the minimal foreign phrase (f) that matches each of them. Matching is done
+    by identifying all alignment points for the source phrase and finding the
+    shortest foreign phrase that includes all the foreign counterparts for the
+    source words.
+
+    In short, a phrase alignment has to
+    (a) contain all alignment points for all covered words
+    (b) contain at least one alignment point
+
+    >>> srctext = "michael assumes that he will stay in the house"
+    >>> trgtext = "michael geht davon aus , dass er im haus bleibt"
+    >>> alignment = [(0,0), (1,1), (1,2), (1,3), (2,5), (3,6), (4,9),
+    ... (5,9), (6,7), (7,7), (8,8)]
+    >>> phrases = phrase_extraction(srctext, trgtext, alignment)
+    >>> for i in sorted(phrases):
+    ...    print(i)
+    ...
+    ((0, 1), (0, 1), 'michael', 'michael')
+    ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus')
+    ((0, 2), (0, 5), 'michael assumes', 'michael geht davon aus ,')
+    ((0, 3), (0, 6), 'michael assumes that', 'michael geht davon aus , dass')
+    ((0, 4), (0, 7), 'michael assumes that he', 'michael geht davon aus , dass er')
+    ((0, 9), (0, 10), 'michael assumes that he will stay in the house', 'michael geht davon aus , dass er im haus bleibt')
+    ((1, 2), (1, 4), 'assumes', 'geht davon aus')
+    ((1, 2), (1, 5), 'assumes', 'geht davon aus ,')
+    ((1, 3), (1, 6), 'assumes that', 'geht davon aus , dass')
+    ((1, 4), (1, 7), 'assumes that he', 'geht davon aus , dass er')
+    ((1, 9), (1, 10), 'assumes that he will stay in the house', 'geht davon aus , dass er im haus bleibt')
+    ((2, 3), (4, 6), 'that', ', dass')
+    ((2, 3), (5, 6), 'that', 'dass')
+    ((2, 4), (4, 7), 'that he', ', dass er')
+    ((2, 4), (5, 7), 'that he', 'dass er')
+    ((2, 9), (4, 10), 'that he will stay in the house', ', dass er im haus bleibt')
+    ((2, 9), (5, 10), 'that he will stay in the house', 'dass er im haus bleibt')
+    ((3, 4), (6, 7), 'he', 'er')
+    ((3, 9), (6, 10), 'he will stay in the house', 'er im haus bleibt')
+    ((4, 6), (9, 10), 'will stay', 'bleibt')
+    ((4, 9), (7, 10), 'will stay in the house', 'im haus bleibt')
+    ((6, 8), (7, 8), 'in the', 'im')
+    ((6, 9), (7, 9), 'in the house', 'im haus')
+    ((8, 9), (8, 9), 'house', 'haus')
+
+    :type srctext: str
+    :param srctext: The sentence string from the source language.
+    :type trgtext: str
+    :param trgtext: The sentence string from the target language.
+    :type alignment: list(tuple)
+    :param alignment: The word alignment outputs as list of tuples, where
+        the first elements of tuples are the source words' indices and
+        second elements are the target words' indices. This is also the output
+        format of nltk.translate.ibm1
+    :type max_phrase_length: int
+    :param max_phrase_length: maximal phrase length, if 0 or not specified
+        it is set to a length of the longer sentence (srctext or trgtext).
+    :rtype: set(tuple)
+    :return: A set of tuples, each element is a phrase and each phrase is a
+        tuple made up of (i) its source location, (ii) its target location,
+        (iii) the source phrase and (iv) the target phrase. The set holds all
+        the phrases extracted from the word alignments.
+    """
+
+    srctext = srctext.split()  # e
+    trgtext = trgtext.split()  # f
+    srclen = len(srctext)  # len(e)
+    trglen = len(trgtext)  # len(f)
+    # Index of the target positions that are aligned. ``extract`` only tests
+    # membership on it inside its inner loops, so a set keeps each test O(1).
+    f_aligned = {j for _, j in alignment}
+    max_phrase_length = max_phrase_length or max(srclen, trglen)
+
+    # set of phrase pairs BP
+    bp = set()
+
+    for e_start in range(srclen):
+        max_idx = min(srclen, e_start + max_phrase_length)
+        for e_end in range(e_start, max_idx):
+            # Find the minimally matching foreign phrase. Start from an
+            # "empty" span (f_end == -1) that ``extract`` rejects unless
+            # at least one alignment point is found below.
+            f_start, f_end = trglen - 1, -1  #  0-based indexing
+
+            for e, f in alignment:
+                if e_start <= e <= e_end:
+                    f_start = min(f, f_start)
+                    f_end = max(f, f_end)
+            # add extract (f start , f end , e start , e end ) to set BP
+            phrases = extract(
+                f_start,
+                f_end,
+                e_start,
+                e_end,
+                alignment,
+                f_aligned,
+                srctext,
+                trgtext,
+                srclen,
+                trglen,
+                max_phrase_length,
+            )
+            if phrases:
+                bp.update(phrases)
+    return bp
--- a/backend/venv/Lib/site-packages/nltk/translate/ribes_score.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/ribes_score.py
@@ -0,0 +1,330 @@
+# Natural Language Toolkit: RIBES Score
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian
+#               Mark Byers, ekhumoro, P. Ortiz
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+""" RIBES score implementation """
+
+import math
+from itertools import islice
+
+from nltk.util import choose, ngrams
+
+
+def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10):
+    """
+    The RIBES (Rank-based Intuitive Bilingual Evaluation Score) from
+    Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and
+    Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for
+    Distant Language Pairs". In Proceedings of EMNLP.
+    https://www.aclweb.org/anthology/D/D10/D10-1092.pdf
+
+    The generic RIBES scores used in shared task, e.g. Workshop for
+    Asian Translation (WAT) uses the following RIBES calculations:
+
+        RIBES = kendall_tau * (p1**alpha) * (bp**beta)
+
+    Please note that this re-implementation differs from the official
+    RIBES implementation and though it emulates the results as described
+    in the original paper, there are further optimizations implemented
+    in the official RIBES script.
+
+    Users are encouraged to use the official RIBES script instead of this
+    implementation when evaluating your machine translation system. Refer
+    to https://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script.
+
+    :param references: a list of reference sentences
+    :type references: list(list(str))
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param alpha: hyperparameter used as a prior for the unigram precision.
+    :type alpha: float
+    :param beta: hyperparameter used as a prior for the brevity penalty.
+    :type beta: float
+    :return: The best ribes score from one of the references; 0.0 for an
+        empty hypothesis, and -1.0 when no reference is given.
+    :rtype: float
+    """
+    best_ribes = -1.0
+    hyp_len = len(hypothesis)
+    # Calculates RIBES for each reference and returns the best score.
+    for reference in references:
+        if hyp_len == 0:
+            # An empty hypothesis has no words to align: both the brevity
+            # penalty and the unigram precision would divide by zero, so
+            # score it 0.0 instead of raising ZeroDivisionError.
+            _ribes = 0.0
+        else:
+            # Collects the *worder* from the ranked correlation alignments.
+            worder = word_rank_alignment(reference, hypothesis)
+            nkt = kendall_tau(worder)
+
+            # Calculates the brevity penalty
+            bp = min(1.0, math.exp(1.0 - len(reference) / hyp_len))
+
+            # Calculates the unigram precision, *p1*
+            p1 = len(worder) / hyp_len
+
+            _ribes = nkt * (p1**alpha) * (bp**beta)
+
+        if _ribes > best_ribes:  # Keeps the best score.
+            best_ribes = _ribes
+
+    return best_ribes
+
+
+def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10):
+    """
+    Corpus-level RIBES: the per-sentence best RIBES scores (best over each
+    hypothesis' references) averaged by the number of hypotheses.
+    (c.f. RIBES version 1.03.1 code).
+
+    Unlike BLEU's micro-average precision, this is a macro-average: every
+    hypothesis contributes its own best ``sentence_ribes`` score and the
+    scores are then averaged.
+
+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...         'ensures', 'that', 'the', 'military', 'always',
+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...          'heed', 'Party', 'commands']
+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...          'guarantees', 'the', 'military', 'forces', 'always',
+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...          'of', 'the', 'party']
+
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
+    ...         'interested', 'in', 'world', 'history']
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
+    ...          'because', 'he', 'read', 'the', 'book']
+
+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+    >>> hypotheses = [hyp1, hyp2]
+    >>> round(corpus_ribes(list_of_references, hypotheses),4)
+    0.3597
+
+    :param list_of_references: a corpus of lists of reference sentences,
+        w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param alpha: hyperparameter used as a prior for the unigram precision.
+    :type alpha: float
+    :param beta: hyperparameter used as a prior for the brevity penalty.
+    :type beta: float
+    :return: The mean of the best per-sentence ribes scores.
+    :rtype: float
+    """
+    # Pair every hypothesis with its own references and add up the scores.
+    per_sentence_best = (
+        sentence_ribes(refs, hyp, alpha, beta)
+        for refs, hyp in zip(list_of_references, hypotheses)
+    )
+    total = sum(per_sentence_best, 0.0)
+    return total / len(hypotheses)
+
+
+def position_of_ngram(ngram, sentence):
+    """
+    Return the index of the first occurrence of ``ngram`` in ``sentence``,
+    or None when the ngram does not occur.
+
+    The same result could be obtained through strings, at the price of
+    casting back and forth:
+
+        char_pos = ' '.join(sent)[:' '.join(sent).index(' '.join(ngram))]
+        word_pos = char_pos.count(' ')
+
+    :param ngram: The ngram that needs to be searched
+    :type ngram: tuple
+    :param sentence: The list of tokens to search from.
+    :type sentence: list(str)
+    """
+    # Lazily enumerate the sentence's ngrams of the same order and stop at
+    # the first one equal to the target.
+    matching_positions = (
+        index
+        for index, candidate in enumerate(ngrams(sentence, len(ngram)))
+        if ngram == candidate
+    )
+    return next(matching_positions, None)
+
+
+def word_rank_alignment(reference, hypothesis, character_based=False):
+    """
+    This is the word rank alignment algorithm described in the paper to produce
+    the *worder* list, i.e. a list of word indices of the hypothesis word orders
+    w.r.t. the list of reference words.
+
+    Below is (H0, R0) example from the Isozaki et al. 2010 paper,
+    note the examples are indexed from 1 but the results here are indexed from 0:
+
+        >>> ref = str('he was interested in world history because he '
+        ... 'read the book').split()
+        >>> hyp = str('he read the book because he was interested in world '
+        ... 'history').split()
+        >>> word_rank_alignment(ref, hyp)
+        [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+
+    The (H1, R1) example from the paper, note the 0th index:
+
+        >>> ref = 'John hit Bob yesterday'.split()
+        >>> hyp = 'Bob hit John yesterday'.split()
+        >>> word_rank_alignment(ref, hyp)
+        [2, 1, 0, 3]
+
+    Here is the (H2, R2) example from the paper, note the 0th index here too:
+
+        >>> ref = 'the boy read the book'.split()
+        >>> hyp = 'the book was read by the boy'.split()
+        >>> word_rank_alignment(ref, hyp)
+        [3, 4, 2, 0, 1]
+
+    :param reference: a reference sentence
+    :type reference: list(str)
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param character_based: accepted for interface compatibility; it is not
+        used by this function.
+    :type character_based: bool
+    :return: reference positions of the hypothesis words that could be
+        aligned unambiguously, in hypothesis order.
+    :rtype: list(int)
+    """
+    from collections import Counter
+
+    worder = []
+    hyp_len = len(hypothesis)
+    # Occurrence counts of every ngram (orders 1..len(reference)) in the
+    # reference and in the hypothesis. They are consulted for each context
+    # window below, so counting once up front replaces a full list scan per
+    # lookup with a dictionary access.
+    ref_ngram_counts = Counter()
+    hyp_ngram_counts = Counter()
+    for n in range(1, len(reference) + 1):
+        ref_ngram_counts.update(ngrams(reference, n))
+        hyp_ngram_counts.update(ngrams(hypothesis, n))
+    for i, h_word in enumerate(hypothesis):
+        # If word is not in the reference, continue.
+        if h_word not in reference:
+            continue
+        # If we can determine one-to-one word correspondence for unigrams that
+        # only appear once in both the reference and hypothesis.
+        elif hypothesis.count(h_word) == reference.count(h_word) == 1:
+            worder.append(reference.index(h_word))
+        else:
+            # Ambiguous word: widen the context window until an ngram that
+            # is unique in both sentences pins the word down. A word for
+            # which no such window exists is left out of *worder*.
+            max_window_size = max(i, hyp_len - i + 1)
+            for window in range(1, max_window_size):
+                if i + window < hyp_len:  # If searching the right context is possible.
+                    # Retrieve the right context window.
+                    right_context_ngram = tuple(islice(hypothesis, i, i + window + 1))
+                    num_times_in_ref = ref_ngram_counts[right_context_ngram]
+                    num_times_in_hyp = hyp_ngram_counts[right_context_ngram]
+                    # If ngram appears only once in both ref and hyp.
+                    if num_times_in_ref == num_times_in_hyp == 1:
+                        # Find the position of ngram that matched the reference.
+                        pos = position_of_ngram(right_context_ngram, reference)
+                        worder.append(pos)  # Add the positions of the ngram.
+                        break
+                if window <= i:  # If searching the left context is possible.
+                    # Retrieve the left context window.
+                    left_context_ngram = tuple(islice(hypothesis, i - window, i + 1))
+                    num_times_in_ref = ref_ngram_counts[left_context_ngram]
+                    num_times_in_hyp = hyp_ngram_counts[left_context_ngram]
+                    if num_times_in_ref == num_times_in_hyp == 1:
+                        # Find the position of ngram that matched the reference.
+                        pos = position_of_ngram(left_context_ngram, reference)
+                        # The word sits at the end of the left context ngram.
+                        worder.append(pos + len(left_context_ngram) - 1)
+                        break
+    return worder
+
+
+def find_increasing_sequences(worder):
+    """
+    Yield, as tuples, the runs of consecutive (+1 step) values found in the
+    *worder* list. Runs of a single value are not reported.
+
+        >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> list(find_increasing_sequences(worder))
+        [(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)]
+
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    """
+    run = []
+    for rank in worder:
+        if rank is None:
+            # A None entry ends the scan; whatever run is open is flushed
+            # after the loop.
+            break
+        if run and run[-1] + 1 == rank:
+            # Still climbing by exactly one: extend the current run.
+            run.append(rank)
+            continue
+        # The run is broken: report it if it has at least two members and
+        # start a new one at the current value.
+        if len(run) > 1:
+            yield tuple(run)
+        run = [rank]
+    if len(run) > 1:
+        yield tuple(run)
+
+
+def kendall_tau(worder, normalize=True):
+    """
+    Compute the Kendall's Tau correlation coefficient of the *worder* list
+    produced by word_rank_alignment(), with the formula:
+
+        tau = 2 * num_increasing_pairs / num_possible_pairs -1
+
+    The increasing pairs are tallied per increasing sequence of the *worder*
+    list, each sequence contributing choose(len(seq), 2) pairs, e.g.
+
+        >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> number_possible_pairs = choose(len(worder), 2)
+        >>> round(kendall_tau(worder, normalize=False),3)
+        -0.236
+        >>> round(kendall_tau(worder),3)
+        0.382
+
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    :param normalize: Flag to indicate normalization to between 0.0 and 1.0.
+    :type normalize: boolean
+    :return: The Kendall's Tau correlation coefficient.
+    :rtype: float
+    """
+    worder_len = len(worder)
+    if worder_len >= 2:
+        # Total number of pairs that can be drawn from the list.
+        num_possible_pairs = choose(worder_len, 2)
+        # Pairs found inside the monotonic (+1) sequences of the list.
+        num_increasing_pairs = sum(
+            choose(len(seq), 2) for seq in find_increasing_sequences(worder)
+        )
+        tau = 2 * num_increasing_pairs / num_possible_pairs - 1
+    else:
+        # Fewer than two items means choose(worder_len, 2) == 0, i.e. a
+        # division by zero above; use the lowest possible score instead.
+        tau = -1
+    # Normalized output lies in [0.0, 1.0], the raw one in [-1.0, +1.0].
+    return (tau + 1) / 2 if normalize else tau
+
+
+def spearman_rho(worder, normalize=True):
+    """
+    Calculates the Spearman's Rho correlation coefficient given the *worder*
+    list of word alignment from word_rank_alignment(), using the formula:
+
+        rho = 1 - sum(d**2) / choose(len(worder)+1, 3)
+
+    Given that d is the difference between each entry of the *worder* list
+    and its own position in that list.
+
+    Using the (H0,R0) and (H5, R5) example from the paper
+
+        >>> worder =  [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> round(spearman_rho(worder, normalize=False), 3)
+        -0.591
+        >>> round(spearman_rho(worder), 3)
+        0.205
+
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    :param normalize: Flag to indicate normalization to between 0.0 and 1.0.
+    :type normalize: boolean
+    :return: The Spearman's Rho correlation coefficient; the lowest possible
+        score when *worder* has fewer than two items.
+    :rtype: float
+    """
+    worder_len = len(worder)
+    # With worder_len < 2, `choose(worder_len + 1, 3)` is 0 and dividing by
+    # it would raise ZeroDivisionError. As in kendall_tau(), return the
+    # lowest possible score instead.
+    if worder_len < 2:
+        rho = -1
+    else:
+        sum_d_square = sum((wi - i) ** 2 for i, wi in enumerate(worder))
+        rho = 1 - sum_d_square / choose(worder_len + 1, 3)
+
+    if normalize:  # If normalized, the rho output falls between 0.0 to 1.0
+        return (rho + 1) / 2
+    else:  # Otherwise, the rho outputs falls between -1.0 to +1.0
+        return rho
--- a/backend/venv/Lib/site-packages/nltk/translate/stack_decoder.py
+++ b/backend/venv/Lib/site-packages/nltk/translate/stack_decoder.py
@@ -0,0 +1,515 @@
+# Natural Language Toolkit: Stack decoder
+#
+# Copyright (C) 2001-2025 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <https://www.nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A decoder that uses stacks to implement phrase-based translation.
+
+In phrase-based translation, the source sentence is segmented into
+phrases of one or more words, and translations for those phrases are
+used to build the target sentence.
+
+Hypothesis data structures are used to keep track of the source words
+translated so far and the partial output. A hypothesis can be expanded
+by selecting an untranslated phrase, looking up its translation in a
+phrase table, and appending that translation to the partial output.
+Translation is complete when a hypothesis covers all source words.
+
+The search space is huge because the source sentence can be segmented
+in different ways, the source phrases can be selected in any order,
+and there could be multiple translations for the same source phrase in
+the phrase table. To make decoding tractable, stacks are used to limit
+the number of candidate hypotheses by doing histogram and/or threshold
+pruning.
+
+Hypotheses with the same number of words translated are placed in the
+same stack. In histogram pruning, each stack has a size limit, and
+the hypothesis with the lowest score is removed when the stack is full.
+In threshold pruning, hypotheses that score below a certain threshold
+of the best hypothesis in that stack are removed.
+
+Hypothesis scoring can include various factors such as phrase
+translation probability, language model probability, length of
+translation, cost of remaining words to be translated, and so on.
+
+
+References:
+Philipp Koehn. 2010. Statistical Machine Translation.
+Cambridge University Press, New York.
+"""
+
+import warnings
+from collections import defaultdict
+from math import log
+
+
+class StackDecoder:
+    """
+    Phrase-based stack decoder for machine translation
+
+    >>> from nltk.translate import PhraseTable
+    >>> phrase_table = PhraseTable()
+    >>> phrase_table.add(('niemand',), ('nobody',), log(0.8))
+    >>> phrase_table.add(('niemand',), ('no', 'one'), log(0.2))
+    >>> phrase_table.add(('erwartet',), ('expects',), log(0.8))
+    >>> phrase_table.add(('erwartet',), ('expecting',), log(0.2))
+    >>> phrase_table.add(('niemand', 'erwartet'), ('one', 'does', 'not', 'expect'), log(0.1))
+    >>> phrase_table.add(('die', 'spanische', 'inquisition'), ('the', 'spanish', 'inquisition'), log(0.8))
+    >>> phrase_table.add(('!',), ('!',), log(0.8))
+
+    >>> #  nltk.model should be used here once it is implemented
+    >>> from collections import defaultdict
+    >>> language_prob = defaultdict(lambda: -999.0)
+    >>> language_prob[('nobody',)] = log(0.5)
+    >>> language_prob[('expects',)] = log(0.4)
+    >>> language_prob[('the', 'spanish', 'inquisition')] = log(0.2)
+    >>> language_prob[('!',)] = log(0.1)
+    >>> language_model = type('',(object,),{'probability_change': lambda self, context, phrase: language_prob[phrase], 'probability': lambda self, phrase: language_prob[phrase]})()
+
+    >>> stack_decoder = StackDecoder(phrase_table, language_model)
+
+    >>> stack_decoder.translate(['niemand', 'erwartet', 'die', 'spanische', 'inquisition', '!'])
+    ['nobody', 'expects', 'the', 'spanish', 'inquisition', '!']
+
+    """
+
+    def __init__(self, phrase_table, language_model):
+        """
+        Store the translation and language models and set the tunable
+        decoding parameters to their default values.
+
+        :param phrase_table: Table of translations for source language
+            phrases and the log probabilities for those translations.
+        :type phrase_table: PhraseTable
+
+        :param language_model: Target language model. Must define a
+            ``probability_change`` method that calculates the change in
+            log probability of a sentence, if a given string is appended
+            to it.
+            This interface is experimental and will likely be replaced
+            with nltk.model once it is implemented.
+        :type language_model: object
+        """
+        self.phrase_table = phrase_table
+        self.language_model = language_model
+
+        self.word_penalty = 0.0
+        """
+        float: Influences the translation length exponentially.
+            If positive, shorter translations are preferred.
+            If negative, longer translations are preferred.
+            If zero, no penalty is applied.
+        """
+
+        self.beam_threshold = 0.0
+        """
+        float: Hypotheses that score below this factor of the best
+            hypothesis in a stack are dropped from consideration.
+            Value between 0.0 and 1.0.
+        """
+
+        self.stack_size = 100
+        """
+        int: Maximum number of hypotheses to consider in a stack.
+            Higher values increase the likelihood of a good translation,
+            but increases processing time.
+        """
+
+        # Backing field of the ``distortion_factor`` property; it is set
+        # directly here and its logarithm is cached right away.
+        self.__distortion_factor = 0.5
+        self.__compute_log_distortion()
+
+    @property
+    def distortion_factor(self):
+        """
+        float: Amount of reordering of source phrases.
+            Lower values favour monotone translation, suitable when
+            word order is similar for both source and target languages.
+            Value between 0.0 and 1.0. Default 0.5.
+            Assigning a new value also recomputes the cached logarithm
+            of the factor.
+        """
+        return self.__distortion_factor
+
+    @distortion_factor.setter
+    def distortion_factor(self, d):
+        """
+        Store the new distortion factor ``d`` and refresh the cached
+        logarithm derived from it.
+        """
+        self.__distortion_factor = d
+        self.__compute_log_distortion()
+
+    def __compute_log_distortion(self):
+        """
+        Cache log(distortion_factor) so that it does not have to be
+        recomputed when scoring hypotheses.
+        """
+        factor = self.__distortion_factor
+        # log(0) is undefined; a factor of exactly zero is replaced by the
+        # almost-zero value 1e-9.
+        self.__log_distortion_factor = log(1e-9) if factor == 0.0 else log(factor)
+
+    def translate(self, src_sentence):
+        """
+        Translate ``src_sentence`` by expanding partial hypotheses that are
+        kept in one stack per number of translated words.
+
+        :param src_sentence: Sentence to be translated
+        :type src_sentence: list(str)
+
+        :return: Translated sentence; an empty list (after a warning) when
+            the last stack ends up without any hypothesis
+        :rtype: list(str)
+        """
+        sentence = tuple(src_sentence)  # prevent accidental modification
+        sentence_length = len(sentence)
+        # One stack for every index 0..sentence_length; new hypotheses are
+        # filed below under their total_translated_words() value.
+        stacks = [
+            _Stack(self.stack_size, self.beam_threshold)
+            for _ in range(0, sentence_length + 1)
+        ]
+        # Seed the search with a hypothesis built from no arguments.
+        empty_hypothesis = _Hypothesis()
+        stacks[0].push(empty_hypothesis)
+
+        # Both tables depend only on the sentence, so build them once.
+        all_phrases = self.find_all_src_phrases(sentence)
+        future_score_table = self.compute_future_scores(sentence)
+        for stack in stacks:
+            for hypothesis in stack:
+                # Source spans this hypothesis may still be extended with.
+                possible_expansions = StackDecoder.valid_phrases(
+                    all_phrases, hypothesis
+                )
+                for src_phrase_span in possible_expansions:
+                    # Spans are (start, end) with an exclusive end.
+                    src_phrase = sentence[src_phrase_span[0] : src_phrase_span[1]]
+                    # Try every translation the phrase table offers.
+                    for translation_option in self.phrase_table.translations_for(
+                        src_phrase
+                    ):
+                        raw_score = self.expansion_score(
+                            hypothesis, translation_option, src_phrase_span
+                        )
+                        new_hypothesis = _Hypothesis(
+                            raw_score=raw_score,
+                            src_phrase_span=src_phrase_span,
+                            trg_phrase=translation_option.trg_phrase,
+                            previous=hypothesis,
+                        )
+                        # The estimate for the still untranslated words is
+                        # attached to the hypothesis before it is pushed.
+                        new_hypothesis.future_score = self.future_score(
+                            new_hypothesis, future_score_table, sentence_length
+                        )
+                        total_words = new_hypothesis.total_translated_words()
+                        stacks[total_words].push(new_hypothesis)
+
+        # The last stack is the one indexed by the full sentence length.
+        if not stacks[sentence_length]:
+            warnings.warn(
+                "Unable to translate all words. "
+                "The source sentence contains words not in "
+                "the phrase table"
+            )
+            # Instead of returning empty output, perhaps a partial
+            # translation could be returned
+            return []
+
+        best_hypothesis = stacks[sentence_length].best()
+        return best_hypothesis.translation_so_far()
+
+    def find_all_src_phrases(self, src_sentence):
+        """
+        Locate every contiguous subsequence of ``src_sentence`` that is
+        present in the phrase table.
+
+        :type src_sentence: tuple(str)
+
+        :return: For each start position, the ascending list of exclusive
+            end positions of the phrases that begin there. For example,
+            if result[2] is [5, 6, 9], then three phrases start at
+            position 2 of ``src_sentence`` and end at positions 5, 6,
+            and 9 exclusive.
+        :rtype: list(list(int))
+        """
+        sentence_length = len(src_sentence)
+        # Outer level: one list per start position. Inner level: the ends
+        # (tried in ascending order) whose slice is a known phrase.
+        return [
+            [
+                end
+                for end in range(start + 1, sentence_length + 1)
+                if src_sentence[start:end] in self.phrase_table
+            ]
+            for start in range(sentence_length)
+        ]
+
+    def compute_future_scores(self, src_sentence):
+        """
+        Determines the approximate scores for translating every
+        subsequence in ``src_sentence``
+
+        Future scores can be used as a look-ahead to determine the
+        difficulty of translating the remaining parts of a src_sentence.
+
+        Besides the phrase table, this method calls the ``probability``
+        method of the language model.
+
+        :type src_sentence: tuple(str)
+
+        :return: Scores of subsequences referenced by their start and
+            end positions. For example, result[2][5] is the score of the
+            subsequence covering positions 2, 3, and 4.
+        :rtype: dict(int: (dict(int): float))
+        """
+        # Nested defaultdicts: a span that was never scored reads as -inf
+        # (and reading it stores that -inf entry in the table).
+        scores = defaultdict(lambda: defaultdict(lambda: float("-inf")))
+        # Shorter subsequences are handled first, so the two halves combined
+        # in the innermost loop have already been visited.
+        for seq_length in range(1, len(src_sentence) + 1):
+            for start in range(0, len(src_sentence) - seq_length + 1):
+                end = start + seq_length
+                phrase = src_sentence[start:end]
+                if phrase in self.phrase_table:
+                    score = self.phrase_table.translations_for(phrase)[
+                        0
+                    ].log_prob  # pick best (first) translation
+                    # Warning: API of language_model is subject to change
+                    score += self.language_model.probability(phrase)
+                    scores[start][end] = score
+
+                # check if a better score can be obtained by combining
+                # two child subsequences
+                for mid in range(start + 1, end):
+                    combined_score = scores[start][mid] + scores[mid][end]
+                    if combined_score > scores[start][end]:
+                        scores[start][end] = combined_score
+        return scores
+
+    def future_score(self, hypothesis, future_score_table, sentence_length):
+        """
+        Add up the look-ahead scores of every span of the sentence that
+        ``hypothesis`` has not translated yet.
+
+        :param hypothesis: Hypothesis whose untranslated spans are scored
+        :param future_score_table: Table indexed by span start and then
+            by exclusive span end
+        :param sentence_length: Length of the source sentence
+        :return: Sum of the table entries of all untranslated spans;
+            0.0 when nothing is left to translate
+        """
+        total = 0.0
+        for begin, stop in hypothesis.untranslated_spans(sentence_length):
+            total += future_score_table[begin][stop]
+        return total
+
+    def expansion_score(self, hypothesis, translation_option, src_phrase_span):
+        """
+        Score the hypothesis that results from extending ``hypothesis``
+        with ``translation_option``.
+
+        The result is the raw score of ``hypothesis`` plus the log
+        probability of the option, the language model change, and the
+        distortion score, minus ``word_penalty`` for every word of the
+        target phrase.
+
+        :param hypothesis: Hypothesis being expanded
+        :type hypothesis: _Hypothesis
+
+        :param translation_option: Information about the proposed expansion
+        :type translation_option: PhraseTableEntry
+
+        :param src_phrase_span: Word position span of the source phrase
+        :type src_phrase_span: tuple(int, int)
+        """
+        target_words = translation_option.trg_phrase
+        # The API of language_model is subject to change; it could accept
+        # a string, a list of words, and/or some other type
+        lm_delta = self.language_model.probability_change(hypothesis, target_words)
+        distortion = self.distortion_score(hypothesis, src_phrase_span)
+        length_cost = self.word_penalty * len(target_words)
+        return (
+            hypothesis.raw_score
+            + translation_option.log_prob
+            + lm_delta
+            + distortion
+            - length_cost
+        )
+
+    def distortion_score(self, hypothesis, next_src_phrase_span):
+        """
+        Score the jump from the source phrase of ``hypothesis`` to the
+        source phrase that would be translated next.
+
+        :param hypothesis: Hypothesis being expanded
+        :type hypothesis: _Hypothesis
+
+        :param next_src_phrase_span: Word position span of the next
+            source phrase
+        :type next_src_phrase_span: tuple(int, int)
+
+        :return: 0.0 if ``hypothesis`` has an empty ``src_phrase_span``;
+            otherwise the absolute distance between the end of that span
+            and the start of the next one, multiplied by the decoder's
+            private ``__log_distortion_factor``
+        """
+        covered_span = hypothesis.src_phrase_span
+        if covered_span:
+            jump = next_src_phrase_span[0] - covered_span[1]
+            return abs(jump) * self.__log_distortion_factor
+        return 0.0
+
+    @staticmethod
+    def valid_phrases(all_phrases_from, hypothesis):
+        """
+        Select the phrases in ``all_phrases_from`` that lie entirely
+        within positions ``hypothesis`` has not translated yet
+
+        :param all_phrases_from: Phrases represented by their spans, in
+            the same format as the return value of
+            ``find_all_src_phrases``
+        :type all_phrases_from: list(list(int))
+
+        :type hypothesis: _Hypothesis
+
+        :return: A list of phrases, represented by their spans, that
+            cover untranslated positions.
+        :rtype: list(tuple(int, int))
+        """
+        sentence_length = len(all_phrases_from)
+        usable = []
+        for gap_start, gap_end in hypothesis.untranslated_spans(sentence_length):
+            for first in range(gap_start, gap_end):
+                for stop in all_phrases_from[first]:
+                    if stop > gap_end:
+                        # End positions are listed in ascending order,
+                        # so every later one overshoots the gap as well
+                        break
+                    usable.append((first, stop))
+        return usable
+
+
+class _Hypothesis:
+    """
+    Partial solution to a translation.
+
+    Records the word positions of the phrase being translated, its
+    translation, raw score, and the cost of the untranslated parts of
+    the sentence. When the next phrase is selected to build upon the
+    partial solution, a new _Hypothesis object is created, with a back
+    pointer to the previous hypothesis.
+
+    To find out which words have been translated so far, look at the
+    ``src_phrase_span`` in the hypothesis chain. Similarly, the
+    translation output can be found by traversing up the chain.
+
+    The hypothesis whose ``previous`` is None is the root of the chain;
+    its own ``src_phrase_span`` and ``trg_phrase`` are never read when
+    the chain is traversed.
+    """
+
+    def __init__(
+        self,
+        raw_score=0.0,
+        src_phrase_span=(),
+        trg_phrase=(),
+        previous=None,
+        future_score=0.0,
+    ):
+        """
+        :param raw_score: Likelihood of hypothesis so far.
+            Higher is better. Does not account for untranslated words.
+        :type raw_score: float
+
+        :param src_phrase_span: Span of word positions covered by the
+            source phrase in this hypothesis expansion. For example,
+            (2, 5) means that the phrase is from the second word up to,
+            but not including the fifth word in the source sentence.
+        :type src_phrase_span: tuple(int)
+
+        :param trg_phrase: Translation of the source phrase in this
+            hypothesis expansion
+        :type trg_phrase: tuple(str)
+
+        :param previous: Previous hypothesis before expansion to this one
+        :type previous: _Hypothesis
+
+        :param future_score: Approximate score for translating the
+            remaining words not covered by this hypothesis. Higher means
+            that the remaining words are easier to translate.
+        :type future_score: float
+        """
+        self.raw_score = raw_score
+        self.src_phrase_span = src_phrase_span
+        self.trg_phrase = trg_phrase
+        self.previous = previous
+        self.future_score = future_score
+
+    def score(self):
+        """
+        Overall score of hypothesis after accounting for local and
+        global features
+
+        :return: ``raw_score`` plus ``future_score``
+        :rtype: float
+        """
+        return self.raw_score + self.future_score
+
+    def untranslated_spans(self, sentence_length):
+        """
+        Starting from each untranslated word, find the longest
+        continuous span of untranslated positions
+
+        :param sentence_length: Length of source sentence being
+            translated by the hypothesis
+        :type sentence_length: int
+
+        :return: Spans as (start, end) pairs with ``end`` exclusive, in
+            ascending order
+        :rtype: list(tuple(int, int))
+        """
+        boundaries = sorted(self.translated_positions())
+        boundaries.append(sentence_length)  # add sentinel position
+
+        untranslated_spans = []
+        start = 0
+        # each untranslated span must end in one of the translated
+        # positions (or at the sentinel)
+        for end in boundaries:
+            if start < end:
+                untranslated_spans.append((start, end))
+            start = end + 1
+
+        return untranslated_spans
+
+    def translated_positions(self):
+        """
+        List of positions in the source sentence of words already
+        translated. The list is not sorted.
+
+        :rtype: list(int)
+        """
+        translated_positions = []
+        current_hypothesis = self
+        # the root hypothesis (previous is None) covers no words
+        while current_hypothesis.previous is not None:
+            span_start, span_end = current_hypothesis.src_phrase_span
+            translated_positions.extend(range(span_start, span_end))
+            current_hypothesis = current_hypothesis.previous
+        return translated_positions
+
+    def total_translated_words(self):
+        """
+        :return: Number of source positions covered by the chain
+        :rtype: int
+        """
+        return len(self.translated_positions())
+
+    def translation_so_far(self):
+        """
+        :return: Target words of every expansion in the chain, in the
+            order in which the expansions were made
+        :rtype: list(str)
+        """
+        translation = []
+        self.__build_translation(self, translation)
+        return translation
+
+    def __build_translation(self, hypothesis, output):
+        """
+        Append to ``output`` the target phrases of the chain that ends
+        in ``hypothesis``, oldest expansion first.
+
+        The chain is walked with a loop instead of recursion so that its
+        length is not bounded by the interpreter's recursion limit.
+        """
+        newest_first = []
+        while hypothesis.previous is not None:
+            newest_first.append(hypothesis.trg_phrase)
+            hypothesis = hypothesis.previous
+        for phrase in reversed(newest_first):
+            output.extend(phrase)
+
+
+class _Stack:
+    """
+    Bounded collection of _Hypothesis objects, kept ordered from the
+    highest score to the lowest
+    """
+
+    def __init__(self, max_size=100, beam_threshold=0.0):
+        """
+        :param max_size: Largest number of hypotheses the stack keeps
+            after a push
+        :type max_size: int
+
+        :param beam_threshold: Hypotheses that score less than this
+            factor of the best hypothesis are discarded from the stack.
+            Value must be between 0.0 and 1.0.
+        :type beam_threshold: float
+        """
+        self.max_size = max_size
+        self.items = []
+
+        # log(0) is undefined, so a zero threshold is stored as -inf
+        self.__log_beam_threshold = (
+            float("-inf") if beam_threshold == 0.0 else log(beam_threshold)
+        )
+
+    def push(self, hypothesis):
+        """
+        Add ``hypothesis`` to the stack.
+        Removes lowest scoring hypothesis if the stack is full.
+        After insertion, hypotheses that score less than
+        ``beam_threshold`` times the score of the best hypothesis
+        are removed.
+        """
+        self.items.append(hypothesis)
+        self.items.sort(key=lambda candidate: candidate.score(), reverse=True)
+        while len(self.items) > self.max_size:
+            del self.items[-1]
+        self.threshold_prune()
+
+    def threshold_prune(self):
+        """
+        Drop hypotheses from the low-scoring end of the stack until the
+        last one scores at least the beam cutoff.
+        """
+        if self.items:
+            #  log(score * beam_threshold) = log(score) + log(beam_threshold)
+            cutoff = self.items[0].score() + self.__log_beam_threshold
+            while self.items and self.items[-1].score() < cutoff:
+                self.items.pop()
+
+    def best(self):
+        """
+        :return: Hypothesis with the highest score in the stack, or
+            None if the stack is empty
+        :rtype: _Hypothesis
+        """
+        return self.items[0] if self.items else None
+
+    def __iter__(self):
+        return iter(self.items)
+
+    def __contains__(self, hypothesis):
+        return hypothesis in self.items
+
+    def __bool__(self):
+        return bool(self.items)
+
+    __nonzero__ = __bool__