Initial commit
34 backend/venv/Lib/site-packages/nltk/stem/__init__.py Normal file
@@ -0,0 +1,34 @@
# Natural Language Toolkit: Stemmers
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Trevor Cohn <tacohn@cs.mu.oz.au>
#         Edward Loper <edloper@gmail.com>
#         Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
NLTK Stemmers

Interfaces used to remove morphological affixes from words, leaving
only the word stem. Stemming algorithms aim to remove those affixes
required for, e.g., grammatical role, tense, or derivational
morphology, leaving only the stem of the word. This is a difficult
problem due to irregular words (e.g. common verbs in English),
complicated morphological rules, and part-of-speech and sense
ambiguities (e.g. ``ceil-`` is not the stem of ``ceiling``).

StemmerI defines a standard interface for stemmers.
"""

from nltk.stem.api import StemmerI
from nltk.stem.arlstem import ARLSTem
from nltk.stem.arlstem2 import ARLSTem2
from nltk.stem.cistem import Cistem
from nltk.stem.isri import ISRIStemmer
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.stem.regexp import RegexpStemmer
from nltk.stem.rslp import RSLPStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
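Editor's note: a minimal usage sketch of the interface this module re-exports (illustrative only, not part of the vendored file; assumes an installed nltk):

from nltk.stem import PorterStemmer, SnowballStemmer

porter = PorterStemmer()
print(porter.stem("running"))        # expected: 'run'

snowball = SnowballStemmer("german")
print(snowball.stem("Autobahnen"))   # expected: 'autobahn'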
27 backend/venv/Lib/site-packages/nltk/stem/api.py Normal file
@@ -0,0 +1,27 @@
# Natural Language Toolkit: Stemmer Interface
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Trevor Cohn <tacohn@cs.mu.oz.au>
#         Edward Loper <edloper@gmail.com>
#         Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

from abc import ABCMeta, abstractmethod


class StemmerI(metaclass=ABCMeta):
    """
    A processing interface for removing morphological affixes from
    words. This process is known as stemming.

    """

    @abstractmethod
    def stem(self, token):
        """
        Strip affixes from the token and return the stem.

        :param token: The token that should be stemmed.
        :type token: str
        """
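Editor's note: to make the contract concrete, here is a minimal toy implementation of StemmerI (illustrative only; the class name and suffix list are the editor's, not NLTK's):

from nltk.stem.api import StemmerI


class SuffixStripper(StemmerI):
    """Toy stemmer: strips the longest matching suffix from a fixed set."""

    def __init__(self, suffixes=("ing", "ed", "s")):
        self._suffixes = sorted(suffixes, key=len, reverse=True)

    def stem(self, token):
        for suffix in self._suffixes:
            # keep at least a three-letter stem
            if token.endswith(suffix) and len(token) - len(suffix) >= 3:
                return token[: -len(suffix)]
        return token


print(SuffixStripper().stem("walking"))  # 'walk'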
361 backend/venv/Lib/site-packages/nltk/stem/arlstem.py Normal file
@@ -0,0 +1,361 @@
#
# Natural Language Toolkit: ARLSTem Stemmer
#
# Copyright (C) 2001-2025 NLTK Project
#
# Author: Kheireddine Abainia (x-programer) <k.abainia@gmail.com>
# Algorithms: Kheireddine Abainia <k.abainia@gmail.com>
#             Siham Ouamour
#             Halim Sayoud
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT


"""
ARLSTem Arabic Stemmer
The details about the implementation of this algorithm are described in:
K. Abainia, S. Ouamour and H. Sayoud, A Novel Robust Arabic Light Stemmer,
Journal of Experimental & Theoretical Artificial Intelligence (JETAI'17),
Vol. 29, No. 3, 2017, pp. 557-573.
The ARLSTem is a light Arabic stemmer that is based on removing the affixes
from the word (i.e. prefixes, suffixes and infixes). It was evaluated and
compared to several other stemmers using Paice's parameters (under-stemming
index, over-stemming index and stemming weight), and the results showed that
ARLSTem is promising and produces high performance. This stemmer is not
based on any dictionary and can be used on-line effectively.
"""
import re

from nltk.stem.api import StemmerI


class ARLSTem(StemmerI):
    """
    ARLSTem stemmer: a light Arabic stemming algorithm without any dictionary.
    Department of Telecommunication & Information Processing. USTHB University,
    Algiers, Algeria.
    ARLSTem.stem(token) returns the Arabic stem for the input token.
    The ARLSTem Stemmer requires that all tokens are encoded using Unicode
    encoding.
    """

    def __init__(self):
        # different Alif with hamza
        self.re_hamzated_alif = re.compile(r"[\u0622\u0623\u0625]")
        self.re_alifMaqsura = re.compile(r"[\u0649]")
        self.re_diacritics = re.compile(r"[\u064B-\u065F]")

        # Alif Laam, Laam Laam, Fa Laam, Fa Ba
        self.pr2 = ["\u0627\u0644", "\u0644\u0644", "\u0641\u0644", "\u0641\u0628"]
        # Ba Alif Laam, Kaaf Alif Laam, Waaw Alif Laam
        self.pr3 = ["\u0628\u0627\u0644", "\u0643\u0627\u0644", "\u0648\u0627\u0644"]
        # Fa Laam Laam, Waaw Laam Laam
        self.pr32 = ["\u0641\u0644\u0644", "\u0648\u0644\u0644"]
        # Fa Ba Alif Laam, Waaw Ba Alif Laam, Fa Kaaf Alif Laam
        self.pr4 = [
            "\u0641\u0628\u0627\u0644",
            "\u0648\u0628\u0627\u0644",
            "\u0641\u0643\u0627\u0644",
        ]

        # Kaf Yaa, Kaf Miim
        self.su2 = ["\u0643\u064A", "\u0643\u0645"]
        # Ha Alif, Ha Miim
        self.su22 = ["\u0647\u0627", "\u0647\u0645"]
        # Kaf Miim Alif, Kaf Noon Shadda
        self.su3 = ["\u0643\u0645\u0627", "\u0643\u0646\u0651"]
        # Ha Miim Alif, Ha Noon Shadda
        self.su32 = ["\u0647\u0645\u0627", "\u0647\u0646\u0651"]

        # Alif Noon, Ya Noon, Waaw Noon
        self.pl_si2 = ["\u0627\u0646", "\u064A\u0646", "\u0648\u0646"]
        # Taa Alif Noon, Taa Ya Noon
        self.pl_si3 = ["\u062A\u0627\u0646", "\u062A\u064A\u0646"]

        # Alif Noon, Waaw Noon
        self.verb_su2 = ["\u0627\u0646", "\u0648\u0646"]
        # Siin Taa, Siin Yaa
        self.verb_pr2 = ["\u0633\u062A", "\u0633\u064A"]
        # Siin Alif, Siin Noon
        self.verb_pr22 = ["\u0633\u0627", "\u0633\u0646"]
        # Lam Noon, Lam Taa, Lam Yaa, Lam Hamza
        self.verb_pr33 = [
            "\u0644\u0646",
            "\u0644\u062A",
            "\u0644\u064A",
            "\u0644\u0623",
        ]
        # Taa Miim Alif, Taa Noon Shadda
        self.verb_suf3 = ["\u062A\u0645\u0627", "\u062A\u0646\u0651"]
        # Noon Alif, Taa Miim, Taa Alif, Waaw Alif
        self.verb_suf2 = [
            "\u0646\u0627",
            "\u062A\u0645",
            "\u062A\u0627",
            "\u0648\u0627",
        ]
        # Taa, Alif, Noon
        self.verb_suf1 = ["\u062A", "\u0627", "\u0646"]

    def stem(self, token):
        """
        Call this function to get the word's stem based on ARLSTem.
        """
        try:
            if token is None:
                raise ValueError("The word could not be stemmed: it is empty!")
            # remove Arabic diacritics and replace some letters with others
            token = self.norm(token)
            # strip common prefixes of the nouns
            pre = self.pref(token)
            if pre is not None:
                token = pre
            # strip the suffixes which are common to nouns and verbs
            token = self.suff(token)
            # transform a plural noun to a singular noun
            ps = self.plur2sing(token)
            if ps is None:
                # transform from the feminine form to the masculine form
                fm = self.fem2masc(token)
                if fm is not None:
                    return fm
                else:
                    if pre is None:  # if the prefixes are not stripped
                        # strip the verb prefixes and suffixes
                        return self.verb(token)
            else:
                return ps
            return token
        except ValueError as e:
            print(e)

    def norm(self, token):
        """
        normalize the word by removing diacritics, replacing hamzated Alif
        with Alif, replacing AlifMaqsura with Yaa and removing Waaw at the
        beginning.
        """
        # strip Arabic diacritics
        token = self.re_diacritics.sub("", token)
        # replace Hamzated Alif with Alif bare
        token = self.re_hamzated_alif.sub("\u0627", token)
        # replace alifMaqsura with Yaa
        token = self.re_alifMaqsura.sub("\u064A", token)
        # strip the Waaw from the word beginning if the remaining is 3 letters
        # at least
        if token.startswith("\u0648") and len(token) > 3:
            token = token[1:]
        return token

    def pref(self, token):
        """
        remove prefixes from the words' beginning.
        """
        if len(token) > 5:
            for p3 in self.pr3:
                if token.startswith(p3):
                    return token[3:]
        if len(token) > 6:
            for p4 in self.pr4:
                if token.startswith(p4):
                    return token[4:]
        if len(token) > 5:
            for p3 in self.pr32:
                if token.startswith(p3):
                    return token[3:]
        if len(token) > 4:
            for p2 in self.pr2:
                if token.startswith(p2):
                    return token[2:]

    def suff(self, token):
        """
        remove suffixes from the word's end.
        """
        if token.endswith("\u0643") and len(token) > 3:
            return token[:-1]
        if len(token) > 4:
            for s2 in self.su2:
                if token.endswith(s2):
                    return token[:-2]
        if len(token) > 5:
            for s3 in self.su3:
                if token.endswith(s3):
                    return token[:-3]
        if token.endswith("\u0647") and len(token) > 3:
            token = token[:-1]
            return token
        if len(token) > 4:
            for s2 in self.su22:
                if token.endswith(s2):
                    return token[:-2]
        if len(token) > 5:
            for s3 in self.su32:
                if token.endswith(s3):
                    return token[:-3]
        if token.endswith("\u0646\u0627") and len(token) > 4:
            return token[:-2]
        return token

    def fem2masc(self, token):
        """
        transform the word from the feminine form to the masculine form.
        """
        if token.endswith("\u0629") and len(token) > 3:
            return token[:-1]

    def plur2sing(self, token):
        """
        transform the word from the plural form to the singular form.
        """
        if len(token) > 4:
            for ps2 in self.pl_si2:
                if token.endswith(ps2):
                    return token[:-2]
        if len(token) > 5:
            for ps3 in self.pl_si3:
                if token.endswith(ps3):
                    return token[:-3]
        if len(token) > 3 and token.endswith("\u0627\u062A"):
            return token[:-2]
        if len(token) > 3 and token.startswith("\u0627") and token[2] == "\u0627":
            return token[:2] + token[3:]
        if len(token) > 4 and token.startswith("\u0627") and token[-2] == "\u0627":
            return token[1:-2] + token[-1]

    def verb(self, token):
        """
        stem the verb prefixes and suffixes or both
        """
        vb = self.verb_t1(token)
        if vb is not None:
            return vb
        vb = self.verb_t2(token)
        if vb is not None:
            return vb
        vb = self.verb_t3(token)
        if vb is not None:
            return vb
        vb = self.verb_t4(token)
        if vb is not None:
            return vb
        vb = self.verb_t5(token)
        if vb is not None:
            return vb
        return self.verb_t6(token)

    def verb_t1(self, token):
        """
        stem the present prefixes and suffixes
        """
        if len(token) > 5 and token.startswith("\u062A"):  # Taa
            for s2 in self.pl_si2:
                if token.endswith(s2):
                    return token[1:-2]
        if len(token) > 5 and token.startswith("\u064A"):  # Yaa
            for s2 in self.verb_su2:
                if token.endswith(s2):
                    return token[1:-2]
        if len(token) > 4 and token.startswith("\u0627"):  # Alif
            # Waaw Alif
            if len(token) > 5 and token.endswith("\u0648\u0627"):
                return token[1:-2]
            # Yaa
            if token.endswith("\u064A"):
                return token[1:-1]
            # Alif
            if token.endswith("\u0627"):
                return token[1:-1]
            # Noon
            if token.endswith("\u0646"):
                return token[1:-1]
        # ^Yaa, Noon$
        if len(token) > 4 and token.startswith("\u064A") and token.endswith("\u0646"):
            return token[1:-1]
        # ^Taa, Noon$
        if len(token) > 4 and token.startswith("\u062A") and token.endswith("\u0646"):
            return token[1:-1]

    def verb_t2(self, token):
        """
        stem the future prefixes and suffixes
        """
        if len(token) > 6:
            for s2 in self.pl_si2:
                # ^Siin Taa
                if token.startswith(self.verb_pr2[0]) and token.endswith(s2):
                    return token[2:-2]
            # ^Siin Yaa, Alif Noon$
            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[0]):
                return token[2:-2]
            # ^Siin Yaa, Waaw Noon$
            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[2]):
                return token[2:-2]
        # ^Siin Taa, Noon$
        if (
            len(token) > 5
            and token.startswith(self.verb_pr2[0])
            and token.endswith("\u0646")
        ):
            return token[2:-1]
        # ^Siin Yaa, Noon$
        if (
            len(token) > 5
            and token.startswith(self.verb_pr2[1])
            and token.endswith("\u0646")
        ):
            return token[2:-1]

    def verb_t3(self, token):
        """
        stem the present suffixes
        """
        if len(token) > 5:
            for su3 in self.verb_suf3:
                if token.endswith(su3):
                    return token[:-3]
        if len(token) > 4:
            for su2 in self.verb_suf2:
                if token.endswith(su2):
                    return token[:-2]
        if len(token) > 3:
            for su1 in self.verb_suf1:
                if token.endswith(su1):
                    return token[:-1]

    def verb_t4(self, token):
        """
        stem the present prefixes
        """
        if len(token) > 3:
            for pr1 in self.verb_suf1:
                if token.startswith(pr1):
                    return token[1:]
            if token.startswith("\u064A"):
                return token[1:]

    def verb_t5(self, token):
        """
        stem the future prefixes
        """
        if len(token) > 4:
            for pr2 in self.verb_pr22:
                if token.startswith(pr2):
                    return token[2:]
            for pr2 in self.verb_pr2:
                if token.startswith(pr2):
                    return token[2:]
        return token

    def verb_t6(self, token):
        """
        stem the order prefixes
        """
        if len(token) > 4:
            for pr3 in self.verb_pr33:
                if token.startswith(pr3):
                    return token[2:]
        return token
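Editor's note: an illustrative call of the stemmer above (the Arabic example word is the editor's; tracing the rule cascade, pref() strips the definite article ال and plur2sing() strips the plural suffix ون):

from nltk.stem.arlstem import ARLSTem

stemmer = ARLSTem()
print(stemmer.stem("المعلمون"))  # "the teachers" -> expected stem 'معلم'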
457 backend/venv/Lib/site-packages/nltk/stem/arlstem2.py Normal file
@@ -0,0 +1,457 @@
#
# Natural Language Toolkit: ARLSTem Stemmer v2
#
# Copyright (C) 2001-2025 NLTK Project
#
# Author: Kheireddine Abainia (x-programer) <k.abainia@gmail.com>
# Algorithms: Kheireddine Abainia <k.abainia@gmail.com>
#             Hamza Rebbani <hamrebbani@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT


"""
ARLSTem2 Arabic Light Stemmer
The details about the implementation of this algorithm are described in:
K. Abainia and H. Rebbani, Comparing the Effectiveness of the Improved ARLSTem
Algorithm with Existing Arabic Light Stemmers, International Conference on
Theoretical and Applicative Aspects of Computer Science (ICTAACS'19), Skikda,
Algeria, December 15-16, 2019.
ARLSTem2 is an Arabic light stemmer based on removing the affixes from
the words (i.e. prefixes, suffixes and infixes). It is an improvement
of the previous Arabic light stemmer (ARLSTem). The new version was compared to
the original algorithm and several existing Arabic light stemmers, where the
results showed that the new version considerably improves the under-stemming
errors that are common to light stemmers. Both ARLSTem and ARLSTem2 can be run
online and do not use any dictionary.
"""
import re

from nltk.stem.api import StemmerI


class ARLSTem2(StemmerI):
    """
    Return a stemmed Arabic word after removing affixes. This is an improved
    version of the previous algorithm, which reduces under-stemming errors.
    Typically used in Arabic search engines, information retrieval and NLP.

        >>> from nltk.stem import ARLSTem2
        >>> stemmer = ARLSTem2()
        >>> word = stemmer.stem('يعمل')
        >>> print(word)
        عمل

    :param token: The input Arabic word (unicode) to be stemmed
    :type token: unicode
    :return: A unicode Arabic word
    """

    def __init__(self):
        # different Alif with hamza
        self.re_hamzated_alif = re.compile(r"[\u0622\u0623\u0625]")
        self.re_alifMaqsura = re.compile(r"[\u0649]")
        self.re_diacritics = re.compile(r"[\u064B-\u065F]")

        # Alif Laam, Laam Laam, Fa Laam, Fa Ba
        self.pr2 = ["\u0627\u0644", "\u0644\u0644", "\u0641\u0644", "\u0641\u0628"]
        # Ba Alif Laam, Kaaf Alif Laam, Waaw Alif Laam
        self.pr3 = ["\u0628\u0627\u0644", "\u0643\u0627\u0644", "\u0648\u0627\u0644"]
        # Fa Laam Laam, Waaw Laam Laam
        self.pr32 = ["\u0641\u0644\u0644", "\u0648\u0644\u0644"]
        # Fa Ba Alif Laam, Waaw Ba Alif Laam, Fa Kaaf Alif Laam
        self.pr4 = [
            "\u0641\u0628\u0627\u0644",
            "\u0648\u0628\u0627\u0644",
            "\u0641\u0643\u0627\u0644",
        ]

        # Kaf Yaa, Kaf Miim
        self.su2 = ["\u0643\u064A", "\u0643\u0645"]
        # Ha Alif, Ha Miim
        self.su22 = ["\u0647\u0627", "\u0647\u0645"]
        # Kaf Miim Alif, Kaf Noon Shadda
        self.su3 = ["\u0643\u0645\u0627", "\u0643\u0646\u0651"]
        # Ha Miim Alif, Ha Noon Shadda
        self.su32 = ["\u0647\u0645\u0627", "\u0647\u0646\u0651"]

        # Alif Noon, Ya Noon, Waaw Noon
        self.pl_si2 = ["\u0627\u0646", "\u064A\u0646", "\u0648\u0646"]
        # Taa Alif Noon, Taa Ya Noon
        self.pl_si3 = ["\u062A\u0627\u0646", "\u062A\u064A\u0646"]

        # Alif Noon, Waaw Noon
        self.verb_su2 = ["\u0627\u0646", "\u0648\u0646"]
        # Siin Taa, Siin Yaa
        self.verb_pr2 = ["\u0633\u062A", "\u0633\u064A"]
        # Siin Alif, Siin Noon
        self.verb_pr22 = ["\u0633\u0627", "\u0633\u0646"]
        # Lam Noon, Lam Taa, Lam Yaa, Lam Hamza
        self.verb_pr33 = [
            "\u0644\u0646",
            "\u0644\u062A",
            "\u0644\u064A",
            "\u0644\u0623",
        ]
        # Taa Miim Alif, Taa Noon Shadda
        self.verb_suf3 = ["\u062A\u0645\u0627", "\u062A\u0646\u0651"]
        # Noon Alif, Taa Miim, Taa Alif, Waaw Alif
        self.verb_suf2 = [
            "\u0646\u0627",
            "\u062A\u0645",
            "\u062A\u0627",
            "\u0648\u0627",
        ]
        # Taa, Alif, Noon
        self.verb_suf1 = ["\u062A", "\u0627", "\u0646"]

    def stem1(self, token):
        """
        call this function to get the first stem
        """
        try:
            if token is None:
                raise ValueError("The word could not be stemmed: it is empty!")
            self.is_verb = False
            # remove Arabic diacritics and replace some letters with others
            token = self.norm(token)
            # strip the common noun prefixes
            pre = self.pref(token)
            if pre is not None:
                token = pre
            # transform the feminine form to masculine form
            fm = self.fem2masc(token)
            if fm is not None:
                return fm
            # strip the adjective affixes
            adj = self.adjective(token)
            if adj is not None:
                return adj
            # strip the suffixes that are common to nouns and verbs
            token = self.suff(token)
            # transform a plural noun to a singular noun
            ps = self.plur2sing(token)
            if ps is None:
                if pre is None:  # if the noun prefixes are not stripped
                    # strip the verb prefixes and suffixes
                    verb = self.verb(token)
                    if verb is not None:
                        self.is_verb = True
                        return verb
            else:
                return ps
            return token
        except ValueError as e:
            print(e)

    def stem(self, token):
        # stem the input word
        try:
            if token is None:
                raise ValueError("The word could not be stemmed: it is empty!")
            # run the first round of stemming
            token = self.stem1(token)
            # check if there are some additional noun affixes
            if len(token) > 4:
                # ^Taa, $Yaa + char
                if token.startswith("\u062A") and token[-2] == "\u064A":
                    token = token[1:-2] + token[-1]
                    return token
                # ^Miim, $Waaw + char
                if token.startswith("\u0645") and token[-2] == "\u0648":
                    token = token[1:-2] + token[-1]
                    return token
            if len(token) > 3:
                # !^Alif, $Yaa
                if not token.startswith("\u0627") and token.endswith("\u064A"):
                    token = token[:-1]
                    return token
                # ^Laam
                if token.startswith("\u0644"):
                    return token[1:]
            return token
        except ValueError as e:
            print(e)

    def norm(self, token):
        """
        normalize the word by removing diacritics, replacing hamzated Alif
        with bare Alif, replacing AlifMaqsura with Yaa and removing Waaw at
        the beginning.
        """
        # strip Arabic diacritics
        token = self.re_diacritics.sub("", token)
        # replace Hamzated Alif with Alif bare
        token = self.re_hamzated_alif.sub("\u0627", token)
        # replace alifMaqsura with Yaa
        token = self.re_alifMaqsura.sub("\u064A", token)
        # strip the Waaw from the word beginning if the remaining is
        # tri-literal at least
        if token.startswith("\u0648") and len(token) > 3:
            token = token[1:]
        return token

    def pref(self, token):
        """
        remove prefixes from the words' beginning.
        """
        if len(token) > 5:
            for p3 in self.pr3:
                if token.startswith(p3):
                    return token[3:]
        if len(token) > 6:
            for p4 in self.pr4:
                if token.startswith(p4):
                    return token[4:]
        if len(token) > 5:
            for p3 in self.pr32:
                if token.startswith(p3):
                    return token[3:]
        if len(token) > 4:
            for p2 in self.pr2:
                if token.startswith(p2):
                    return token[2:]

    def adjective(self, token):
        """
        remove the infixes from adjectives
        """
        # ^Alif, Alif, $Yaa
        if len(token) > 5:
            if (
                token.startswith("\u0627")
                and token[-3] == "\u0627"
                and token.endswith("\u064A")
            ):
                return token[:-3] + token[-2]

    def suff(self, token):
        """
        remove the suffixes from the word's ending.
        """
        if token.endswith("\u0643") and len(token) > 3:
            return token[:-1]
        if len(token) > 4:
            for s2 in self.su2:
                if token.endswith(s2):
                    return token[:-2]
        if len(token) > 5:
            for s3 in self.su3:
                if token.endswith(s3):
                    return token[:-3]
        if token.endswith("\u0647") and len(token) > 3:
            token = token[:-1]
            return token
        if len(token) > 4:
            for s2 in self.su22:
                if token.endswith(s2):
                    return token[:-2]
        if len(token) > 5:
            for s3 in self.su32:
                if token.endswith(s3):
                    return token[:-3]
        # $Noon and Alif
        if token.endswith("\u0646\u0627") and len(token) > 4:
            return token[:-2]
        return token

    def fem2masc(self, token):
        """
        transform the word from the feminine form to the masculine form.
        """
        if len(token) > 6:
            # ^Taa, Yaa, $Yaa and Taa Marbuta
            if (
                token.startswith("\u062A")
                and token[-4] == "\u064A"
                and token.endswith("\u064A\u0629")
            ):
                return token[1:-4] + token[-3]
            # ^Alif, Alif, $Yaa and Taa Marbuta
            if (
                token.startswith("\u0627")
                and token[-4] == "\u0627"
                and token.endswith("\u064A\u0629")
            ):
                return token[:-4] + token[-3]
        # $Alif, Yaa and Taa Marbuta
        if token.endswith("\u0627\u064A\u0629") and len(token) > 5:
            return token[:-2]
        if len(token) > 4:
            # Alif, $Taa Marbuta
            if token[1] == "\u0627" and token.endswith("\u0629"):
                return token[0] + token[2:-1]
            # $Yaa and Taa Marbuta
            if token.endswith("\u064A\u0629"):
                return token[:-2]
        # $Taa Marbuta
        if token.endswith("\u0629") and len(token) > 3:
            return token[:-1]

    def plur2sing(self, token):
        """
        transform the word from the plural form to the singular form.
        """
        # ^Miim, $Waaw Noon
        if len(token) > 5:
            if token.startswith("\u0645") and token.endswith("\u0648\u0646"):
                return token[1:-2]
        if len(token) > 4:
            for ps2 in self.pl_si2:
                if token.endswith(ps2):
                    return token[:-2]
        if len(token) > 5:
            for ps3 in self.pl_si3:
                if token.endswith(ps3):
                    return token[:-3]
        if len(token) > 4:
            # $Alif, Taa
            if token.endswith("\u0627\u062A"):
                return token[:-2]
            # ^Alif, Alif in third position
            if token.startswith("\u0627") and token[2] == "\u0627":
                return token[:2] + token[3:]
            # ^Alif, Alif before the last letter
            if token.startswith("\u0627") and token[-2] == "\u0627":
                return token[1:-2] + token[-1]

    def verb(self, token):
        """
        stem the verb prefixes and suffixes or both
        """
        vb = self.verb_t1(token)
        if vb is not None:
            return vb
        vb = self.verb_t2(token)
        if vb is not None:
            return vb
        vb = self.verb_t3(token)
        if vb is not None:
            return vb
        vb = self.verb_t4(token)
        if vb is not None:
            return vb
        vb = self.verb_t5(token)
        if vb is not None:
            return vb
        vb = self.verb_t6(token)
        return vb

    def verb_t1(self, token):
        """
        stem the present tense co-occurred prefixes and suffixes
        """
        if len(token) > 5 and token.startswith("\u062A"):  # Taa
            for s2 in self.pl_si2:
                if token.endswith(s2):
                    return token[1:-2]
        if len(token) > 5 and token.startswith("\u064A"):  # Yaa
            for s2 in self.verb_su2:
                if token.endswith(s2):
                    return token[1:-2]
        if len(token) > 4 and token.startswith("\u0627"):  # Alif
            # Waaw Alif
            if len(token) > 5 and token.endswith("\u0648\u0627"):
                return token[1:-2]
            # Yaa
            if token.endswith("\u064A"):
                return token[1:-1]
            # Alif
            if token.endswith("\u0627"):
                return token[1:-1]
            # Noon
            if token.endswith("\u0646"):
                return token[1:-1]
        # ^Yaa, Noon$
        if len(token) > 4 and token.startswith("\u064A") and token.endswith("\u0646"):
            return token[1:-1]
        # ^Taa, Noon$
        if len(token) > 4 and token.startswith("\u062A") and token.endswith("\u0646"):
            return token[1:-1]

    def verb_t2(self, token):
        """
        stem the future tense co-occurred prefixes and suffixes
        """
        if len(token) > 6:
            for s2 in self.pl_si2:
                # ^Siin Taa
                if token.startswith(self.verb_pr2[0]) and token.endswith(s2):
                    return token[2:-2]
            # ^Siin Yaa, Alif Noon$
            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[0]):
                return token[2:-2]
            # ^Siin Yaa, Waaw Noon$
            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[2]):
                return token[2:-2]
        # ^Siin Taa, Noon$
        if (
            len(token) > 5
            and token.startswith(self.verb_pr2[0])
            and token.endswith("\u0646")
        ):
            return token[2:-1]
        # ^Siin Yaa, Noon$
        if (
            len(token) > 5
            and token.startswith(self.verb_pr2[1])
            and token.endswith("\u0646")
        ):
            return token[2:-1]

    def verb_t3(self, token):
        """
        stem the present tense suffixes
        """
        if len(token) > 5:
            for su3 in self.verb_suf3:
                if token.endswith(su3):
                    return token[:-3]
        if len(token) > 4:
            for su2 in self.verb_suf2:
                if token.endswith(su2):
                    return token[:-2]
        if len(token) > 3:
            for su1 in self.verb_suf1:
                if token.endswith(su1):
                    return token[:-1]

    def verb_t4(self, token):
        """
        stem the present tense prefixes
        """
        if len(token) > 3:
            for pr1 in self.verb_suf1:
                if token.startswith(pr1):
                    return token[1:]
            if token.startswith("\u064A"):
                return token[1:]

    def verb_t5(self, token):
        """
        stem the future tense prefixes
        """
        if len(token) > 4:
            for pr2 in self.verb_pr22:
                if token.startswith(pr2):
                    return token[2:]
            for pr2 in self.verb_pr2:
                if token.startswith(pr2):
                    return token[2:]

    def verb_t6(self, token):
        """
        stem the imperative tense prefixes
        """
        if len(token) > 4:
            for pr3 in self.verb_pr33:
                if token.startswith(pr3):
                    return token[2:]

        return token
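Editor's note: a small sketch contrasting the two entry points above; stem1() performs only the first round of affix stripping, stem() adds a second pass for residual noun affixes, and stem1() sets is_verb as a side effect (example word taken from the class doctest):

from nltk.stem.arlstem2 import ARLSTem2

stemmer = ARLSTem2()
print(stemmer.stem1("يعمل"))  # first round only -> 'عمل'
print(stemmer.stem("يعمل"))   # both rounds      -> 'عمل'
print(stemmer.is_verb)        # True: the verb branch produced the stem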
209 backend/venv/Lib/site-packages/nltk/stem/cistem.py Normal file
@@ -0,0 +1,209 @@
# Natural Language Toolkit: CISTEM Stemmer for German
# Copyright (C) 2001-2025 NLTK Project
# Author: Leonie Weissweiler <l.weissweiler@outlook.de>
#         Tom Aarsen <> (modifications)
# Algorithm: Leonie Weissweiler <l.weissweiler@outlook.de>
#            Alexander Fraser <fraser@cis.lmu.de>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

import re
from typing import Tuple

from nltk.stem.api import StemmerI


class Cistem(StemmerI):
    """
    CISTEM Stemmer for German

    This is the official Python implementation of the CISTEM stemmer.
    It is based on the paper
    Leonie Weissweiler, Alexander Fraser (2017). Developing a Stemmer for German
    Based on a Comparative Analysis of Publicly Available Stemmers.
    In Proceedings of the German Society for Computational Linguistics and Language
    Technology (GSCL)
    which can be read here:
    https://www.cis.lmu.de/~weissweiler/cistem/

    In the paper, we conducted an analysis of publicly available stemmers,
    developed two gold standards for German stemming and evaluated the stemmers
    based on the two gold standards. We then proposed the stemmer implemented here
    and show that it achieves slightly better f-measure than the other stemmers and
    is thrice as fast as the Snowball stemmer for German while being about as fast
    as most other stemmers.

    case_insensitive is a boolean specifying whether case-insensitive stemming
    should be used. Case insensitivity improves performance only if words in the
    text may be incorrectly upper case. For all-lowercase and correctly cased
    text, best performance is achieved by setting case_insensitive to False.

    :param case_insensitive: if True, the stemming is case insensitive. False by default.
    :type case_insensitive: bool
    """

    strip_ge = re.compile(r"^ge(.{4,})")
    repl_xx = re.compile(r"(.)\1")
    strip_emr = re.compile(r"e[mr]$")
    strip_nd = re.compile(r"nd$")
    strip_t = re.compile(r"t$")
    strip_esn = re.compile(r"[esn]$")
    repl_xx_back = re.compile(r"(.)\*")

    def __init__(self, case_insensitive: bool = False):
        self._case_insensitive = case_insensitive

    @staticmethod
    def replace_to(word: str) -> str:
        word = word.replace("sch", "$")
        word = word.replace("ei", "%")
        word = word.replace("ie", "&")
        word = Cistem.repl_xx.sub(r"\1*", word)

        return word

    @staticmethod
    def replace_back(word: str) -> str:
        word = Cistem.repl_xx_back.sub(r"\1\1", word)
        word = word.replace("%", "ei")
        word = word.replace("&", "ie")
        word = word.replace("$", "sch")

        return word

    def stem(self, word: str) -> str:
        """Stems the input word.

        :param word: The word that is to be stemmed.
        :type word: str
        :return: The stemmed word.
        :rtype: str

        >>> from nltk.stem.cistem import Cistem
        >>> stemmer = Cistem()
        >>> s1 = "Speicherbehältern"
        >>> stemmer.stem(s1)
        'speicherbehalt'
        >>> s2 = "Grenzpostens"
        >>> stemmer.stem(s2)
        'grenzpost'
        >>> s3 = "Ausgefeiltere"
        >>> stemmer.stem(s3)
        'ausgefeilt'
        >>> stemmer = Cistem(True)
        >>> stemmer.stem(s1)
        'speicherbehal'
        >>> stemmer.stem(s2)
        'grenzpo'
        >>> stemmer.stem(s3)
        'ausgefeil'
        """
        if len(word) == 0:
            return word

        upper = word[0].isupper()
        word = word.lower()

        word = word.replace("ü", "u")
        word = word.replace("ö", "o")
        word = word.replace("ä", "a")
        word = word.replace("ß", "ss")

        word = Cistem.strip_ge.sub(r"\1", word)

        return self._segment_inner(word, upper)[0]

    def segment(self, word: str) -> Tuple[str, str]:
        """
        This method works very similarly to stem (:func:`cistem.stem`). The difference is that in
        addition to returning the stem, it also returns the rest that was removed at
        the end. To be able to return the stem unchanged so the stem and the rest
        can be concatenated to form the original word, all substitutions that altered
        the stem in any other way than by removing letters at the end were left out.

        :param word: The word that is to be stemmed.
        :type word: str
        :return: A tuple of the stemmed word and the removed suffix.
        :rtype: Tuple[str, str]

        >>> from nltk.stem.cistem import Cistem
        >>> stemmer = Cistem()
        >>> s1 = "Speicherbehältern"
        >>> stemmer.segment(s1)
        ('speicherbehält', 'ern')
        >>> s2 = "Grenzpostens"
        >>> stemmer.segment(s2)
        ('grenzpost', 'ens')
        >>> s3 = "Ausgefeiltere"
        >>> stemmer.segment(s3)
        ('ausgefeilt', 'ere')
        >>> stemmer = Cistem(True)
        >>> stemmer.segment(s1)
        ('speicherbehäl', 'tern')
        >>> stemmer.segment(s2)
        ('grenzpo', 'stens')
        >>> stemmer.segment(s3)
        ('ausgefeil', 'tere')
        """
        if len(word) == 0:
            return ("", "")

        upper = word[0].isupper()
        word = word.lower()

        return self._segment_inner(word, upper)

    def _segment_inner(self, word: str, upper: bool):
        """Inner method for iteratively applying the code stemming regexes.
        This method receives a pre-processed variant of the word to be stemmed,
        or the word to be segmented, and returns a tuple of the word and the
        removed suffix.

        :param word: A pre-processed variant of the word that is to be stemmed.
        :type word: str
        :param upper: Whether the original word started with a capital letter.
        :type upper: bool
        :return: A tuple of the stemmed word and the removed suffix.
        :rtype: Tuple[str, str]
        """

        rest_length = 0
        word_copy = word[:]

        # Pre-processing before applying the substitution patterns
        word = Cistem.replace_to(word)
        rest = ""

        # Apply the substitution patterns
        while len(word) > 3:
            if len(word) > 5:
                word, n = Cistem.strip_emr.subn("", word)
                if n != 0:
                    rest_length += 2
                    continue

                word, n = Cistem.strip_nd.subn("", word)
                if n != 0:
                    rest_length += 2
                    continue

            if not upper or self._case_insensitive:
                word, n = Cistem.strip_t.subn("", word)
                if n != 0:
                    rest_length += 1
                    continue

            word, n = Cistem.strip_esn.subn("", word)
            if n != 0:
                rest_length += 1
                continue
            else:
                break

        # Post-processing after applying the substitution patterns
        word = Cistem.replace_back(word)

        if rest_length:
            rest = word_copy[-rest_length:]

        return (word, rest)
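Editor's note: segment() above is built so that the stem and the removed rest re-concatenate to the lowercased input, which this sketch (the editor's, mirroring the doctest) checks:

from nltk.stem.cistem import Cistem

stemmer = Cistem()
stem, rest = stemmer.segment("Speicherbehältern")
assert stem + rest == "speicherbehältern"
print(stem, rest)  # per the doctest: 'speicherbehält' 'ern'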
395 backend/venv/Lib/site-packages/nltk/stem/isri.py Normal file
@@ -0,0 +1,395 @@
#
# Natural Language Toolkit: The ISRI Arabic Stemmer
#
# Copyright (C) 2001-2025 NLTK Project
# Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005)
# Author: Hosam Algasaier <hosam_hme@yahoo.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
ISRI Arabic Stemmer

The algorithm for this stemmer is described in:

Taghva, K., Elkhoury, R., and Coombs, J. 2005. Arabic Stemming without a root dictionary.
Information Science Research Institute. University of Nevada, Las Vegas, USA.

The Information Science Research Institute's (ISRI) Arabic stemmer shares many features
with the Khoja stemmer. However, the main difference is that the ISRI stemmer does not
use a root dictionary. Also, if a root is not found, the ISRI stemmer returns a
normalized form, rather than returning the original unmodified word.

Additional adjustments were made to improve the algorithm:

1- Adding 60 stop words.
2- Adding the pattern (تفاعيل) to the ISRI pattern set.
3- Step 2 of the original algorithm normalized all hamza. This step is discarded
because it increases word ambiguities and changes the original root.

"""
import re

from nltk.stem.api import StemmerI


class ISRIStemmer(StemmerI):
    """
    ISRI Arabic stemmer based on the algorithm: Arabic Stemming without a root dictionary.
    Information Science Research Institute. University of Nevada, Las Vegas, USA.

    A few minor modifications have been made to the basic ISRI algorithm.
    See the source code of this module for more information.

    isri.stem(token) returns the Arabic root for the given token.

    The ISRI Stemmer requires that all tokens have Unicode string types.
    If you use Python IDLE on Arabic Windows you have to decode text first
    using Arabic '1256' coding.
    """

    def __init__(self):
        # length three prefixes
        self.p3 = [
            "\u0643\u0627\u0644",
            "\u0628\u0627\u0644",
            "\u0648\u0644\u0644",
            "\u0648\u0627\u0644",
        ]

        # length two prefixes
        self.p2 = ["\u0627\u0644", "\u0644\u0644"]

        # length one prefixes
        self.p1 = [
            "\u0644",
            "\u0628",
            "\u0641",
            "\u0633",
            "\u0648",
            "\u064a",
            "\u062a",
            "\u0646",
            "\u0627",
        ]

        # length three suffixes
        self.s3 = [
            "\u062a\u0645\u0644",
            "\u0647\u0645\u0644",
            "\u062a\u0627\u0646",
            "\u062a\u064a\u0646",
            "\u0643\u0645\u0644",
        ]

        # length two suffixes
        self.s2 = [
            "\u0648\u0646",
            "\u0627\u062a",
            "\u0627\u0646",
            "\u064a\u0646",
            "\u062a\u0646",
            "\u0643\u0645",
            "\u0647\u0646",
            "\u0646\u0627",
            "\u064a\u0627",
            "\u0647\u0627",
            "\u062a\u0645",
            "\u0643\u0646",
            "\u0646\u064a",
            "\u0648\u0627",
            "\u0645\u0627",
            "\u0647\u0645",
        ]

        # length one suffixes
        self.s1 = ["\u0629", "\u0647", "\u064a", "\u0643", "\u062a", "\u0627", "\u0646"]

        # groups of length four patterns
        self.pr4 = {
            0: ["\u0645"],
            1: ["\u0627"],
            2: ["\u0627", "\u0648", "\u064A"],
            3: ["\u0629"],
        }

        # Groups of length five patterns and length three roots
        self.pr53 = {
            0: ["\u0627", "\u062a"],
            1: ["\u0627", "\u064a", "\u0648"],
            2: ["\u0627", "\u062a", "\u0645"],
            3: ["\u0645", "\u064a", "\u062a"],
            4: ["\u0645", "\u062a"],
            5: ["\u0627", "\u0648"],
            6: ["\u0627", "\u0645"],
        }

        self.re_short_vowels = re.compile(r"[\u064B-\u0652]")
        self.re_hamza = re.compile(r"[\u0621\u0624\u0626]")
        self.re_initial_hamza = re.compile(r"^[\u0622\u0623\u0625]")

        self.stop_words = [
            "\u064a\u0643\u0648\u0646",
            "\u0648\u0644\u064a\u0633",
            "\u0648\u0643\u0627\u0646",
            "\u0643\u0630\u0644\u0643",
            "\u0627\u0644\u062a\u064a",
            "\u0648\u0628\u064a\u0646",
            "\u0639\u0644\u064a\u0647\u0627",
            "\u0645\u0633\u0627\u0621",
            "\u0627\u0644\u0630\u064a",
            "\u0648\u0643\u0627\u0646\u062a",
            "\u0648\u0644\u0643\u0646",
            "\u0648\u0627\u0644\u062a\u064a",
            "\u062a\u0643\u0648\u0646",
            "\u0627\u0644\u064a\u0648\u0645",
            "\u0627\u0644\u0644\u0630\u064a\u0646",
            "\u0639\u0644\u064a\u0647",
            "\u0643\u0627\u0646\u062a",
            "\u0644\u0630\u0644\u0643",
            "\u0623\u0645\u0627\u0645",
            "\u0647\u0646\u0627\u0643",
            "\u0645\u0646\u0647\u0627",
            "\u0645\u0627\u0632\u0627\u0644",
            "\u0644\u0627\u0632\u0627\u0644",
            "\u0644\u0627\u064a\u0632\u0627\u0644",
            "\u0645\u0627\u064a\u0632\u0627\u0644",
            "\u0627\u0635\u0628\u062d",
            "\u0623\u0635\u0628\u062d",
            "\u0623\u0645\u0633\u0649",
            "\u0627\u0645\u0633\u0649",
            "\u0623\u0636\u062d\u0649",
            "\u0627\u0636\u062d\u0649",
            "\u0645\u0627\u0628\u0631\u062d",
            "\u0645\u0627\u0641\u062a\u0626",
            "\u0645\u0627\u0627\u0646\u0641\u0643",
            "\u0644\u0627\u0633\u064a\u0645\u0627",
            "\u0648\u0644\u0627\u064a\u0632\u0627\u0644",
            "\u0627\u0644\u062d\u0627\u0644\u064a",
            "\u0627\u0644\u064a\u0647\u0627",
            "\u0627\u0644\u0630\u064a\u0646",
            "\u0641\u0627\u0646\u0647",
            "\u0648\u0627\u0644\u0630\u064a",
            "\u0648\u0647\u0630\u0627",
            "\u0644\u0647\u0630\u0627",
            "\u0641\u0643\u0627\u0646",
            "\u0633\u062a\u0643\u0648\u0646",
            "\u0627\u0644\u064a\u0647",
            "\u064a\u0645\u0643\u0646",
            "\u0628\u0647\u0630\u0627",
            "\u0627\u0644\u0630\u0649",
        ]

    def stem(self, token):
        """
        Stemming a word token using the ISRI stemmer.
        """
        token = self.norm(
            token, 1
        )  # remove diacritics which represent Arabic short vowels
        if token in self.stop_words:
            return token  # exclude stop words from being processed
        token = self.pre32(
            token
        )  # remove length three and length two prefixes in this order
        token = self.suf32(
            token
        )  # remove length three and length two suffixes in this order
        token = self.waw(
            token
        )  # remove connective ‘و’ if it precedes a word beginning with ‘و’
        token = self.norm(token, 2)  # normalize initial hamza to bare alif
        # if 4 <= word length <= 7, then stem; otherwise, no stemming
        if len(token) == 4:  # length 4 word
            token = self.pro_w4(token)
        elif len(token) == 5:  # length 5 word
            token = self.pro_w53(token)
            token = self.end_w5(token)
        elif len(token) == 6:  # length 6 word
            token = self.pro_w6(token)
            token = self.end_w6(token)
        elif len(token) == 7:  # length 7 word
            token = self.suf1(token)
            if len(token) == 7:
                token = self.pre1(token)
            if len(token) == 6:
                token = self.pro_w6(token)
                token = self.end_w6(token)
        return token

    def norm(self, word, num=3):
        """
        normalization:
        num=1  normalize diacritics
        num=2  normalize initial hamza
        num=3  both 1&2
        """
        if num == 1:
            word = self.re_short_vowels.sub("", word)
        elif num == 2:
            word = self.re_initial_hamza.sub("\u0627", word)
        elif num == 3:
            word = self.re_short_vowels.sub("", word)
            word = self.re_initial_hamza.sub("\u0627", word)
        return word

    def pre32(self, word):
        """remove length three and length two prefixes in this order"""
        if len(word) >= 6:
            for pre3 in self.p3:
                if word.startswith(pre3):
                    return word[3:]
        if len(word) >= 5:
            for pre2 in self.p2:
                if word.startswith(pre2):
                    return word[2:]
        return word

    def suf32(self, word):
        """remove length three and length two suffixes in this order"""
        if len(word) >= 6:
            for suf3 in self.s3:
                if word.endswith(suf3):
                    return word[:-3]
        if len(word) >= 5:
            for suf2 in self.s2:
                if word.endswith(suf2):
                    return word[:-2]
        return word

    def waw(self, word):
        """remove connective ‘و’ if it precedes a word beginning with ‘و’"""
        if len(word) >= 4 and word[:2] == "\u0648\u0648":
            word = word[1:]
        return word

    def pro_w4(self, word):
        """process length four patterns and extract length three roots"""
        if word[0] in self.pr4[0]:  # مفعل
            word = word[1:]
        elif word[1] in self.pr4[1]:  # فاعل
            word = word[:1] + word[2:]
        elif word[2] in self.pr4[2]:  # فعال - فعول - فعيل
            word = word[:2] + word[3]
        elif word[3] in self.pr4[3]:  # فعلة
            word = word[:-1]
        else:
            word = self.suf1(word)  # normalize short suffix
            if len(word) == 4:
                word = self.pre1(word)  # normalize short prefix
        return word

    def pro_w53(self, word):
        """process length five patterns and extract length three roots"""
        if word[2] in self.pr53[0] and word[0] == "\u0627":  # افتعل - افاعل
            word = word[1] + word[3:]
        elif word[3] in self.pr53[1] and word[0] == "\u0645":  # مفعول - مفعال - مفعيل
            word = word[1:3] + word[4]
        elif word[0] in self.pr53[2] and word[4] == "\u0629":  # مفعلة - تفعلة - افعلة
            word = word[1:4]
        elif word[0] in self.pr53[3] and word[2] == "\u062a":  # مفتعل - يفتعل - تفتعل
            word = word[1] + word[3:]
        elif word[0] in self.pr53[4] and word[2] == "\u0627":  # مفاعل - تفاعل
            word = word[1] + word[3:]
        elif word[2] in self.pr53[5] and word[4] == "\u0629":  # فعولة - فعالة
            word = word[:2] + word[3]
        elif word[0] in self.pr53[6] and word[1] == "\u0646":  # انفعل - منفعل
            word = word[2:]
        elif word[3] == "\u0627" and word[0] == "\u0627":  # افعال
            word = word[1:3] + word[4]
        elif word[4] == "\u0646" and word[3] == "\u0627":  # فعلان
            word = word[:3]
        elif word[3] == "\u064a" and word[0] == "\u062a":  # تفعيل
            word = word[1:3] + word[4]
        elif word[3] == "\u0648" and word[1] == "\u0627":  # فاعول
            word = word[0] + word[2] + word[4]
        elif word[2] == "\u0627" and word[1] == "\u0648":  # فواعل
            word = word[0] + word[3:]
        elif word[3] == "\u0626" and word[2] == "\u0627":  # فعائل
            word = word[:2] + word[4]
        elif word[4] == "\u0629" and word[1] == "\u0627":  # فاعلة
            word = word[0] + word[2:4]
        elif word[4] == "\u064a" and word[2] == "\u0627":  # فعالي
            word = word[:2] + word[3]
        else:
            word = self.suf1(word)  # normalize short suffix
            if len(word) == 5:
                word = self.pre1(word)  # normalize short prefix
        return word

    def pro_w54(self, word):
        """process length five patterns and extract length four roots"""
        if word[0] in self.pr53[2]:  # تفعلل - افعلل - مفعلل
            word = word[1:]
        elif word[4] == "\u0629":  # فعللة
            word = word[:4]
        elif word[2] == "\u0627":  # فعالل
            word = word[:2] + word[3:]
        return word

    def end_w5(self, word):
        """ending step (word of length five)"""
        if len(word) == 4:
            word = self.pro_w4(word)
        elif len(word) == 5:
            word = self.pro_w54(word)
        return word

    def pro_w6(self, word):
        """process length six patterns and extract length three roots"""
        if word.startswith("\u0627\u0633\u062a") or word.startswith(
            "\u0645\u0633\u062a"
        ):  # مستفعل - استفعل
            word = word[3:]
        elif (
            word[0] == "\u0645" and word[3] == "\u0627" and word[5] == "\u0629"
        ):  # مفعالة
            word = word[1:3] + word[4]
        elif (
            word[0] == "\u0627" and word[2] == "\u062a" and word[4] == "\u0627"
        ):  # افتعال
            word = word[1] + word[3] + word[5]
        elif (
            word[0] == "\u0627" and word[3] == "\u0648" and word[2] == word[4]
        ):  # افعوعل
            word = word[1] + word[4:]
        elif (
            word[0] == "\u062a" and word[2] == "\u0627" and word[4] == "\u064a"
        ):  # تفاعيل new pattern
            word = word[1] + word[3] + word[5]
        else:
            word = self.suf1(word)  # normalize short suffix
            if len(word) == 6:
                word = self.pre1(word)  # normalize short prefix
        return word

    def pro_w64(self, word):
        """process length six patterns and extract length four roots"""
        if word[0] == "\u0627" and word[4] == "\u0627":  # افعلال
            word = word[1:4] + word[5]
        elif word.startswith("\u0645\u062a"):  # متفعلل
            word = word[2:]
        return word

    def end_w6(self, word):
        """ending step (word of length six)"""
        if len(word) == 5:
            word = self.pro_w53(word)
            word = self.end_w5(word)
        elif len(word) == 6:
            word = self.pro_w64(word)
        return word

    def suf1(self, word):
        """normalize short suffix"""
        for sf1 in self.s1:
            if word.endswith(sf1):
                return word[:-1]
        return word

    def pre1(self, word):
        """normalize short prefix"""
        for sp1 in self.p1:
            if word.startswith(sp1):
                return word[1:]
        return word
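Editor's note: an illustrative call (the example word is the editor's; following the code above, pre32() removes the prefix ال and the فعالة branch of pro_w53() extracts the root):

from nltk.stem.isri import ISRIStemmer

stemmer = ISRIStemmer()
print(stemmer.stem("الكتابة"))  # "the writing" -> expected root 'كتب'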
342 backend/venv/Lib/site-packages/nltk/stem/lancaster.py Normal file
@@ -0,0 +1,342 @@
# Natural Language Toolkit: Stemmers
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Tomcavage <stomcava@law.upenn.edu>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61.
"""
import re

from nltk.stem.api import StemmerI


class LancasterStemmer(StemmerI):
    """
    Lancaster Stemmer

    >>> from nltk.stem.lancaster import LancasterStemmer
    >>> st = LancasterStemmer()
    >>> st.stem('maximum')     # Remove "-um" when word is intact
    'maxim'
    >>> st.stem('presumably')  # Don't remove "-um" when word is not intact
    'presum'
    >>> st.stem('multiply')    # No action taken if word ends with "-ply"
    'multiply'
    >>> st.stem('provision')   # Replace "-sion" with "-j" to trigger "j" set of rules
    'provid'
    >>> st.stem('owed')        # Word starting with vowel must contain at least 2 letters
    'ow'
    >>> st.stem('ear')         # ditto
    'ear'
    >>> st.stem('saying')      # Words starting with consonant must contain at least 3
    'say'
    >>> st.stem('crying')      # letters and one of those letters must be a vowel
    'cry'
    >>> st.stem('string')      # ditto
    'string'
    >>> st.stem('meant')       # ditto
    'meant'
    >>> st.stem('cement')      # ditto
    'cem'
    >>> st_pre = LancasterStemmer(strip_prefix_flag=True)
    >>> st_pre.stem('kilometer')  # Test Prefix
    'met'
    >>> st_custom = LancasterStemmer(rule_tuple=("ssen4>", "s1t."))
    >>> st_custom.stem("ness")  # Change s to t
    'nest'
    """

    # The rule list is static since it doesn't change between instances
    default_rule_tuple = (
        "ai*2.",     # -ia > -   if intact
        "a*1.",      # -a > -    if intact
        "bb1.",      # -bb > -b
        "city3s.",   # -ytic > -ys
        "ci2>",      # -ic > -
        "cn1t>",     # -nc > -nt
        "dd1.",      # -dd > -d
        "dei3y>",    # -ied > -y
        "deec2ss.",  # -ceed > -cess
        "dee1.",     # -eed > -ee
        "de2>",      # -ed > -
        "dooh4>",    # -hood > -
        "e1>",       # -e > -
        "feil1v.",   # -lief > -liev
        "fi2>",      # -if > -
        "gni3>",     # -ing > -
        "gai3y.",    # -iag > -y
        "ga2>",      # -ag > -
        "gg1.",      # -gg > -g
        "ht*2.",     # -th > -   if intact
        "hsiug5ct.", # -guish > -ct
        "hsi3>",     # -ish > -
        "i*1.",      # -i > -    if intact
        "i1y>",      # -i > -y
        "ji1d.",     # -ij > -id --  see nois4j> & vis3j>
        "juf1s.",    # -fuj > -fus
        "ju1d.",     # -uj > -ud
        "jo1d.",     # -oj > -od
        "jeh1r.",    # -hej > -her
        "jrev1t.",   # -verj > -vert
        "jsim2t.",   # -misj > -mit
        "jn1d.",     # -nj > -nd
        "j1s.",      # -j > -s
        "lbaifi6.",  # -ifiabl > -
        "lbai4y.",   # -iabl > -y
        "lba3>",     # -abl > -
        "lbi3.",     # -ibl > -
        "lib2l>",    # -bil > -bl
        "lc1.",      # -cl > -c
        "lufi4y.",   # -iful > -y
        "luf3>",     # -ful > -
        "lu2.",      # -ul > -
        "lai3>",     # -ial > -
        "lau3>",     # -ual > -
        "la2>",      # -al > -
        "ll1.",      # -ll > -l
        "mui3.",     # -ium > -
        "mu*2.",     # -um > -   if intact
        "msi3>",     # -ism > -
        "mm1.",      # -mm > -m
        "nois4j>",   # -sion > -j
        "noix4ct.",  # -xion > -ct
        "noi3>",     # -ion > -
        "nai3>",     # -ian > -
        "na2>",      # -an > -
        "nee0.",     # protect -een
        "ne2>",      # -en > -
        "nn1.",      # -nn > -n
        "pihs4>",    # -ship > -
        "pp1.",      # -pp > -p
        "re2>",      # -er > -
        "rae0.",     # protect -ear
        "ra2.",      # -ar > -
        "ro2>",      # -or > -
        "ru2>",      # -ur > -
        "rr1.",      # -rr > -r
        "rt1>",      # -tr > -t
        "rei3y>",    # -ier > -y
        "sei3y>",    # -ies > -y
        "sis2.",     # -sis > -s
        "si2>",      # -is > -
        "ssen4>",    # -ness > -
        "ss0.",      # protect -ss
        "suo3>",     # -ous > -
        "su*2.",     # -us > -   if intact
        "s*1>",      # -s > -    if intact
        "s0.",       # -s > -s
        "tacilp4y.", # -plicat > -ply
        "ta2>",      # -at > -
        "tnem4>",    # -ment > -
        "tne3>",     # -ent > -
        "tna3>",     # -ant > -
        "tpir2b.",   # -ript > -rib
        "tpro2b.",   # -orpt > -orb
        "tcud1.",    # -duct > -duc
        "tpmus2.",   # -sumpt > -sum
        "tpec2iv.",  # -cept > -ceiv
        "tulo2v.",   # -olut > -olv
        "tsis0.",    # protect -sist
        "tsi3>",     # -ist > -
        "tt1.",      # -tt > -t
        "uqi3.",     # -iqu > -
        "ugo1.",     # -ogu > -og
        "vis3j>",    # -siv > -j
        "vie0.",     # protect -eiv
        "vi2>",      # -iv > -
        "ylb1>",     # -bly > -bl
        "yli3y>",    # -ily > -y
        "ylp0.",     # protect -ply
        "yl2>",      # -ly > -
        "ygo1.",     # -ogy > -og
        "yhp1.",     # -phy > -ph
        "ymo1.",     # -omy > -om
        "ypo1.",     # -opy > -op
        "yti3>",     # -ity > -
        "yte3>",     # -ety > -
        "ytl2.",     # -lty > -l
        "yrtsi5.",   # -istry > -
        "yra3>",     # -ary > -
        "yro3>",     # -ory > -
        "yfi3.",     # -ify > -
        "ycn2t>",    # -ncy > -nt
        "yca3>",     # -acy > -
        "zi2>",      # -iz > -
        "zy1s.",     # -yz > -ys
    )

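    # Editor's note (not part of the NLTK source): each rule string uses the
    # Paice/Husk notation -- a reversed ending, an optional "*" meaning the
    # rule fires only while the word is still intact, a digit for how many
    # characters to remove, optional replacement letters, and ">" (continue)
    # or "." (stop). Decoded with the same regex used in __doStemming below:
    #
    #   m = re.match(r"^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$", "nois4j>")
    #   ending, intact, remove, append, cont = m.groups()
    #   ending[::-1]  -> "sion"  (the suffix matched)
    #   int(remove)   -> 4       (strip four characters)
    #   append        -> "j"     (then append "j")
    #   cont          -> ">"     (and keep stemming)
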
def __init__(self, rule_tuple=None, strip_prefix_flag=False):
|
||||
"""Create an instance of the Lancaster stemmer."""
|
||||
# Setup an empty rule dictionary - this will be filled in later
|
||||
self.rule_dictionary = {}
|
||||
# Check if a user wants to strip prefix
|
||||
self._strip_prefix = strip_prefix_flag
|
||||
# Check if a user wants to use his/her own rule tuples.
|
||||
self._rule_tuple = rule_tuple if rule_tuple else self.default_rule_tuple
|
||||
|
||||
    def parseRules(self, rule_tuple=None):
        """Validate the set of rules used in this stemmer.

        If this function is called as an individual method, without using
        the stem method, the rule_tuple argument will be compiled into
        self.rule_dictionary. If this function is called within stem,
        self._rule_tuple will be used.
        """
        # If no argument is given, use the class's own rule tuple.
        rule_tuple = rule_tuple if rule_tuple else self._rule_tuple
        valid_rule = re.compile(r"^[a-z]+\*?\d[a-z]*[>\.]?$")
        # Empty any old rules from the rule set before adding new ones
        self.rule_dictionary = {}

        for rule in rule_tuple:
            if not valid_rule.match(rule):
                raise ValueError(f"The rule {rule} is invalid")
            first_letter = rule[0:1]
            if first_letter in self.rule_dictionary:
                self.rule_dictionary[first_letter].append(rule)
            else:
                self.rule_dictionary[first_letter] = [rule]

    def stem(self, word):
        """Stem a word using the Lancaster stemmer."""
        # Lower-case the word, since all the rules are lower-cased
        word = word.lower()
        word = self.__stripPrefix(word) if self._strip_prefix else word

        # Save a copy of the original word
        intact_word = word

        # If the rule dictionary is empty, parse the rule tuple.
        if not self.rule_dictionary:
            self.parseRules()

        return self.__doStemming(word, intact_word)

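    # A short usage sketch (expected outputs taken from NLTK's own
    # Lancaster examples):
    #
    #     >>> from nltk.stem import LancasterStemmer
    #     >>> LancasterStemmer().stem("maximum")
    #     'maxim'
    #     >>> LancasterStemmer(strip_prefix_flag=True).stem("kilometer")
    #     'met'
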
    def __doStemming(self, word, intact_word):
        """Perform the actual word stemming"""

        valid_rule = re.compile(r"^([a-z]+)(\*?)(\d)([a-z]*)([>\.]?)$")

        proceed = True

        while proceed:
            # Find the position of the last letter of the word to be stemmed
            last_letter_position = self.__getLastLetter(word)

            # Only stem the word if it has a last letter and a rule
            # matching that last letter
            if (
                last_letter_position < 0
                or word[last_letter_position] not in self.rule_dictionary
            ):
                proceed = False

            else:
                rule_was_applied = False

                # Go through each rule that matches the word's final letter
                for rule in self.rule_dictionary[word[last_letter_position]]:
                    rule_match = valid_rule.match(rule)
                    if rule_match:
                        (
                            ending_string,
                            intact_flag,
                            remove_total,
                            append_string,
                            cont_flag,
                        ) = rule_match.groups()

                        # Convert the number of chars to remove when
                        # stemming from a string to an integer
                        remove_total = int(remove_total)

                        # Proceed if word's ending matches rule's word ending
                        if word.endswith(ending_string[::-1]):
                            if intact_flag:
                                if word == intact_word and self.__isAcceptable(
                                    word, remove_total
                                ):
                                    word = self.__applyRule(
                                        word, remove_total, append_string
                                    )
                                    rule_was_applied = True
                                    if cont_flag == ".":
                                        proceed = False
                                    break
                            elif self.__isAcceptable(word, remove_total):
                                word = self.__applyRule(
                                    word, remove_total, append_string
                                )
                                rule_was_applied = True
                                if cont_flag == ".":
                                    proceed = False
                                break
                # If no rules apply, the word doesn't need any more stemming
                if not rule_was_applied:
                    proceed = False
        return word

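    # Example trace through the loop above: "maximum" matches "mu*2."
    # (suffix "um", intact-only, remove two characters, then stop),
    # yielding "maxim". For "presumably" the suffixes "-bly" and "-abl"
    # are stripped first, so by the time the word ends in "um" it is no
    # longer intact, "mu*2." is skipped, and the result is "presum".
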
    def __getLastLetter(self, word):
        """Get the zero-based index of the last character in the word's
        leading alphabetic run, or -1 if the word does not start with a
        letter."""
        last_letter = -1
        for position in range(len(word)):
            if word[position].isalpha():
                last_letter = position
            else:
                break
        return last_letter

    def __isAcceptable(self, word, remove_total):
        """Determine if the word is acceptable for stemming."""
        word_is_acceptable = False
        # If the word starts with a vowel, it must be at least 2
        # characters long to be stemmed
        if word[0] in "aeiouy":
            if len(word) - remove_total >= 2:
                word_is_acceptable = True
        # If the word starts with a consonant, it must be at least 3
        # characters long (including one vowel) to be stemmed
        elif len(word) - remove_total >= 3:
            if word[1] in "aeiouy":
                word_is_acceptable = True
            elif word[2] in "aeiouy":
                word_is_acceptable = True
        return word_is_acceptable

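    # For example, "string" keeps its "-ing": removing three characters
    # would leave "str", and since neither the second nor the third
    # letter of "string" is a vowel, the rule is rejected and the word
    # is left unchanged.
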
    def __applyRule(self, word, remove_total, append_string):
        """Apply the stemming rule to the word"""
        # Remove letters from the end of the word
        new_word_length = len(word) - remove_total
        word = word[0:new_word_length]

        # And add new letters to the end of the truncated word
        if append_string:
            word += append_string
        return word

    def __stripPrefix(self, word):
        """Remove a prefix from a word.

        This function was originally taken from Whoosh.
        """
        for prefix in (
            "kilo",
            "micro",
            "milli",
            "intra",
            "ultra",
            "mega",
            "nano",
            "pico",
            "pseudo",
        ):
            if word.startswith(prefix):
                return word[len(prefix) :]
        return word

    def __repr__(self):
        return "<LancasterStemmer>"
717
backend/venv/Lib/site-packages/nltk/stem/porter.py
Normal file
@@ -0,0 +1,717 @@
"""
|
||||
Porter Stemmer
|
||||
|
||||
This is the Porter stemming algorithm. It follows the algorithm
|
||||
presented in
|
||||
|
||||
Porter, M. "An algorithm for suffix stripping." Program 14.3 (1980): 130-137.
|
||||
|
||||
with some optional deviations that can be turned on or off with the
|
||||
`mode` argument to the constructor.
|
||||
|
||||
Martin Porter, the algorithm's inventor, maintains a web page about the
|
||||
algorithm at
|
||||
|
||||
https://www.tartarus.org/~martin/PorterStemmer/
|
||||
|
||||
which includes another Python implementation and other implementations
|
||||
in many languages.
|
||||
"""
|
||||
|
||||
__docformat__ = "plaintext"
|
||||
|
||||
import re
|
||||
|
||||
from nltk.stem.api import StemmerI
|
||||
|
||||
|
||||
class PorterStemmer(StemmerI):
    """
    A word stemmer based on the Porter stemming algorithm.

        Porter, M. "An algorithm for suffix stripping."
        Program 14.3 (1980): 130-137.

    See https://www.tartarus.org/~martin/PorterStemmer/ for the homepage
    of the algorithm.

    Martin Porter has endorsed several modifications to the Porter
    algorithm since writing his original paper, and those extensions are
    included in the implementations on his website. Additionally, others
    have proposed further improvements to the algorithm, including NLTK
    contributors. There are thus three modes that can be selected by
    passing the appropriate constant to the class constructor's `mode`
    attribute:

    - PorterStemmer.ORIGINAL_ALGORITHM

        An implementation that is faithful to the original paper.

        Note that Martin Porter has deprecated this version of the
        algorithm. Martin distributes implementations of the Porter
        Stemmer in many languages, hosted at:

        https://www.tartarus.org/~martin/PorterStemmer/

        and all of these implementations include his extensions. He
        strongly recommends against using the original, published
        version of the algorithm; only use this mode if you clearly
        understand why you are choosing to do so.

    - PorterStemmer.MARTIN_EXTENSIONS

        An implementation that only uses the modifications to the
        algorithm that are included in the implementations on Martin
        Porter's website. He has declared Porter frozen, so the
        behaviour of those implementations should never change.

    - PorterStemmer.NLTK_EXTENSIONS (default)

        An implementation that includes further improvements devised by
        NLTK contributors or taken from other modified implementations
        found on the web.

    For the best stemming, you should use the default NLTK_EXTENSIONS
    version. However, if you need to get the same results as either the
    original algorithm or one of Martin Porter's hosted versions for
    compatibility with an existing implementation or dataset, you can use
    one of the other modes instead.
    """

    # Modes the Stemmer can be instantiated in
    NLTK_EXTENSIONS = "NLTK_EXTENSIONS"
    MARTIN_EXTENSIONS = "MARTIN_EXTENSIONS"
    ORIGINAL_ALGORITHM = "ORIGINAL_ALGORITHM"

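    # A short usage sketch (expected outputs worked out by hand from the
    # steps below, under the default NLTK_EXTENSIONS mode):
    #
    #     >>> from nltk.stem.porter import PorterStemmer
    #     >>> p = PorterStemmer(mode=PorterStemmer.NLTK_EXTENSIONS)
    #     >>> [p.stem(w) for w in ["caresses", "ponies", "meeting", "agreed"]]
    #     ['caress', 'poni', 'meet', 'agre']
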
    def __init__(self, mode=NLTK_EXTENSIONS):
        if mode not in (
            self.NLTK_EXTENSIONS,
            self.MARTIN_EXTENSIONS,
            self.ORIGINAL_ALGORITHM,
        ):
            raise ValueError(
                "Mode must be one of PorterStemmer.NLTK_EXTENSIONS, "
                "PorterStemmer.MARTIN_EXTENSIONS, or "
                "PorterStemmer.ORIGINAL_ALGORITHM"
            )

        self.mode = mode

        if self.mode == self.NLTK_EXTENSIONS:
            # This is a table of irregular forms. It is quite short,
            # but still reflects the errors actually drawn to Martin
            # Porter's attention over a 20 year period!
            irregular_forms = {
                "sky": ["sky", "skies"],
                "die": ["dying"],
                "lie": ["lying"],
                "tie": ["tying"],
                "news": ["news"],
                "inning": ["innings", "inning"],
                "outing": ["outings", "outing"],
                "canning": ["cannings", "canning"],
                "howe": ["howe"],
                "proceed": ["proceed"],
                "exceed": ["exceed"],
                "succeed": ["succeed"],
            }

            self.pool = {}
            for key in irregular_forms:
                for val in irregular_forms[key]:
                    self.pool[val] = key

        self.vowels = frozenset(["a", "e", "i", "o", "u"])

    def _is_consonant(self, word, i):
        """Returns True if word[i] is a consonant, False otherwise

        A consonant is defined in the paper as follows:

            A consonant in a word is a letter other than A, E, I, O or
            U, and other than Y preceded by a consonant. (The fact that
            the term `consonant' is defined to some extent in terms of
            itself does not make it ambiguous.) So in TOY the consonants
            are T and Y, and in SYZYGY they are S, Z and G. If a letter
            is not a consonant it is a vowel.
        """
        if word[i] in self.vowels:
            return False
        if word[i] == "y":
            if i == 0:
                return True
            else:
                return not self._is_consonant(word, i - 1)
        return True

    def _measure(self, stem):
        r"""Returns the 'measure' of stem, per definition in the paper

        From the paper:

            A consonant will be denoted by c, a vowel by v. A list
            ccc... of length greater than 0 will be denoted by C, and a
            list vvv... of length greater than 0 will be denoted by V.
            Any word, or part of a word, therefore has one of the four
            forms:

                CVCV ... C
                CVCV ... V
                VCVC ... C
                VCVC ... V

            These may all be represented by the single form

                [C]VCVC ... [V]

            where the square brackets denote arbitrary presence of their
            contents. Using (VC){m} to denote VC repeated m times, this
            may again be written as

                [C](VC){m}[V].

            m will be called the \measure\ of any word or word part when
            represented in this form. The case m = 0 covers the null
            word. Here are some examples:

                m=0    TR,  EE,  TREE,  Y,  BY.
                m=1    TROUBLE,  OATS,  TREES,  IVY.
                m=2    TROUBLES,  PRIVATE,  OATEN,  ORRERY.
        """
        cv_sequence = ""

        # Construct a string of 'c's and 'v's representing whether each
        # character in `stem` is a consonant or a vowel.
        # e.g. 'falafel' becomes 'cvcvcvc',
        #      'architecture' becomes 'vcccvcvccvcv'
        for i in range(len(stem)):
            if self._is_consonant(stem, i):
                cv_sequence += "c"
            else:
                cv_sequence += "v"

        # Count the number of 'vc' occurrences, which is equivalent to
        # the number of 'VC' occurrences in Porter's reduced form in the
        # docstring above, which is in turn equivalent to `m`
        return cv_sequence.count("vc")

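    # Worked example: for "troubles" the cv map is
    #     t r o u b l e s
    #     c c v v c c v c   ->  "ccvvccvc"
    # which contains "vc" twice, so m = 2, matching the paper's
    # TROUBLES example above.
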
    def _has_positive_measure(self, stem):
        return self._measure(stem) > 0

    def _contains_vowel(self, stem):
        """Returns True if stem contains a vowel, else False"""
        for i in range(len(stem)):
            if not self._is_consonant(stem, i):
                return True
        return False

    def _ends_double_consonant(self, word):
        """Implements condition *d from the paper

        Returns True if word ends with a double consonant
        """
        return (
            len(word) >= 2
            and word[-1] == word[-2]
            and self._is_consonant(word, len(word) - 1)
        )

    def _ends_cvc(self, word):
        """Implements condition *o from the paper

        From the paper:

            *o  - the stem ends cvc, where the second c is not W, X or Y
                  (e.g. -WIL, -HOP).
        """
        return (
            len(word) >= 3
            and self._is_consonant(word, len(word) - 3)
            and not self._is_consonant(word, len(word) - 2)
            and self._is_consonant(word, len(word) - 1)
            and word[-1] not in ("w", "x", "y")
        ) or (
            self.mode == self.NLTK_EXTENSIONS
            and len(word) == 2
            and not self._is_consonant(word, 0)
            and self._is_consonant(word, 1)
        )

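    # For instance, "fil" (from "filing") ends c-v-c with a final "l"
    # (not w/x/y), so *o holds and step 1b's (m=1 and *o) rule restores
    # the "e", giving "file"; "low" fails the check because its final
    # consonant is "w".
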
    def _replace_suffix(self, word, suffix, replacement):
        """Replaces `suffix` of `word` with `replacement`"""
        assert word.endswith(suffix), "Given word doesn't end with given suffix"
        if suffix == "":
            return word + replacement
        else:
            return word[: -len(suffix)] + replacement

    def _apply_rule_list(self, word, rules):
        """Applies the first applicable suffix-removal rule to the word

        Takes a word and a list of suffix-removal rules represented as
        3-tuples, with the first element being the suffix to remove,
        the second element being the string to replace it with, and the
        final element being the condition for the rule to be applicable,
        or None if the rule is unconditional.
        """
        for rule in rules:
            suffix, replacement, condition = rule
            if suffix == "*d" and self._ends_double_consonant(word):
                stem = word[:-2]
                if condition is None or condition(stem):
                    return stem + replacement
                else:
                    # Don't try any further rules
                    return word
            if word.endswith(suffix):
                stem = self._replace_suffix(word, suffix, "")
                if condition is None or condition(stem):
                    return stem + replacement
                else:
                    # Don't try any further rules
                    return word

        return word

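    # As a concrete reading of the tuple format: ("sses", "ss", None)
    # from step 1a below unconditionally maps "caresses" -> "caress",
    # while a conditional rule such as ("ll", "l", <measure check>) in
    # step 5b only fires when its condition holds for the stem.
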
    def _step1a(self, word):
        """Implements Step 1a from "An algorithm for suffix stripping"

        From the paper:

            SSES -> SS                         caresses  ->  caress
            IES  -> I                          ponies    ->  poni
                                               ties      ->  ti
            SS   -> SS                         caress    ->  caress
            S    ->                            cats      ->  cat
        """
        # this NLTK-only rule extends the original algorithm, so
        # that 'flies'->'fli' but 'dies'->'die' etc
        if self.mode == self.NLTK_EXTENSIONS:
            if word.endswith("ies") and len(word) == 4:
                return self._replace_suffix(word, "ies", "ie")

        return self._apply_rule_list(
            word,
            [
                ("sses", "ss", None),  # SSES -> SS
                ("ies", "i", None),  # IES -> I
                ("ss", "ss", None),  # SS -> SS
                ("s", "", None),  # S ->
            ],
        )

    def _step1b(self, word):
        """Implements Step 1b from "An algorithm for suffix stripping"

        From the paper:

            (m>0) EED -> EE                    feed      ->  feed
                                               agreed    ->  agree
            (*v*) ED  ->                       plastered ->  plaster
                                               bled      ->  bled
            (*v*) ING ->                       motoring  ->  motor
                                               sing      ->  sing

        If the second or third of the rules in Step 1b is successful,
        the following is done:

            AT -> ATE                       conflat(ed)  ->  conflate
            BL -> BLE                       troubl(ed)   ->  trouble
            IZ -> IZE                       siz(ed)      ->  size
            (*d and not (*L or *S or *Z))
               -> single letter
                                            hopp(ing)    ->  hop
                                            tann(ed)     ->  tan
                                            fall(ing)    ->  fall
                                            hiss(ing)    ->  hiss
                                            fizz(ed)     ->  fizz
            (m=1 and *o) -> E               fail(ing)    ->  fail
                                            fil(ing)     ->  file

        The rule to map to a single letter causes the removal of one of
        the double letter pair. The -E is put back on -AT, -BL and -IZ,
        so that the suffixes -ATE, -BLE and -IZE can be recognised
        later. This E may be removed in step 4.
        """
        # this NLTK-only block extends the original algorithm, so that
        # 'spied'->'spi' but 'died'->'die' etc
        if self.mode == self.NLTK_EXTENSIONS:
            if word.endswith("ied"):
                if len(word) == 4:
                    return self._replace_suffix(word, "ied", "ie")
                else:
                    return self._replace_suffix(word, "ied", "i")

        # (m>0) EED -> EE
        if word.endswith("eed"):
            stem = self._replace_suffix(word, "eed", "")
            if self._measure(stem) > 0:
                return stem + "ee"
            else:
                return word

        rule_2_or_3_succeeded = False

        for suffix in ["ed", "ing"]:
            if word.endswith(suffix):
                intermediate_stem = self._replace_suffix(word, suffix, "")
                if self._contains_vowel(intermediate_stem):
                    rule_2_or_3_succeeded = True
                    break

        if not rule_2_or_3_succeeded:
            return word

        return self._apply_rule_list(
            intermediate_stem,
            [
                ("at", "ate", None),  # AT -> ATE
                ("bl", "ble", None),  # BL -> BLE
                ("iz", "ize", None),  # IZ -> IZE
                # (*d and not (*L or *S or *Z))
                # -> single letter
                (
                    "*d",
                    intermediate_stem[-1],
                    lambda stem: intermediate_stem[-1] not in ("l", "s", "z"),
                ),
                # (m=1 and *o) -> E
                (
                    "",
                    "e",
                    lambda stem: (self._measure(stem) == 1 and self._ends_cvc(stem)),
                ),
            ],
        )

    def _step1c(self, word):
        """Implements Step 1c from "An algorithm for suffix stripping"

        From the paper:

            Step 1c

                (*v*) Y -> I                    happy  ->  happi
                                                sky    ->  sky
        """

        def nltk_condition(stem):
            """
            This has been modified from the original Porter algorithm so
            that y->i is only done when y is preceded by a consonant,
            but not if the stem is only a single consonant, i.e.

                (*c and not c) Y -> I

            So 'happy' -> 'happi', but
               'enjoy' -> 'enjoy' etc

            This is a much better rule. Formerly 'enjoy'->'enjoi' and
            'enjoyment'->'enjoy'. Step 1c is perhaps done too soon; but
            with this modification that no longer really matters.

            Also, the removal of the contains_vowel(z) condition means
            that 'spy', 'fly', 'try' ... stem to 'spi', 'fli', 'tri' and
            conflate with 'spied', 'tried', 'flies' ...
            """
            return len(stem) > 1 and self._is_consonant(stem, len(stem) - 1)

        def original_condition(stem):
            return self._contains_vowel(stem)

        return self._apply_rule_list(
            word,
            [
                (
                    "y",
                    "i",
                    (
                        nltk_condition
                        if self.mode == self.NLTK_EXTENSIONS
                        else original_condition
                    ),
                )
            ],
        )

    def _step2(self, word):
        """Implements Step 2 from "An algorithm for suffix stripping"

        From the paper:

            Step 2

                (m>0) ATIONAL ->  ATE       relational     ->  relate
                (m>0) TIONAL  ->  TION      conditional    ->  condition
                                            rational       ->  rational
                (m>0) ENCI    ->  ENCE      valenci        ->  valence
                (m>0) ANCI    ->  ANCE      hesitanci      ->  hesitance
                (m>0) IZER    ->  IZE       digitizer      ->  digitize
                (m>0) ABLI    ->  ABLE      conformabli    ->  conformable
                (m>0) ALLI    ->  AL        radicalli      ->  radical
                (m>0) ENTLI   ->  ENT       differentli    ->  different
                (m>0) ELI     ->  E         vileli         ->  vile
                (m>0) OUSLI   ->  OUS       analogousli    ->  analogous
                (m>0) IZATION ->  IZE       vietnamization ->  vietnamize
                (m>0) ATION   ->  ATE       predication    ->  predicate
                (m>0) ATOR    ->  ATE       operator       ->  operate
                (m>0) ALISM   ->  AL        feudalism      ->  feudal
                (m>0) IVENESS ->  IVE       decisiveness   ->  decisive
                (m>0) FULNESS ->  FUL       hopefulness    ->  hopeful
                (m>0) OUSNESS ->  OUS       callousness    ->  callous
                (m>0) ALITI   ->  AL        formaliti      ->  formal
                (m>0) IVITI   ->  IVE       sensitiviti    ->  sensitive
                (m>0) BILITI  ->  BLE       sensibiliti    ->  sensible
        """

        if self.mode == self.NLTK_EXTENSIONS:
            # Instead of applying the ALLI -> AL rule after '(a)bli' per
            # the published algorithm, we apply it first, and, if it
            # succeeds, run the result through step 2 again.
            if word.endswith("alli") and self._has_positive_measure(
                self._replace_suffix(word, "alli", "")
            ):
                return self._step2(self._replace_suffix(word, "alli", "al"))

        bli_rule = ("bli", "ble", self._has_positive_measure)
        abli_rule = ("abli", "able", self._has_positive_measure)

        rules = [
            ("ational", "ate", self._has_positive_measure),
            ("tional", "tion", self._has_positive_measure),
            ("enci", "ence", self._has_positive_measure),
            ("anci", "ance", self._has_positive_measure),
            ("izer", "ize", self._has_positive_measure),
            abli_rule if self.mode == self.ORIGINAL_ALGORITHM else bli_rule,
            ("alli", "al", self._has_positive_measure),
            ("entli", "ent", self._has_positive_measure),
            ("eli", "e", self._has_positive_measure),
            ("ousli", "ous", self._has_positive_measure),
            ("ization", "ize", self._has_positive_measure),
            ("ation", "ate", self._has_positive_measure),
            ("ator", "ate", self._has_positive_measure),
            ("alism", "al", self._has_positive_measure),
            ("iveness", "ive", self._has_positive_measure),
            ("fulness", "ful", self._has_positive_measure),
            ("ousness", "ous", self._has_positive_measure),
            ("aliti", "al", self._has_positive_measure),
            ("iviti", "ive", self._has_positive_measure),
            ("biliti", "ble", self._has_positive_measure),
        ]

        if self.mode == self.NLTK_EXTENSIONS:
            rules.append(("fulli", "ful", self._has_positive_measure))

            # The 'l' of the 'logi' -> 'log' rule is put with the stem,
            # so that short stems like 'geo' 'theo' etc work like
            # 'archaeo' 'philo' etc.
            rules.append(
                ("logi", "log", lambda stem: self._has_positive_measure(word[:-3]))
            )

        if self.mode == self.MARTIN_EXTENSIONS:
            rules.append(("logi", "log", self._has_positive_measure))

        return self._apply_rule_list(word, rules)

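    # e.g. under NLTK_EXTENSIONS, "radicalli" is rewritten to "radical"
    # by the early ALLI -> AL branch above and then fed through _step2
    # again, where no further suffix rule matches.
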
    def _step3(self, word):
        """Implements Step 3 from "An algorithm for suffix stripping"

        From the paper:

            Step 3

                (m>0) ICATE ->  IC          triplicate     ->  triplic
                (m>0) ATIVE ->              formative      ->  form
                (m>0) ALIZE ->  AL          formalize      ->  formal
                (m>0) ICITI ->  IC          electriciti    ->  electric
                (m>0) ICAL  ->  IC          electrical     ->  electric
                (m>0) FUL   ->              hopeful        ->  hope
                (m>0) NESS  ->              goodness       ->  good
        """
        return self._apply_rule_list(
            word,
            [
                ("icate", "ic", self._has_positive_measure),
                ("ative", "", self._has_positive_measure),
                ("alize", "al", self._has_positive_measure),
                ("iciti", "ic", self._has_positive_measure),
                ("ical", "ic", self._has_positive_measure),
                ("ful", "", self._has_positive_measure),
                ("ness", "", self._has_positive_measure),
            ],
        )

    def _step4(self, word):
        """Implements Step 4 from "An algorithm for suffix stripping"

        Step 4

            (m>1) AL    ->              revival        ->  reviv
            (m>1) ANCE  ->              allowance      ->  allow
            (m>1) ENCE  ->              inference      ->  infer
            (m>1) ER    ->              airliner       ->  airlin
            (m>1) IC    ->              gyroscopic     ->  gyroscop
            (m>1) ABLE  ->              adjustable     ->  adjust
            (m>1) IBLE  ->              defensible     ->  defens
            (m>1) ANT   ->              irritant       ->  irrit
            (m>1) EMENT ->              replacement    ->  replac
            (m>1) MENT  ->              adjustment     ->  adjust
            (m>1) ENT   ->              dependent      ->  depend
            (m>1 and (*S or *T)) ION -> adoption       ->  adopt
            (m>1) OU    ->              homologou      ->  homolog
            (m>1) ISM   ->              communism      ->  commun
            (m>1) ATE   ->              activate       ->  activ
            (m>1) ITI   ->              angulariti     ->  angular
            (m>1) OUS   ->              homologous     ->  homolog
            (m>1) IVE   ->              effective      ->  effect
            (m>1) IZE   ->              bowdlerize     ->  bowdler

        The suffixes are now removed. All that remains is a little
        tidying up.
        """
        measure_gt_1 = lambda stem: self._measure(stem) > 1

        return self._apply_rule_list(
            word,
            [
                ("al", "", measure_gt_1),
                ("ance", "", measure_gt_1),
                ("ence", "", measure_gt_1),
                ("er", "", measure_gt_1),
                ("ic", "", measure_gt_1),
                ("able", "", measure_gt_1),
                ("ible", "", measure_gt_1),
                ("ant", "", measure_gt_1),
                ("ement", "", measure_gt_1),
                ("ment", "", measure_gt_1),
                ("ent", "", measure_gt_1),
                # (m>1 and (*S or *T)) ION ->
                (
                    "ion",
                    "",
                    lambda stem: self._measure(stem) > 1 and stem[-1] in ("s", "t"),
                ),
                ("ou", "", measure_gt_1),
                ("ism", "", measure_gt_1),
                ("ate", "", measure_gt_1),
                ("iti", "", measure_gt_1),
                ("ous", "", measure_gt_1),
                ("ive", "", measure_gt_1),
                ("ize", "", measure_gt_1),
            ],
        )

    def _step5a(self, word):
        """Implements Step 5a from "An algorithm for suffix stripping"

        From the paper:

            Step 5a

                (m>1) E     ->              probate        ->  probat
                                            rate           ->  rate
                (m=1 and not *o) E ->       cease          ->  ceas
        """
        # Note that Martin's test vocabulary and reference
        # implementations are inconsistent in how they handle the case
        # where two rules both refer to a suffix that matches the word
        # to be stemmed, but only the condition of the second one is
        # true.
        # Earlier, in step 1b, we had the rules:
        #     (m>0) EED -> EE
        #     (*v*) ED  ->
        # but the examples in the paper included "feed"->"feed", even
        # though (*v*) is true for "fe" and therefore the second rule
        # alone would map "feed"->"fe".
        # However, in THIS case, we need to handle the consecutive rules
        # differently and try both conditions (obviously; the second
        # rule here would be redundant otherwise). Martin's paper makes
        # no explicit mention of the inconsistency; you have to infer it
        # from the examples.
        # For this reason, we can't use _apply_rule_list here.
        if word.endswith("e"):
            stem = self._replace_suffix(word, "e", "")
            if self._measure(stem) > 1:
                return stem
            if self._measure(stem) == 1 and not self._ends_cvc(stem):
                return stem
        return word

    def _step5b(self, word):
        """Implements Step 5b from "An algorithm for suffix stripping"

        From the paper:

            Step 5b

                (m > 1 and *d and *L) -> single letter
                                            controll       ->  control
                                            roll           ->  roll
        """
        return self._apply_rule_list(
            word, [("ll", "l", lambda stem: self._measure(word[:-1]) > 1)]
        )

    def stem(self, word, to_lowercase=True):
        """
        :param to_lowercase: if `to_lowercase=True`, the word is
            lowercased before stemming
        """
        stem = word.lower() if to_lowercase else word

        if self.mode == self.NLTK_EXTENSIONS and stem in self.pool:
            return self.pool[stem]

        if self.mode != self.ORIGINAL_ALGORITHM and len(word) <= 2:
            # With this line, strings of length 1 or 2 don't go through
            # the stemming process, although no mention is made of this
            # in the published algorithm.
            return stem

        stem = self._step1a(stem)
        stem = self._step1b(stem)
        stem = self._step1c(stem)
        stem = self._step2(stem)
        stem = self._step3(stem)
        stem = self._step4(stem)
        stem = self._step5a(stem)
        stem = self._step5b(stem)

        return stem

    def __repr__(self):
        return "<PorterStemmer>"


def demo():
    """
    A demonstration of the porter stemmer on a sample from
    the Penn Treebank corpus.
    """

    from nltk import stem
    from nltk.corpus import treebank

    stemmer = stem.PorterStemmer()

    orig = []
    stemmed = []
    for item in treebank.fileids()[:3]:
        for word, tag in treebank.tagged_words(item):
            orig.append(word)
            stemmed.append(stemmer.stem(word))

    # Convert the results to a string, and word-wrap them.
    results = " ".join(stemmed)
    results = re.sub(r"(.{,70})\s", r"\1\n", results + " ").rstrip()

    # Convert the original to a string, and word wrap it.
    original = " ".join(orig)
    original = re.sub(r"(.{,70})\s", r"\1\n", original + " ").rstrip()

    # Print the results.
    print("-Original-".center(70).replace(" ", "*").replace("-", " "))
    print(original)
    print("-Results-".center(70).replace(" ", "*").replace("-", " "))
    print(results)
    print("*" * 70)
55
backend/venv/Lib/site-packages/nltk/stem/regexp.py
Normal file
@@ -0,0 +1,55 @@
# Natural Language Toolkit: Stemmers
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Trevor Cohn <tacohn@cs.mu.oz.au>
#         Edward Loper <edloper@gmail.com>
#         Steven Bird <stevenbird1@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
import re

from nltk.stem.api import StemmerI


class RegexpStemmer(StemmerI):
    """
    A stemmer that uses regular expressions to identify morphological
    affixes. Any substrings that match the regular expressions will
    be removed.

    >>> from nltk.stem import RegexpStemmer
    >>> st = RegexpStemmer('ing$|s$|e$|able$', min=4)
    >>> st.stem('cars')
    'car'
    >>> st.stem('mass')
    'mas'
    >>> st.stem('was')
    'was'
    >>> st.stem('bee')
    'bee'
    >>> st.stem('compute')
    'comput'
    >>> st.stem('advisable')
    'advis'

    :type regexp: str or regexp
    :param regexp: The regular expression that should be used to
        identify morphological affixes.
    :type min: int
    :param min: The minimum length of string to stem
    """

    def __init__(self, regexp, min=0):
        if not hasattr(regexp, "pattern"):
            regexp = re.compile(regexp)
        self._regexp = regexp
        self._min = min

    def stem(self, word):
        if len(word) < self._min:
            return word
        else:
            return self._regexp.sub("", word)

    def __repr__(self):
        return f"<RegexpStemmer: {self._regexp.pattern!r}>"
137
backend/venv/Lib/site-packages/nltk/stem/rslp.py
Normal file
@@ -0,0 +1,137 @@
# Natural Language Toolkit: RSLP Stemmer
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Tiago Tresoldi <tresoldi@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

# This code is based on the algorithm presented in the paper "A Stemming
# Algorithm for the Portuguese Language" by Viviane Moreira Orengo and
# Christian Huyck, which unfortunately I had no access to. The code is a
# Python version, with some minor modifications of mine, of the description
# presented at https://www.webcitation.org/5NnvdIzOb and of the C source code
# available at http://www.inf.ufrgs.br/~arcoelho/rslp/integrando_rslp.html.
# Please note that this stemmer is intended for demonstration and educational
# purposes only. Feel free to write me for any comments, including the
# development of a different and/or better stemmer for Portuguese. I also
# suggest using NLTK's mailing list for Portuguese for any discussion.

from nltk.data import load
from nltk.stem.api import StemmerI


class RSLPStemmer(StemmerI):
    """
    A stemmer for Portuguese.

    >>> from nltk.stem import RSLPStemmer
    >>> st = RSLPStemmer()
    >>> # opening lines of Erico Verissimo's "Música ao Longe"
    >>> text = '''
    ... Clarissa risca com giz no quadro-negro a paisagem que os alunos
    ... devem copiar . Uma casinha de porta e janela , em cima duma
    ... coxilha .'''
    >>> for token in text.split():  # doctest: +NORMALIZE_WHITESPACE
    ...     print(st.stem(token))
    clariss risc com giz no quadro-negr a pais que os alun dev copi .
    uma cas de port e janel , em cim dum coxilh .
    """

    def __init__(self):
        self._model = []

        self._model.append(self.read_rule("step0.pt"))
        self._model.append(self.read_rule("step1.pt"))
        self._model.append(self.read_rule("step2.pt"))
        self._model.append(self.read_rule("step3.pt"))
        self._model.append(self.read_rule("step4.pt"))
        self._model.append(self.read_rule("step5.pt"))
        self._model.append(self.read_rule("step6.pt"))

    def read_rule(self, filename):
        rules = load("nltk:stemmers/rslp/" + filename, format="raw").decode("utf8")
        lines = rules.split("\n")

        lines = [line for line in lines if line != ""]  # remove blank lines
        lines = [line for line in lines if line[0] != "#"]  # remove comments

        # NOTE: a simple but ugly hack to make this parser happy with double '\t's
        lines = [line.replace("\t\t", "\t") for line in lines]

        # parse rules
        rules = []
        for line in lines:
            rule = []
            tokens = line.split("\t")

            # text to be searched for at the end of the string
            rule.append(tokens[0][1:-1])  # remove quotes

            # minimum stem size to perform the replacement
            rule.append(int(tokens[1]))

            # text to be replaced into
            rule.append(tokens[2][1:-1])  # remove quotes

            # exceptions to this rule
            rule.append([token[1:-1] for token in tokens[3].split(",")])

            # append to the results
            rules.append(rule)

        return rules

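    # Illustration of the parsed shape (the values below are made up;
    # the real rules live in the step*.pt data files): a tab-separated
    # line such as
    #     "inho" <TAB> 3 <TAB> "" <TAB> {"caminho","carinho"}
    # becomes the list ["inho", 3, "", ["caminho", "carinho"]], meaning:
    # strip the suffix "inho" when the remaining stem has at least 3
    # characters and the word is not one of the listed exceptions.
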
    def stem(self, word):
        word = word.lower()

        # if the word ends in 's', apply the rule for plural reduction
        if word[-1] == "s":
            word = self.apply_rule(word, 0)

        # if the word ends in 'a', apply the rule for feminine reduction
        if word[-1] == "a":
            word = self.apply_rule(word, 1)

        # augmentative reduction
        word = self.apply_rule(word, 3)

        # adverb reduction
        word = self.apply_rule(word, 2)

        # noun reduction
        prev_word = word
        word = self.apply_rule(word, 4)
        if word == prev_word:
            # verb reduction
            prev_word = word
            word = self.apply_rule(word, 5)
            if word == prev_word:
                # vowel removal
                word = self.apply_rule(word, 6)

        return word

    def apply_rule(self, word, rule_index):
        rules = self._model[rule_index]
        for rule in rules:
            suffix_length = len(rule[0])
            if word[-suffix_length:] == rule[0]:  # if suffix matches
                if len(word) >= suffix_length + rule[1]:  # if we have minimum size
                    if word not in rule[3]:  # if not an exception
                        word = word[:-suffix_length] + rule[2]
                        break

        return word
5921
backend/venv/Lib/site-packages/nltk/stem/snowball.py
Normal file
File diff suppressed because it is too large
25
backend/venv/Lib/site-packages/nltk/stem/util.py
Normal file
@@ -0,0 +1,25 @@
# Natural Language Toolkit: Stemmer Utilities
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Helder <he7d3r@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT


def suffix_replace(original, old, new):
    """
    Replaces the old suffix of the original string with a new suffix
    """
    return original[: -len(old)] + new


def prefix_replace(original, old, new):
    """
    Replaces the old prefix of the original string with a new prefix

    :param original: string
    :param old: string
    :param new: string
    :return: string
    """
    return new + original[len(old) :]
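

# Illustrative doctests (consistent with the slicing above):
#
#     >>> suffix_replace("carinho", "inho", "")
#     'car'
#     >>> prefix_replace("unhappy", "un", "")
#     'happy'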
89
backend/venv/Lib/site-packages/nltk/stem/wordnet.py
Normal file
@@ -0,0 +1,89 @@
# Natural Language Toolkit: WordNet stemmer interface
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
#         Eric Kafe <kafe.eric@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT


class WordNetLemmatizer:
    """
    WordNet Lemmatizer

    Provides 3 lemmatizer modes: _morphy(), morphy() and lemmatize().

    lemmatize() is a permissive wrapper around _morphy().
    It returns the shortest lemma found in WordNet,
    or the input string unchanged if nothing is found.

    >>> from nltk.stem import WordNetLemmatizer as wnl
    >>> print(wnl().lemmatize('us', 'n'))
    u

    >>> print(wnl().lemmatize('Anythinggoeszxcv'))
    Anythinggoeszxcv

    """

    def _morphy(self, form, pos, check_exceptions=True):
        """
        _morphy() is WordNet's _morphy lemmatizer.
        It returns a list of all lemmas found in WordNet.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl()._morphy('us', 'n'))
        ['us', 'u']
        """
        from nltk.corpus import wordnet as wn

        return wn._morphy(form, pos, check_exceptions)

    def morphy(self, form, pos=None, check_exceptions=True):
        """
        morphy() is a restrictive wrapper around _morphy().
        It returns the first lemma found in WordNet,
        or None if no lemma is found.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl().morphy('us', 'n'))
        us

        >>> print(wnl().morphy('catss'))
        None
        """
        from nltk.corpus import wordnet as wn

        return wn.morphy(form, pos, check_exceptions)

    def lemmatize(self, word: str, pos: str = "n") -> str:
        """Lemmatize `word` by picking the shortest of the possible lemmas,
        using the wordnet corpus reader's built-in _morphy function.
        Returns the input word unchanged if it cannot be found in WordNet.

        >>> from nltk.stem import WordNetLemmatizer as wnl
        >>> print(wnl().lemmatize('dogs'))
        dog
        >>> print(wnl().lemmatize('churches'))
        church
        >>> print(wnl().lemmatize('aardwolves'))
        aardwolf
        >>> print(wnl().lemmatize('abaci'))
        abacus
        >>> print(wnl().lemmatize('hardrock'))
        hardrock

        :param word: The input word to lemmatize.
        :type word: str
        :param pos: The Part Of Speech tag. Valid options are `"n"` for nouns,
            `"v"` for verbs, `"a"` for adjectives, `"r"` for adverbs and `"s"`
            for satellite adjectives.
        :type pos: str
        :return: The shortest lemma of `word`, for the given `pos`.
        """
        lemmas = self._morphy(word, pos)
        return min(lemmas, key=len) if lemmas else word

    def __repr__(self):
        return "<WordNetLemmatizer>"