Initial commit
102
backend/venv/Lib/site-packages/nltk/parse/__init__.py
Normal file
@@ -0,0 +1,102 @@
# Natural Language Toolkit: Parsers
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#

"""
NLTK Parsers

Classes and interfaces for producing tree structures that represent
the internal organization of a text. This task is known as "parsing"
the text, and the resulting tree structures are called the text's
"parses". Typically, the text is a single sentence, and the tree
structure represents the syntactic structure of the sentence.
However, parsers can also be used in other domains. For example,
parsers can be used to derive the morphological structure of the
morphemes that make up a word, or to derive the discourse structure
for a set of utterances.

Sometimes, a single piece of text can be represented by more than one
tree structure. Texts represented by more than one tree structure are
called "ambiguous" texts. Note that there are actually two ways in
which a text can be ambiguous:

    - The text has multiple correct parses.
    - There is not enough information to decide which of several
      candidate parses is correct.

However, the parser module does *not* distinguish these two types of
ambiguity.

The parser module defines ``ParserI``, a standard interface for parsing
texts; and two simple implementations of that interface,
``ShiftReduceParser`` and ``RecursiveDescentParser``. It also contains
sub-modules for specialized kinds of parsing:

    - ``nltk.parse.chart`` defines chart parsing, which uses dynamic
      programming to efficiently parse texts.
    - ``nltk.parse.pchart`` defines probabilistic parsing, which
      associates a probability with each parse.
"""

from nltk.parse.api import ParserI
from nltk.parse.bllip import BllipParser
from nltk.parse.chart import (
    BottomUpChartParser,
    BottomUpLeftCornerChartParser,
    ChartParser,
    LeftCornerChartParser,
    SteppingChartParser,
    TopDownChartParser,
)
from nltk.parse.corenlp import CoreNLPDependencyParser, CoreNLPParser
from nltk.parse.dependencygraph import DependencyGraph
from nltk.parse.earleychart import (
    EarleyChartParser,
    FeatureEarleyChartParser,
    FeatureIncrementalBottomUpChartParser,
    FeatureIncrementalBottomUpLeftCornerChartParser,
    FeatureIncrementalChartParser,
    FeatureIncrementalTopDownChartParser,
    IncrementalBottomUpChartParser,
    IncrementalBottomUpLeftCornerChartParser,
    IncrementalChartParser,
    IncrementalLeftCornerChartParser,
    IncrementalTopDownChartParser,
)
from nltk.parse.evaluate import DependencyEvaluator
from nltk.parse.featurechart import (
    FeatureBottomUpChartParser,
    FeatureBottomUpLeftCornerChartParser,
    FeatureChartParser,
    FeatureTopDownChartParser,
)
from nltk.parse.malt import MaltParser
from nltk.parse.nonprojectivedependencyparser import (
    NaiveBayesDependencyScorer,
    NonprojectiveDependencyParser,
    ProbabilisticNonprojectiveParser,
)
from nltk.parse.pchart import (
    BottomUpProbabilisticChartParser,
    InsideChartParser,
    LongestChartParser,
    RandomChartParser,
    UnsortedChartParser,
)
from nltk.parse.projectivedependencyparser import (
    ProbabilisticProjectiveDependencyParser,
    ProjectiveDependencyParser,
)
from nltk.parse.recursivedescent import (
    RecursiveDescentParser,
    SteppingRecursiveDescentParser,
)
from nltk.parse.shiftreduce import ShiftReduceParser, SteppingShiftReduceParser
from nltk.parse.transitionparser import TransitionParser
from nltk.parse.util import TestGrammar, extract_test_sentences, load_parser
from nltk.parse.viterbi import ViterbiParser
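

def _example_usage():
    """A minimal sketch, not part of upstream NLTK: parse a sentence with one
    of the ``ParserI`` implementations re-exported above, assuming only a toy
    CFG. The grammar and sentence are invented for illustration."""
    from nltk.grammar import CFG

    grammar = CFG.fromstring(
        """
        S -> NP VP
        NP -> 'dogs' | 'cats'
        VP -> V NP
        V -> 'chase'
        """
    )
    parser = ChartParser(grammar)
    # ParserI.parse() returns an iterator of Tree objects.
    for tree in parser.parse("dogs chase cats".split()):
        print(tree)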
72
backend/venv/Lib/site-packages/nltk/parse/api.py
Normal file
@@ -0,0 +1,72 @@
# Natural Language Toolkit: Parser API
#
# Copyright (C) 2001-2025 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#

import itertools

from nltk.internals import overridden


class ParserI:
    """
    A processing class for deriving trees that represent possible
    structures for a sequence of tokens. These tree structures are
    known as "parses". Typically, parsers are used to derive syntax
    trees for sentences. But parsers can also be used to derive other
    kinds of tree structure, such as morphological trees and discourse
    structures.

    Subclasses must define:
      - at least one of: ``parse()``, ``parse_sents()``.

    Subclasses may define:
      - ``grammar()``
    """

    def grammar(self):
        """
        :return: The grammar used by this parser.
        """
        raise NotImplementedError()

    def parse(self, sent, *args, **kwargs):
        """
        :return: An iterator that generates parse trees for the sentence.
            When possible, the trees are sorted from most likely to least likely.

        :param sent: The sentence to be parsed
        :type sent: list(str)
        :rtype: iter(Tree)
        """
        if overridden(self.parse_sents):
            return next(self.parse_sents([sent], *args, **kwargs))
        elif overridden(self.parse_one):
            return (
                tree
                for tree in [self.parse_one(sent, *args, **kwargs)]
                if tree is not None
            )
        elif overridden(self.parse_all):
            return iter(self.parse_all(sent, *args, **kwargs))
        else:
            raise NotImplementedError()

    def parse_sents(self, sents, *args, **kwargs):
        """
        Apply ``self.parse()`` to each element of ``sents``.
        :rtype: iter(iter(Tree))
        """
        return (self.parse(sent, *args, **kwargs) for sent in sents)

    def parse_all(self, sent, *args, **kwargs):
        """:rtype: list(Tree)"""
        return list(self.parse(sent, *args, **kwargs))

    def parse_one(self, sent, *args, **kwargs):
        """:rtype: Tree or None"""
        return next(self.parse(sent, *args, **kwargs), None)
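

class _EchoParser(ParserI):
    """A minimal sketch, not part of upstream NLTK: the smallest useful
    ``ParserI`` subclass. Only ``parse()`` is overridden; ``parse_sents()``,
    ``parse_all()`` and ``parse_one()`` then work through the default
    implementations above. The flat one-level "parse" it yields is invented
    purely for illustration."""

    def parse(self, sent, *args, **kwargs):
        from nltk.tree import Tree

        # Yield a single flat tree with every token as a leaf under S.
        yield Tree("S", list(sent))


# Illustrative usage:
#   p = _EchoParser()
#   print(p.parse_one("the dog barks".split()))   # (S the dog barks)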
299
backend/venv/Lib/site-packages/nltk/parse/bllip.py
Normal file
@@ -0,0 +1,299 @@
|
||||
# Natural Language Toolkit: Interface to BLLIP Parser
|
||||
#
|
||||
# Author: David McClosky <dmcc@bigasterisk.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.tree import Tree
|
||||
|
||||
"""
|
||||
Interface for parsing with BLLIP Parser. Requires the Python
|
||||
bllipparser module. BllipParser objects can be constructed with the
|
||||
``BllipParser.from_unified_model_dir`` class method or manually using the
|
||||
``BllipParser`` constructor. The former is generally easier if you have
|
||||
a BLLIP Parser unified model directory -- a basic model can be obtained
|
||||
from NLTK's downloader. More unified parsing models can be obtained with
|
||||
BLLIP Parser's ModelFetcher (run ``python -m bllipparser.ModelFetcher``
|
||||
or see docs for ``bllipparser.ModelFetcher.download_and_install_model``).
|
||||
|
||||
Basic usage::
|
||||
|
||||
# download and install a basic unified parsing model (Wall Street Journal)
|
||||
# sudo python -m nltk.downloader bllip_wsj_no_aux
|
||||
|
||||
>>> from nltk.data import find
|
||||
>>> model_dir = find('models/bllip_wsj_no_aux').path
|
||||
>>> bllip = BllipParser.from_unified_model_dir(model_dir)
|
||||
|
||||
# 1-best parsing
|
||||
>>> sentence1 = 'British left waffles on Falklands .'.split()
|
||||
>>> top_parse = bllip.parse_one(sentence1)
|
||||
>>> print(top_parse)
|
||||
(S1
|
||||
(S
|
||||
(NP (JJ British) (NN left))
|
||||
(VP (VBZ waffles) (PP (IN on) (NP (NNP Falklands))))
|
||||
(. .)))
|
||||
|
||||
# n-best parsing
|
||||
>>> sentence2 = 'Time flies'.split()
|
||||
>>> all_parses = bllip.parse_all(sentence2)
|
||||
>>> print(len(all_parses))
|
||||
50
|
||||
>>> print(all_parses[0])
|
||||
(S1 (S (NP (NNP Time)) (VP (VBZ flies))))
|
||||
|
||||
# incorporating external tagging constraints (None means unconstrained tag)
|
||||
>>> constrained1 = bllip.tagged_parse([('Time', 'VB'), ('flies', 'NNS')])
|
||||
>>> print(next(constrained1))
|
||||
(S1 (NP (VB Time) (NNS flies)))
|
||||
>>> constrained2 = bllip.tagged_parse([('Time', 'NN'), ('flies', None)])
|
||||
>>> print(next(constrained2))
|
||||
(S1 (NP (NN Time) (VBZ flies)))
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
- Charniak, Eugene. "A maximum-entropy-inspired parser." Proceedings of
|
||||
the 1st North American chapter of the Association for Computational
|
||||
Linguistics conference. Association for Computational Linguistics,
|
||||
2000.
|
||||
|
||||
- Charniak, Eugene, and Mark Johnson. "Coarse-to-fine n-best parsing
|
||||
and MaxEnt discriminative reranking." Proceedings of the 43rd Annual
|
||||
Meeting on Association for Computational Linguistics. Association
|
||||
for Computational Linguistics, 2005.
|
||||
|
||||
Known issues
|
||||
------------
|
||||
|
||||
Note that BLLIP Parser is not currently threadsafe. Since this module
|
||||
uses a SWIG interface, it is potentially unsafe to create multiple
|
||||
``BllipParser`` objects in the same process. BLLIP Parser currently
|
||||
has issues with non-ASCII text and will raise an error if given any.
|
||||
|
||||
See https://pypi.python.org/pypi/bllipparser/ for more information
|
||||
on BLLIP Parser's Python interface.
|
||||
"""
|
||||
|
||||
__all__ = ["BllipParser"]
|
||||
|
||||
# this block allows this module to be imported even if bllipparser isn't
|
||||
# available
|
||||
try:
|
||||
from bllipparser import RerankingParser
|
||||
from bllipparser.RerankingParser import get_unified_model_parameters
|
||||
|
||||
def _ensure_bllip_import_or_error():
|
||||
pass
|
||||
|
||||
except ImportError as ie:
|
||||
|
||||
def _ensure_bllip_import_or_error(ie=ie):
|
||||
raise ImportError("Couldn't import bllipparser module: %s" % ie)
|
||||
|
||||
|
||||
def _ensure_ascii(words):
|
||||
try:
|
||||
for i, word in enumerate(words):
|
||||
word.encode("ascii")
|
||||
except UnicodeEncodeError as e:
|
||||
raise ValueError(
|
||||
f"Token {i} ({word!r}) is non-ASCII. BLLIP Parser "
|
||||
"currently doesn't support non-ASCII inputs."
|
||||
) from e
|
||||
|
||||
|
||||
def _scored_parse_to_nltk_tree(scored_parse):
|
||||
return Tree.fromstring(str(scored_parse.ptb_parse))
|
||||
|
||||
|
||||
class BllipParser(ParserI):
|
||||
"""
|
||||
Interface for parsing with BLLIP Parser. BllipParser objects can be
|
||||
constructed with the ``BllipParser.from_unified_model_dir`` class
|
||||
method or manually using the ``BllipParser`` constructor.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parser_model=None,
|
||||
reranker_features=None,
|
||||
reranker_weights=None,
|
||||
parser_options=None,
|
||||
reranker_options=None,
|
||||
):
|
||||
"""
|
||||
Load a BLLIP Parser model from scratch. You'll typically want to
|
||||
use the ``from_unified_model_dir()`` class method to construct
|
||||
this object.
|
||||
|
||||
:param parser_model: Path to parser model directory
|
||||
:type parser_model: str
|
||||
|
||||
:param reranker_features: Path to the reranker model's features file
|
||||
:type reranker_features: str
|
||||
|
||||
:param reranker_weights: Path to the reranker model's weights file
|
||||
:type reranker_weights: str
|
||||
|
||||
:param parser_options: optional dictionary of parser options, see
|
||||
``bllipparser.RerankingParser.RerankingParser.load_parser_options()``
|
||||
for more information.
|
||||
:type parser_options: dict(str)
|
||||
|
||||
:param reranker_options: optional
|
||||
dictionary of reranker options, see
|
||||
``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
|
||||
for more information.
|
||||
:type reranker_options: dict(str)
|
||||
"""
|
||||
_ensure_bllip_import_or_error()
|
||||
|
||||
parser_options = parser_options or {}
|
||||
reranker_options = reranker_options or {}
|
||||
|
||||
self.rrp = RerankingParser()
|
||||
self.rrp.load_parser_model(parser_model, **parser_options)
|
||||
if reranker_features and reranker_weights:
|
||||
self.rrp.load_reranker_model(
|
||||
features_filename=reranker_features,
|
||||
weights_filename=reranker_weights,
|
||||
**reranker_options,
|
||||
)
|
||||
|
||||
def parse(self, sentence):
|
||||
"""
|
||||
Use BLLIP Parser to parse a sentence. Takes a sentence as a list
|
||||
of words; it will be automatically tagged with this BLLIP Parser
|
||||
instance's tagger.
|
||||
|
||||
:return: An iterator that generates parse trees for the sentence
|
||||
from most likely to least likely.
|
||||
|
||||
:param sentence: The sentence to be parsed
|
||||
:type sentence: list(str)
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
_ensure_ascii(sentence)
|
||||
nbest_list = self.rrp.parse(sentence)
|
||||
for scored_parse in nbest_list:
|
||||
yield _scored_parse_to_nltk_tree(scored_parse)
|
||||
|
||||
def tagged_parse(self, word_and_tag_pairs):
|
||||
"""
|
||||
Use BLLIP to parse a sentence. Takes a sentence as a list of
|
||||
(word, tag) tuples; the sentence must have already been tokenized
|
||||
and tagged. BLLIP will attempt to use the tags provided but may
|
||||
use others if it can't come up with a complete parse subject
|
||||
to those constraints. You may also specify a tag as ``None``
|
||||
to leave a token's tag unconstrained.
|
||||
|
||||
:return: An iterator that generates parse trees for the sentence
|
||||
from most likely to least likely.
|
||||
|
||||
:param word_and_tag_pairs: Input sentence to parse as (word, tag) pairs
:type word_and_tag_pairs: list(tuple(str, str))
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
words = []
|
||||
tag_map = {}
|
||||
for i, (word, tag) in enumerate(word_and_tag_pairs):
|
||||
words.append(word)
|
||||
if tag is not None:
|
||||
tag_map[i] = tag
|
||||
|
||||
_ensure_ascii(words)
|
||||
nbest_list = self.rrp.parse_tagged(words, tag_map)
|
||||
for scored_parse in nbest_list:
|
||||
yield _scored_parse_to_nltk_tree(scored_parse)
|
||||
|
||||
@classmethod
|
||||
def from_unified_model_dir(
|
||||
cls, model_dir, parser_options=None, reranker_options=None
|
||||
):
|
||||
"""
|
||||
Create a ``BllipParser`` object from a unified parsing model
|
||||
directory. Unified parsing model directories are a standardized
|
||||
way of storing BLLIP parser and reranker models together on disk.
|
||||
See ``bllipparser.RerankingParser.get_unified_model_parameters()``
|
||||
for more information about unified model directories.
|
||||
|
||||
:return: A ``BllipParser`` object using the parser and reranker
|
||||
models in the model directory.
|
||||
|
||||
:param model_dir: Path to the unified model directory.
|
||||
:type model_dir: str
|
||||
:param parser_options: optional dictionary of parser options, see
|
||||
``bllipparser.RerankingParser.RerankingParser.load_parser_options()``
|
||||
for more information.
|
||||
:type parser_options: dict(str)
|
||||
:param reranker_options: optional dictionary of reranker options, see
|
||||
``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
|
||||
for more information.
|
||||
:type reranker_options: dict(str)
|
||||
:rtype: BllipParser
|
||||
"""
|
||||
(
|
||||
parser_model_dir,
|
||||
reranker_features_filename,
|
||||
reranker_weights_filename,
|
||||
) = get_unified_model_parameters(model_dir)
|
||||
return cls(
|
||||
parser_model_dir,
|
||||
reranker_features_filename,
|
||||
reranker_weights_filename,
|
||||
parser_options,
|
||||
reranker_options,
|
||||
)
|
||||
|
||||
|
||||
def demo():
|
||||
"""This assumes the Python module bllipparser is installed."""
|
||||
|
||||
# download and install a basic unified parsing model (Wall Street Journal)
|
||||
# sudo python -m nltk.downloader bllip_wsj_no_aux
|
||||
|
||||
from nltk.data import find
|
||||
|
||||
model_dir = find("models/bllip_wsj_no_aux").path
|
||||
|
||||
print("Loading BLLIP Parsing models...")
|
||||
# the easiest way to get started is to use a unified model
|
||||
bllip = BllipParser.from_unified_model_dir(model_dir)
|
||||
print("Done.")
|
||||
|
||||
sentence1 = "British left waffles on Falklands .".split()
|
||||
sentence2 = "I saw the man with the telescope .".split()
|
||||
# this sentence is known to fail under the WSJ parsing model
|
||||
fail1 = "# ! ? : -".split()
|
||||
for sentence in (sentence1, sentence2, fail1):
|
||||
print("Sentence: %r" % " ".join(sentence))
|
||||
try:
|
||||
tree = next(bllip.parse(sentence))
|
||||
print(tree)
|
||||
except StopIteration:
|
||||
print("(parse failed)")
|
||||
|
||||
# n-best parsing demo
|
||||
for i, parse in enumerate(bllip.parse(sentence1)):
|
||||
print("parse %d:\n%s" % (i, parse))
|
||||
|
||||
# using external POS tag constraints
|
||||
print(
|
||||
"forcing 'tree' to be 'NN':",
|
||||
next(bllip.tagged_parse([("A", None), ("tree", "NN")])),
|
||||
)
|
||||
print(
|
||||
"forcing 'A' to be 'DT' and 'tree' to be 'NNP':",
|
||||
next(bllip.tagged_parse([("A", "DT"), ("tree", "NNP")])),
|
||||
)
|
||||
# constraints don't have to make sense... (though on more complicated
|
||||
# sentences, they may cause the parse to fail)
|
||||
print(
|
||||
"forcing 'A' to be 'NNP':",
|
||||
next(bllip.tagged_parse([("A", "NNP"), ("tree", None)])),
|
||||
)
|
||||
1848
backend/venv/Lib/site-packages/nltk/parse/chart.py
Normal file
File diff suppressed because it is too large
805
backend/venv/Lib/site-packages/nltk/parse/corenlp.py
Normal file
@@ -0,0 +1,805 @@
|
||||
# Natural Language Toolkit: Interface to the CoreNLP REST API.
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Dmitrijs Milajevs <dimazest@gmail.com>
|
||||
#
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from typing import List, Tuple
|
||||
|
||||
from nltk.internals import _java_options, config_java, find_jar_iter, java
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
from nltk.tag.api import TaggerI
|
||||
from nltk.tokenize.api import TokenizerI
|
||||
from nltk.tree import Tree
|
||||
|
||||
_stanford_url = "https://stanfordnlp.github.io/CoreNLP/"
|
||||
|
||||
|
||||
class CoreNLPServerError(EnvironmentError):
|
||||
"""Exceptions associated with the Core NLP server."""
|
||||
|
||||
|
||||
def try_port(port=0):
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock.bind(("", port))
|
||||
|
||||
p = sock.getsockname()[1]
|
||||
sock.close()
|
||||
|
||||
return p
|
||||
|
||||
|
||||
class CoreNLPServer:
|
||||
_MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar"
|
||||
_JAR = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path_to_jar=None,
|
||||
path_to_models_jar=None,
|
||||
verbose=False,
|
||||
java_options=None,
|
||||
corenlp_options=None,
|
||||
port=None,
|
||||
):
|
||||
if corenlp_options is None:
|
||||
corenlp_options = ["-preload", "tokenize,ssplit,pos,lemma,parse,depparse"]
|
||||
|
||||
jars = list(
|
||||
find_jar_iter(
|
||||
self._JAR,
|
||||
path_to_jar,
|
||||
env_vars=("CORENLP",),
|
||||
searchpath=(),
|
||||
url=_stanford_url,
|
||||
verbose=verbose,
|
||||
is_regex=True,
|
||||
)
|
||||
)
|
||||
|
||||
# find the most recent code and model jar
|
||||
stanford_jar = max(jars, key=lambda model_name: re.match(self._JAR, model_name))
|
||||
|
||||
if port is None:
|
||||
try:
|
||||
port = try_port(9000)
|
||||
except OSError:
|
||||
port = try_port()
|
||||
corenlp_options.extend(["-port", str(port)])
|
||||
else:
|
||||
try_port(port)
|
||||
corenlp_options.extend(["-port", str(port)])
|
||||
|
||||
self.url = f"http://localhost:{port}"
|
||||
|
||||
model_jar = max(
|
||||
find_jar_iter(
|
||||
self._MODEL_JAR_PATTERN,
|
||||
path_to_models_jar,
|
||||
env_vars=("CORENLP_MODELS",),
|
||||
searchpath=(),
|
||||
url=_stanford_url,
|
||||
verbose=verbose,
|
||||
is_regex=True,
|
||||
),
|
||||
key=lambda model_name: re.match(self._MODEL_JAR_PATTERN, model_name),
|
||||
)
|
||||
|
||||
self.verbose = verbose
|
||||
|
||||
self._classpath = stanford_jar, model_jar
|
||||
|
||||
self.corenlp_options = corenlp_options
|
||||
self.java_options = java_options or ["-mx2g"]
|
||||
|
||||
def start(self, stdout="devnull", stderr="devnull"):
|
||||
"""Starts the CoreNLP server
|
||||
|
||||
:param stdout, stderr: Specifies where CoreNLP output is redirected. Valid values are 'devnull', 'stdout', 'pipe'
|
||||
"""
|
||||
import requests
|
||||
|
||||
cmd = ["edu.stanford.nlp.pipeline.StanfordCoreNLPServer"]
|
||||
|
||||
if self.corenlp_options:
|
||||
cmd.extend(self.corenlp_options)
|
||||
|
||||
# Configure java.
|
||||
default_options = " ".join(_java_options)
|
||||
config_java(options=self.java_options, verbose=self.verbose)
|
||||
|
||||
try:
|
||||
self.popen = java(
|
||||
cmd,
|
||||
classpath=self._classpath,
|
||||
blocking=False,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
)
|
||||
finally:
|
||||
# Return java configurations to their default values.
|
||||
config_java(options=default_options, verbose=self.verbose)
|
||||
|
||||
# Check that the server is still running.
|
||||
returncode = self.popen.poll()
|
||||
if returncode is not None:
|
||||
_, stderrdata = self.popen.communicate()
|
||||
raise CoreNLPServerError(
|
||||
returncode,
|
||||
"Could not start the server. "
|
||||
"The error was: {}".format(stderrdata.decode("ascii")),
|
||||
)
|
||||
|
||||
for i in range(30):
|
||||
try:
|
||||
response = requests.get(requests.compat.urljoin(self.url, "live"))
|
||||
except requests.exceptions.ConnectionError:
|
||||
time.sleep(1)
|
||||
else:
|
||||
if response.ok:
|
||||
break
|
||||
else:
|
||||
raise CoreNLPServerError("Could not connect to the server.")
|
||||
|
||||
for i in range(60):
|
||||
try:
|
||||
response = requests.get(requests.compat.urljoin(self.url, "ready"))
|
||||
except requests.exceptions.ConnectionError:
|
||||
time.sleep(1)
|
||||
else:
|
||||
if response.ok:
|
||||
break
|
||||
else:
|
||||
raise CoreNLPServerError("The server is not ready.")
|
||||
|
||||
def stop(self):
|
||||
self.popen.terminate()
|
||||
self.popen.wait()
|
||||
|
||||
def __enter__(self):
|
||||
self.start()
|
||||
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.stop()
|
||||
return False
|
||||
|
||||
|
||||
class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI):
|
||||
"""Interface to the CoreNLP Parser."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
url="http://localhost:9000",
|
||||
encoding="utf8",
|
||||
tagtype=None,
|
||||
strict_json=True,
|
||||
):
|
||||
import requests
|
||||
|
||||
self.url = url
|
||||
self.encoding = encoding
|
||||
|
||||
if tagtype not in ["pos", "ner", None]:
|
||||
raise ValueError("tagtype must be either 'pos', 'ner' or None")
|
||||
|
||||
self.tagtype = tagtype
|
||||
self.strict_json = strict_json
|
||||
|
||||
self.session = requests.Session()
|
||||
|
||||
def parse_sents(self, sentences, *args, **kwargs):
|
||||
"""Parse multiple sentences.
|
||||
|
||||
Takes multiple sentences as a list where each sentence is a list of
|
||||
words. Each sentence will be automatically tagged with this
|
||||
CoreNLPParser instance's tagger.
|
||||
|
||||
If whitespace exists inside a token, the token will be treated as
|
||||
several tokens.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(str))
|
||||
:rtype: iter(iter(Tree))
|
||||
"""
|
||||
# Converting list(list(str)) -> list(str)
|
||||
sentences = (" ".join(words) for words in sentences)
|
||||
return self.raw_parse_sents(sentences, *args, **kwargs)
|
||||
|
||||
def raw_parse(self, sentence, properties=None, *args, **kwargs):
|
||||
"""Parse a sentence.
|
||||
|
||||
Takes a sentence as a string; before parsing, it will be automatically
|
||||
tokenized and tagged by the CoreNLP Parser.
|
||||
|
||||
:param sentence: Input sentence to parse
|
||||
:type sentence: str
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
default_properties = {"tokenize.whitespace": "false"}
|
||||
default_properties.update(properties or {})
|
||||
|
||||
return next(
|
||||
self.raw_parse_sents(
|
||||
[sentence], properties=default_properties, *args, **kwargs
|
||||
)
|
||||
)
|
||||
|
||||
def api_call(self, data, properties=None, timeout=60):
|
||||
default_properties = {
|
||||
"outputFormat": "json",
|
||||
"annotators": "tokenize,pos,lemma,ssplit,{parser_annotator}".format(
|
||||
parser_annotator=self.parser_annotator
|
||||
),
|
||||
}
|
||||
|
||||
default_properties.update(properties or {})
|
||||
|
||||
response = self.session.post(
|
||||
self.url,
|
||||
params={"properties": json.dumps(default_properties)},
|
||||
data=data.encode(self.encoding),
|
||||
headers={"Content-Type": f"text/plain; charset={self.encoding}"},
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
return response.json(strict=self.strict_json)
|
||||
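# Illustrative sketch, not part of upstream NLTK: ``api_call`` can also be
# used directly when extra annotators or a longer timeout are needed. The
# URL and sentence below are invented for the example; a CoreNLP server must
# already be running there.
#
#   parser = CoreNLPParser(url="http://localhost:9000")
#   result = parser.api_call(
#       "The quick brown fox jumps over the lazy dog.",
#       properties={"annotators": "tokenize,ssplit,pos,lemma,ner"},
#       timeout=120,
#   )
#   print(result["sentences"][0]["tokens"][0]["word"])   # 'The'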
|
||||
def raw_parse_sents(
|
||||
self, sentences, verbose=False, properties=None, *args, **kwargs
|
||||
):
|
||||
"""Parse multiple sentences.
|
||||
|
||||
Takes multiple sentences as a list of strings. Each sentence will be
|
||||
automatically tokenized and tagged.
|
||||
|
||||
:param sentences: Input sentences to parse.
|
||||
:type sentences: list(str)
|
||||
:rtype: iter(iter(Tree))
|
||||
|
||||
"""
|
||||
default_properties = {
|
||||
# Only splits on '\n', never inside the sentence.
|
||||
"ssplit.eolonly": "true"
|
||||
}
|
||||
|
||||
default_properties.update(properties or {})
|
||||
|
||||
"""
|
||||
for sentence in sentences:
|
||||
parsed_data = self.api_call(sentence, properties=default_properties)
|
||||
|
||||
assert len(parsed_data['sentences']) == 1
|
||||
|
||||
for parse in parsed_data['sentences']:
|
||||
tree = self.make_tree(parse)
|
||||
yield iter([tree])
|
||||
"""
|
||||
parsed_data = self.api_call("\n".join(sentences), properties=default_properties)
|
||||
for parsed_sent in parsed_data["sentences"]:
|
||||
tree = self.make_tree(parsed_sent)
|
||||
yield iter([tree])
|
||||
|
||||
def parse_text(self, text, *args, **kwargs):
|
||||
"""Parse a piece of text.
|
||||
|
||||
The text might contain several sentences which will be split by CoreNLP.
|
||||
|
||||
:param str text: text to be split.
|
||||
:returns: an iterable of syntactic structures. # TODO: should it be an iterable of iterables?
|
||||
|
||||
"""
|
||||
parsed_data = self.api_call(text, *args, **kwargs)
|
||||
|
||||
for parse in parsed_data["sentences"]:
|
||||
yield self.make_tree(parse)
|
||||
|
||||
def tokenize(self, text, properties=None):
|
||||
"""Tokenize a string of text.
|
||||
|
||||
Skip these tests if CoreNLP is likely not ready.
|
||||
>>> from nltk.test.setup_fixt import check_jar
|
||||
>>> check_jar(CoreNLPServer._JAR, env_vars=("CORENLP",), is_regex=True)
|
||||
|
||||
The CoreNLP server can be started using the following notation, although
|
||||
we recommend the `with CoreNLPServer() as server:` context manager notation
|
||||
to ensure that the server is always stopped.
|
||||
>>> server = CoreNLPServer()
|
||||
>>> server.start()
|
||||
>>> parser = CoreNLPParser(url=server.url)
|
||||
|
||||
>>> text = 'Good muffins cost $3.88\\nin New York. Please buy me\\ntwo of them.\\nThanks.'
|
||||
>>> list(parser.tokenize(text))
|
||||
['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
|
||||
|
||||
>>> s = "The colour of the wall is blue."
|
||||
>>> list(
|
||||
... parser.tokenize(
|
||||
... 'The colour of the wall is blue.',
|
||||
... properties={'tokenize.options': 'americanize=true'},
|
||||
... )
|
||||
... )
|
||||
['The', 'colour', 'of', 'the', 'wall', 'is', 'blue', '.']
|
||||
>>> server.stop()
|
||||
|
||||
"""
|
||||
default_properties = {"annotators": "tokenize,ssplit"}
|
||||
|
||||
default_properties.update(properties or {})
|
||||
|
||||
result = self.api_call(text, properties=default_properties)
|
||||
|
||||
for sentence in result["sentences"]:
|
||||
for token in sentence["tokens"]:
|
||||
yield token["originalText"] or token["word"]
|
||||
|
||||
def tag_sents(self, sentences, properties=None):
|
||||
"""
|
||||
Tag multiple sentences.
|
||||
|
||||
Takes multiple sentences as a list where each sentence is a list of
|
||||
tokens.
|
||||
|
||||
:param sentences: Input sentences to tag
|
||||
:type sentences: list(list(str))
|
||||
:rtype: list(list(tuple(str, str)))
|
||||
"""
|
||||
|
||||
# Converting list(list(str)) -> list(str)
|
||||
sentences = (" ".join(words) for words in sentences)
|
||||
|
||||
if properties is None:
|
||||
properties = {"tokenize.whitespace": "true", "ner.useSUTime": "false"}
|
||||
|
||||
return [sentences[0] for sentences in self.raw_tag_sents(sentences, properties)]
|
||||
|
||||
def tag(self, sentence: str, properties=None) -> List[Tuple[str, str]]:
|
||||
"""
|
||||
Tag a list of tokens.
|
||||
|
||||
:rtype: list(tuple(str, str))
|
||||
|
||||
Skip these tests if CoreNLP is likely not ready.
|
||||
>>> from nltk.test.setup_fixt import check_jar
|
||||
>>> check_jar(CoreNLPServer._JAR, env_vars=("CORENLP",), is_regex=True)
|
||||
|
||||
The CoreNLP server can be started using the following notation, although
|
||||
we recommend the `with CoreNLPServer() as server:` context manager notation
|
||||
to ensure that the server is always stopped.
|
||||
>>> server = CoreNLPServer()
|
||||
>>> server.start()
|
||||
>>> parser = CoreNLPParser(url=server.url, tagtype='ner')
|
||||
>>> tokens = 'Rami Eid is studying at Stony Brook University in NY'.split()
|
||||
>>> parser.tag(tokens) # doctest: +NORMALIZE_WHITESPACE
|
||||
[('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'),
|
||||
('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'STATE_OR_PROVINCE')]
|
||||
|
||||
>>> parser = CoreNLPParser(url=server.url, tagtype='pos')
|
||||
>>> tokens = "What is the airspeed of an unladen swallow ?".split()
|
||||
>>> parser.tag(tokens) # doctest: +NORMALIZE_WHITESPACE
|
||||
[('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'),
|
||||
('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'),
|
||||
('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
|
||||
>>> server.stop()
|
||||
"""
|
||||
return self.tag_sents([sentence], properties)[0]
|
||||
|
||||
def raw_tag_sents(self, sentences, properties=None):
|
||||
"""
|
||||
Tag multiple sentences.
|
||||
|
||||
Takes multiple sentences as a list where each sentence is a string.
|
||||
|
||||
:param sentences: Input sentences to tag
|
||||
:type sentences: list(str)
|
||||
:rtype: list(list(list(tuple(str, str))))
|
||||
"""
|
||||
default_properties = {
|
||||
"ssplit.isOneSentence": "true",
|
||||
"annotators": "tokenize,ssplit,",
|
||||
}
|
||||
default_properties.update(properties or {})
|
||||
|
||||
# Supports only 'pos' or 'ner' tags.
|
||||
assert self.tagtype in [
|
||||
"pos",
|
||||
"ner",
|
||||
], "CoreNLP tagger supports only 'pos' or 'ner' tags."
|
||||
default_properties["annotators"] += self.tagtype
|
||||
for sentence in sentences:
|
||||
tagged_data = self.api_call(sentence, properties=default_properties)
|
||||
yield [
|
||||
[
|
||||
(token["word"], token[self.tagtype])
|
||||
for token in tagged_sentence["tokens"]
|
||||
]
|
||||
for tagged_sentence in tagged_data["sentences"]
|
||||
]
|
||||
|
||||
|
||||
class CoreNLPParser(GenericCoreNLPParser):
|
||||
"""
|
||||
Skip these tests if CoreNLP is likely not ready.
|
||||
>>> from nltk.test.setup_fixt import check_jar
|
||||
>>> check_jar(CoreNLPServer._JAR, env_vars=("CORENLP",), is_regex=True)
|
||||
|
||||
The recommended usage of `CoreNLPParser` is using the context manager notation:
|
||||
>>> with CoreNLPServer() as server:
|
||||
... parser = CoreNLPParser(url=server.url)
|
||||
... next(
|
||||
... parser.raw_parse('The quick brown fox jumps over the lazy dog.')
|
||||
... ).pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_______________|__________________________
|
||||
| VP |
|
||||
| _________|___ |
|
||||
| | PP |
|
||||
| | ________|___ |
|
||||
NP | | NP |
|
||||
____|__________ | | _______|____ |
|
||||
DT JJ JJ NN VBZ IN DT JJ NN .
|
||||
| | | | | | | | | |
|
||||
The quick brown fox jumps over the lazy dog .
|
||||
|
||||
Alternatively, the server can be started using the following notation.
|
||||
Note that `CoreNLPServer` does not need to be used if the CoreNLP server is started
|
||||
outside of Python.
|
||||
>>> server = CoreNLPServer()
|
||||
>>> server.start()
|
||||
>>> parser = CoreNLPParser(url=server.url)
|
||||
|
||||
>>> (parse_fox, ), (parse_wolf, ) = parser.raw_parse_sents(
|
||||
... [
|
||||
... 'The quick brown fox jumps over the lazy dog.',
|
||||
... 'The quick grey wolf jumps over the lazy fox.',
|
||||
... ]
|
||||
... )
|
||||
|
||||
>>> parse_fox.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_______________|__________________________
|
||||
| VP |
|
||||
| _________|___ |
|
||||
| | PP |
|
||||
| | ________|___ |
|
||||
NP | | NP |
|
||||
____|__________ | | _______|____ |
|
||||
DT JJ JJ NN VBZ IN DT JJ NN .
|
||||
| | | | | | | | | |
|
||||
The quick brown fox jumps over the lazy dog .
|
||||
|
||||
>>> parse_wolf.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_______________|__________________________
|
||||
| VP |
|
||||
| _________|___ |
|
||||
| | PP |
|
||||
| | ________|___ |
|
||||
NP | | NP |
|
||||
____|_________ | | _______|____ |
|
||||
DT JJ JJ NN VBZ IN DT JJ NN .
|
||||
| | | | | | | | | |
|
||||
The quick grey wolf jumps over the lazy fox .
|
||||
|
||||
>>> (parse_dog, ), (parse_friends, ) = parser.parse_sents(
|
||||
... [
|
||||
... "I 'm a dog".split(),
|
||||
... "This is my friends ' cat ( the tabby )".split(),
|
||||
... ]
|
||||
... )
|
||||
|
||||
>>> parse_dog.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_______|____
|
||||
| VP
|
||||
| ________|___
|
||||
NP | NP
|
||||
| | ___|___
|
||||
PRP VBP DT NN
|
||||
| | | |
|
||||
I 'm a dog
|
||||
|
||||
>>> parse_friends.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
____|___________
|
||||
| VP
|
||||
| ___________|_____________
|
||||
| | NP
|
||||
| | _______|________________________
|
||||
| | NP | | |
|
||||
| | _____|_______ | | |
|
||||
NP | NP | | NP |
|
||||
| | ______|_________ | | ___|____ |
|
||||
DT VBZ PRP$ NNS POS NN -LRB- DT NN -RRB-
|
||||
| | | | | | | | | |
|
||||
This is my friends ' cat -LRB- the tabby -RRB-
|
||||
|
||||
>>> parse_john, parse_mary, = parser.parse_text(
|
||||
... 'John loves Mary. Mary walks.'
|
||||
... )
|
||||
|
||||
>>> parse_john.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_____|_____________
|
||||
| VP |
|
||||
| ____|___ |
|
||||
NP | NP |
|
||||
| | | |
|
||||
NNP VBZ NNP .
|
||||
| | | |
|
||||
John loves Mary .
|
||||
|
||||
>>> parse_mary.pretty_print() # doctest: +NORMALIZE_WHITESPACE
|
||||
ROOT
|
||||
|
|
||||
S
|
||||
_____|____
|
||||
NP VP |
|
||||
| | |
|
||||
NNP VBZ .
|
||||
| | |
|
||||
Mary walks .
|
||||
|
||||
Special cases
|
||||
|
||||
>>> next(
|
||||
... parser.raw_parse(
|
||||
... 'NASIRIYA, Iraq—Iraqi doctors who treated former prisoner of war '
|
||||
... 'Jessica Lynch have angrily dismissed claims made in her biography '
|
||||
... 'that she was raped by her Iraqi captors.'
|
||||
... )
|
||||
... ).height()
|
||||
14
|
||||
|
||||
>>> next(
|
||||
... parser.raw_parse(
|
||||
... "The broader Standard & Poor's 500 Index <.SPX> was 0.46 points lower, or "
|
||||
... '0.05 percent, at 997.02.'
|
||||
... )
|
||||
... ).height()
|
||||
11
|
||||
|
||||
>>> server.stop()
|
||||
"""
|
||||
|
||||
_OUTPUT_FORMAT = "penn"
|
||||
parser_annotator = "parse"
|
||||
|
||||
def make_tree(self, result):
|
||||
return Tree.fromstring(result["parse"])
|
||||
|
||||
|
||||
class CoreNLPDependencyParser(GenericCoreNLPParser):
|
||||
"""Dependency parser.
|
||||
|
||||
Skip these tests if CoreNLP is likely not ready.
|
||||
>>> from nltk.test.setup_fixt import check_jar
|
||||
>>> check_jar(CoreNLPServer._JAR, env_vars=("CORENLP",), is_regex=True)
|
||||
|
||||
The recommended usage of `CoreNLPParser` is using the context manager notation:
|
||||
>>> with CoreNLPServer() as server:
|
||||
... dep_parser = CoreNLPDependencyParser(url=server.url)
|
||||
... parse, = dep_parser.raw_parse(
|
||||
... 'The quick brown fox jumps over the lazy dog.'
|
||||
... )
|
||||
... print(parse.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
The DT 4 det
|
||||
quick JJ 4 amod
|
||||
brown JJ 4 amod
|
||||
fox NN 5 nsubj
|
||||
jumps VBZ 0 ROOT
|
||||
over IN 9 case
|
||||
the DT 9 det
|
||||
lazy JJ 9 amod
|
||||
dog NN 5 obl
|
||||
. . 5 punct
|
||||
|
||||
Alternatively, the server can be started using the following notation.
|
||||
Note that `CoreNLPServer` does not need to be used if the CoreNLP server is started
|
||||
outside of Python.
|
||||
>>> server = CoreNLPServer()
|
||||
>>> server.start()
|
||||
>>> dep_parser = CoreNLPDependencyParser(url=server.url)
|
||||
>>> parse, = dep_parser.raw_parse('The quick brown fox jumps over the lazy dog.')
|
||||
>>> print(parse.tree()) # doctest: +NORMALIZE_WHITESPACE
|
||||
(jumps (fox The quick brown) (dog over the lazy) .)
|
||||
|
||||
>>> for governor, dep, dependent in parse.triples():
|
||||
... print(governor, dep, dependent) # doctest: +NORMALIZE_WHITESPACE
|
||||
('jumps', 'VBZ') nsubj ('fox', 'NN')
|
||||
('fox', 'NN') det ('The', 'DT')
|
||||
('fox', 'NN') amod ('quick', 'JJ')
|
||||
('fox', 'NN') amod ('brown', 'JJ')
|
||||
('jumps', 'VBZ') obl ('dog', 'NN')
|
||||
('dog', 'NN') case ('over', 'IN')
|
||||
('dog', 'NN') det ('the', 'DT')
|
||||
('dog', 'NN') amod ('lazy', 'JJ')
|
||||
('jumps', 'VBZ') punct ('.', '.')
|
||||
|
||||
>>> (parse_fox, ), (parse_dog, ) = dep_parser.raw_parse_sents(
|
||||
... [
|
||||
... 'The quick brown fox jumps over the lazy dog.',
|
||||
... 'The quick grey wolf jumps over the lazy fox.',
|
||||
... ]
|
||||
... )
|
||||
>>> print(parse_fox.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
The DT 4 det
|
||||
quick JJ 4 amod
|
||||
brown JJ 4 amod
|
||||
fox NN 5 nsubj
|
||||
jumps VBZ 0 ROOT
|
||||
over IN 9 case
|
||||
the DT 9 det
|
||||
lazy JJ 9 amod
|
||||
dog NN 5 obl
|
||||
. . 5 punct
|
||||
|
||||
>>> print(parse_dog.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
The DT 4 det
|
||||
quick JJ 4 amod
|
||||
grey JJ 4 amod
|
||||
wolf NN 5 nsubj
|
||||
jumps VBZ 0 ROOT
|
||||
over IN 9 case
|
||||
the DT 9 det
|
||||
lazy JJ 9 amod
|
||||
fox NN 5 obl
|
||||
. . 5 punct
|
||||
|
||||
>>> (parse_dog, ), (parse_friends, ) = dep_parser.parse_sents(
|
||||
... [
|
||||
... "I 'm a dog".split(),
|
||||
... "This is my friends ' cat ( the tabby )".split(),
|
||||
... ]
|
||||
... )
|
||||
>>> print(parse_dog.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
I PRP 4 nsubj
|
||||
'm VBP 4 cop
|
||||
a DT 4 det
|
||||
dog NN 0 ROOT
|
||||
|
||||
>>> print(parse_friends.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
This DT 6 nsubj
|
||||
is VBZ 6 cop
|
||||
my PRP$ 4 nmod:poss
|
||||
friends NNS 6 nmod:poss
|
||||
' POS 4 case
|
||||
cat NN 0 ROOT
|
||||
( -LRB- 9 punct
|
||||
the DT 9 det
|
||||
tabby NN 6 dep
|
||||
) -RRB- 9 punct
|
||||
|
||||
>>> parse_john, parse_mary, = dep_parser.parse_text(
|
||||
... 'John loves Mary. Mary walks.'
|
||||
... )
|
||||
|
||||
>>> print(parse_john.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
John NNP 2 nsubj
|
||||
loves VBZ 0 ROOT
|
||||
Mary NNP 2 obj
|
||||
. . 2 punct
|
||||
|
||||
>>> print(parse_mary.to_conll(4)) # doctest: +NORMALIZE_WHITESPACE
|
||||
Mary NNP 2 nsubj
|
||||
walks VBZ 0 ROOT
|
||||
. . 2 punct
|
||||
|
||||
Special cases
|
||||
|
||||
Non-breaking space inside of a token.
|
||||
|
||||
>>> len(
|
||||
... next(
|
||||
... dep_parser.raw_parse(
|
||||
... 'Anhalt said children typically treat a 20-ounce soda bottle as one '
|
||||
... 'serving, while it actually contains 2 1/2 servings.'
|
||||
... )
|
||||
... ).nodes
|
||||
... )
|
||||
23
|
||||
|
||||
Phone numbers.
|
||||
|
||||
>>> len(
|
||||
... next(
|
||||
... dep_parser.raw_parse('This is not going to crash: 01 111 555.')
|
||||
... ).nodes
|
||||
... )
|
||||
10
|
||||
|
||||
>>> print(
|
||||
... next(
|
||||
... dep_parser.raw_parse('The underscore _ should not simply disappear.')
|
||||
... ).to_conll(4)
|
||||
... ) # doctest: +NORMALIZE_WHITESPACE
|
||||
The DT 2 det
|
||||
underscore NN 7 nsubj
|
||||
_ NFP 7 punct
|
||||
should MD 7 aux
|
||||
not RB 7 advmod
|
||||
simply RB 7 advmod
|
||||
disappear VB 0 ROOT
|
||||
. . 7 punct
|
||||
|
||||
>>> print(
|
||||
... next(
|
||||
... dep_parser.raw_parse(
|
||||
... 'for all of its insights into the dream world of teen life , and its electronic expression through '
|
||||
... 'cyber culture , the film gives no quarter to anyone seeking to pull a cohesive story out of its 2 '
|
||||
... '1/2-hour running time .'
|
||||
... )
|
||||
... ).to_conll(4)
|
||||
... ) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
||||
for IN 2 case
|
||||
all DT 24 obl
|
||||
of IN 5 case
|
||||
its PRP$ 5 nmod:poss
|
||||
insights NNS 2 nmod
|
||||
into IN 9 case
|
||||
the DT 9 det
|
||||
dream NN 9 compound
|
||||
world NN 5 nmod
|
||||
of IN 12 case
|
||||
teen NN 12 compound
|
||||
...
|
||||
|
||||
>>> server.stop()
|
||||
"""
|
||||
|
||||
_OUTPUT_FORMAT = "conll2007"
|
||||
parser_annotator = "depparse"
|
||||
|
||||
def make_tree(self, result):
|
||||
return DependencyGraph(
|
||||
(
|
||||
" ".join(n_items[1:]) # NLTK expects an iterable of strings...
|
||||
for n_items in sorted(transform(result))
|
||||
),
|
||||
cell_separator=" ", # To make sure that a non-breaking space is kept inside of a token.
|
||||
)
|
||||
|
||||
|
||||
def transform(sentence):
|
||||
for dependency in sentence["basicDependencies"]:
|
||||
dependent_index = dependency["dependent"]
|
||||
token = sentence["tokens"][dependent_index - 1]
|
||||
|
||||
# Return values that we don't know as '_'. Also, consider tag and ctag
|
||||
# to be equal.
|
||||
yield (
|
||||
dependent_index,
|
||||
"_",
|
||||
token["word"],
|
||||
token["lemma"],
|
||||
token["pos"],
|
||||
token["pos"],
|
||||
"_",
|
||||
str(dependency["governor"]),
|
||||
dependency["dep"],
|
||||
"_",
|
||||
"_",
|
||||
)
|
||||
799
backend/venv/Lib/site-packages/nltk/parse/dependencygraph.py
Normal file
@@ -0,0 +1,799 @@
|
||||
# Natural Language Toolkit: Dependency Grammars
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Jason Narad <jason.narad@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com> (modifications)
|
||||
#
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
"""
|
||||
Tools for reading and writing dependency trees.
|
||||
The input is assumed to be in Malt-TAB format
|
||||
(https://stp.lingfil.uu.se/~nivre/research/MaltXML.html).
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import warnings
|
||||
from collections import defaultdict
|
||||
from itertools import chain
|
||||
from pprint import pformat
|
||||
|
||||
from nltk.internals import find_binary
|
||||
from nltk.tree import Tree
|
||||
|
||||
#################################################################
|
||||
# DependencyGraph Class
|
||||
#################################################################
|
||||
|
||||
|
||||
class DependencyGraph:
|
||||
"""
|
||||
A container for the nodes and labelled edges of a dependency structure.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
tree_str=None,
|
||||
cell_extractor=None,
|
||||
zero_based=False,
|
||||
cell_separator=None,
|
||||
top_relation_label="ROOT",
|
||||
):
|
||||
"""Dependency graph.
|
||||
|
||||
We place a dummy `TOP` node with the index 0, since the root node is
|
||||
often assigned 0 as its head. This also means that the indexing of the
|
||||
nodes corresponds directly to the Malt-TAB format, which starts at 1.
|
||||
|
||||
If zero_based is True, the input is assumed to be Malt-TAB-like with
node numbers starting at 0 and the root node assigned -1 (as produced
by, e.g., zpar).
|
||||
|
||||
:param str cell_separator: the cell separator. If not provided, cells
|
||||
are split by whitespace.
|
||||
|
||||
:param str top_relation_label: the label by which the top relation is
|
||||
identified, for example, `ROOT`, `null` or `TOP`.
|
||||
"""
|
||||
self.nodes = defaultdict(
|
||||
lambda: {
|
||||
"address": None,
|
||||
"word": None,
|
||||
"lemma": None,
|
||||
"ctag": None,
|
||||
"tag": None,
|
||||
"feats": None,
|
||||
"head": None,
|
||||
"deps": defaultdict(list),
|
||||
"rel": None,
|
||||
}
|
||||
)
|
||||
|
||||
self.nodes[0].update({"ctag": "TOP", "tag": "TOP", "address": 0})
|
||||
|
||||
self.root = None
|
||||
|
||||
if tree_str:
|
||||
self._parse(
|
||||
tree_str,
|
||||
cell_extractor=cell_extractor,
|
||||
zero_based=zero_based,
|
||||
cell_separator=cell_separator,
|
||||
top_relation_label=top_relation_label,
|
||||
)
|
||||
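# Illustrative sketch, not part of upstream NLTK: building a graph from a
# small Malt-TAB fragment, as described in the docstring above. Each row is
# word, tag and head index; 'loves' has head 0, so it becomes the root.
#
#   dg = DependencyGraph(
#       "John N 2\n"
#       "loves V 0\n"
#       "Mary N 2"
#   )
#   print(dg.root["word"])   # 'loves'
#   print(dg.tree())         # (loves John Mary)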
|
||||
def remove_by_address(self, address):
|
||||
"""
|
||||
Removes the node with the given address. References
|
||||
to this node in others will still exist.
|
||||
"""
|
||||
del self.nodes[address]
|
||||
|
||||
def redirect_arcs(self, originals, redirect):
|
||||
"""
|
||||
Redirects arcs to any of the nodes in the originals list
|
||||
to the redirect node address.
|
||||
"""
|
||||
for node in self.nodes.values():
|
||||
new_deps = []
|
||||
for dep in node["deps"]:
|
||||
if dep in originals:
|
||||
new_deps.append(redirect)
|
||||
else:
|
||||
new_deps.append(dep)
|
||||
node["deps"] = new_deps
|
||||
|
||||
def add_arc(self, head_address, mod_address):
|
||||
"""
|
||||
Adds an arc from the node specified by head_address to the
|
||||
node specified by the mod address.
|
||||
"""
|
||||
relation = self.nodes[mod_address]["rel"]
|
||||
self.nodes[head_address]["deps"].setdefault(relation, [])
|
||||
self.nodes[head_address]["deps"][relation].append(mod_address)
|
||||
# self.nodes[head_address]['deps'].append(mod_address)
|
||||
|
||||
def connect_graph(self):
|
||||
"""
|
||||
Fully connects all non-root nodes. All nodes are set to be dependents
|
||||
of the root node.
|
||||
"""
|
||||
for node1 in self.nodes.values():
|
||||
for node2 in self.nodes.values():
|
||||
if node1["address"] != node2["address"] and node2["rel"] != "TOP":
|
||||
relation = node2["rel"]
|
||||
node1["deps"].setdefault(relation, [])
|
||||
node1["deps"][relation].append(node2["address"])
|
||||
# node1['deps'].append(node2['address'])
|
||||
|
||||
def get_by_address(self, node_address):
|
||||
"""Return the node with the given address."""
|
||||
return self.nodes[node_address]
|
||||
|
||||
def contains_address(self, node_address):
|
||||
"""
|
||||
Returns true if the graph contains a node with the given node
|
||||
address, false otherwise.
|
||||
"""
|
||||
return node_address in self.nodes
|
||||
|
||||
def to_dot(self):
|
||||
"""Return a dot representation suitable for using with Graphviz.
|
||||
|
||||
>>> dg = DependencyGraph(
|
||||
... 'John N 2\\n'
|
||||
... 'loves V 0\\n'
|
||||
... 'Mary N 2'
|
||||
... )
|
||||
>>> print(dg.to_dot())
|
||||
digraph G{
|
||||
edge [dir=forward]
|
||||
node [shape=plaintext]
|
||||
<BLANKLINE>
|
||||
0 [label="0 (None)"]
|
||||
0 -> 2 [label="ROOT"]
|
||||
1 [label="1 (John)"]
|
||||
2 [label="2 (loves)"]
|
||||
2 -> 1 [label=""]
|
||||
2 -> 3 [label=""]
|
||||
3 [label="3 (Mary)"]
|
||||
}
|
||||
|
||||
"""
|
||||
# Start the digraph specification
|
||||
s = "digraph G{\n"
|
||||
s += "edge [dir=forward]\n"
|
||||
s += "node [shape=plaintext]\n"
|
||||
|
||||
# Draw the remaining nodes
|
||||
for node in sorted(self.nodes.values(), key=lambda v: v["address"]):
|
||||
s += '\n{} [label="{} ({})"]'.format(
|
||||
node["address"],
|
||||
node["address"],
|
||||
node["word"],
|
||||
)
|
||||
for rel, deps in node["deps"].items():
|
||||
for dep in deps:
|
||||
if rel is not None:
|
||||
s += '\n{} -> {} [label="{}"]'.format(node["address"], dep, rel)
|
||||
else:
|
||||
s += "\n{} -> {} ".format(node["address"], dep)
|
||||
s += "\n}"
|
||||
|
||||
return s
|
||||
|
||||
def _repr_svg_(self):
|
||||
"""Show SVG representation of the transducer (IPython magic).
|
||||
>>> from nltk.test.setup_fixt import check_binary
|
||||
>>> check_binary('dot')
|
||||
>>> dg = DependencyGraph(
|
||||
... 'John N 2\\n'
|
||||
... 'loves V 0\\n'
|
||||
... 'Mary N 2'
|
||||
... )
|
||||
>>> dg._repr_svg_().split('\\n')[0]
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>'
|
||||
|
||||
"""
|
||||
dot_string = self.to_dot()
|
||||
return dot2img(dot_string)
|
||||
|
||||
def __str__(self):
|
||||
return pformat(self.nodes)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<DependencyGraph with {len(self.nodes)} nodes>"
|
||||
|
||||
@staticmethod
|
||||
def load(
|
||||
filename, zero_based=False, cell_separator=None, top_relation_label="ROOT"
|
||||
):
|
||||
"""
|
||||
:param filename: a name of a file in Malt-TAB format
|
||||
:param zero_based: nodes in the input file are numbered starting from 0
|
||||
rather than 1 (as produced by, e.g., zpar)
|
||||
:param str cell_separator: the cell separator. If not provided, cells
|
||||
are split by whitespace.
|
||||
:param str top_relation_label: the label by which the top relation is
|
||||
identified, for example, `ROOT`, `null` or `TOP`.
|
||||
|
||||
:return: a list of DependencyGraphs
|
||||
|
||||
"""
|
||||
with open(filename) as infile:
|
||||
return [
|
||||
DependencyGraph(
|
||||
tree_str,
|
||||
zero_based=zero_based,
|
||||
cell_separator=cell_separator,
|
||||
top_relation_label=top_relation_label,
|
||||
)
|
||||
for tree_str in infile.read().split("\n\n")
|
||||
]
|
||||
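# Illustrative sketch, not part of upstream NLTK: reading a whole file of
# Malt-TAB sentences with ``load()``; each blank-line-separated block becomes
# one DependencyGraph. The file name is invented for the example.
#
#   graphs = DependencyGraph.load("sentences.maltab")
#   for dg in graphs:
#       print(dg.tree())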
|
||||
def left_children(self, node_index):
|
||||
"""
|
||||
Returns the number of left children under the node specified
|
||||
by the given address.
|
||||
"""
|
||||
children = chain.from_iterable(self.nodes[node_index]["deps"].values())
|
||||
index = self.nodes[node_index]["address"]
|
||||
return sum(1 for c in children if c < index)
|
||||
|
||||
def right_children(self, node_index):
|
||||
"""
|
||||
Returns the number of right children under the node specified
|
||||
by the given address.
|
||||
"""
|
||||
children = chain.from_iterable(self.nodes[node_index]["deps"].values())
|
||||
index = self.nodes[node_index]["address"]
|
||||
return sum(1 for c in children if c > index)
|
||||
|
||||
def add_node(self, node):
|
||||
if not self.contains_address(node["address"]):
|
||||
self.nodes[node["address"]].update(node)
|
||||
|
||||
def _parse(
|
||||
self,
|
||||
input_,
|
||||
cell_extractor=None,
|
||||
zero_based=False,
|
||||
cell_separator=None,
|
||||
top_relation_label="ROOT",
|
||||
):
|
||||
"""Parse a sentence.
|
||||
|
||||
:param extractor: a function that given a tuple of cells returns a
|
||||
7-tuple, where the values are ``word, lemma, ctag, tag, feats, head,
|
||||
rel``.
|
||||
|
||||
:param str cell_separator: the cell separator. If not provided, cells
|
||||
are split by whitespace.
|
||||
|
||||
:param str top_relation_label: the label by which the top relation is
|
||||
identified, for example, `ROOT`, `null` or `TOP`.
|
||||
|
||||
"""
|
||||
|
||||
def extract_3_cells(cells, index):
|
||||
word, tag, head = cells
|
||||
return index, word, word, tag, tag, "", head, ""
|
||||
|
||||
def extract_4_cells(cells, index):
|
||||
word, tag, head, rel = cells
|
||||
return index, word, word, tag, tag, "", head, rel
|
||||
|
||||
def extract_7_cells(cells, index):
|
||||
line_index, word, lemma, tag, _, head, rel = cells
|
||||
try:
|
||||
index = int(line_index)
|
||||
except ValueError:
|
||||
# index can't be parsed as an integer, use default
|
||||
pass
|
||||
return index, word, lemma, tag, tag, "", head, rel
|
||||
|
||||
def extract_10_cells(cells, index):
|
||||
line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
|
||||
try:
|
||||
index = int(line_index)
|
||||
except ValueError:
|
||||
# index can't be parsed as an integer, use default
|
||||
pass
|
||||
return index, word, lemma, ctag, tag, feats, head, rel
|
||||
|
||||
extractors = {
|
||||
3: extract_3_cells,
|
||||
4: extract_4_cells,
|
||||
7: extract_7_cells,
|
||||
10: extract_10_cells,
|
||||
}
|
||||
|
||||
if isinstance(input_, str):
|
||||
input_ = (line for line in input_.split("\n"))
|
||||
|
||||
lines = (l.rstrip() for l in input_)
|
||||
lines = (l for l in lines if l)
|
||||
|
||||
cell_number = None
|
||||
for index, line in enumerate(lines, start=1):
|
||||
cells = line.split(cell_separator)
|
||||
if cell_number is None:
|
||||
cell_number = len(cells)
|
||||
else:
|
||||
assert cell_number == len(cells)
|
||||
|
||||
if cell_extractor is None:
|
||||
try:
|
||||
cell_extractor = extractors[cell_number]
|
||||
except KeyError as e:
|
||||
raise ValueError(
|
||||
"Number of tab-delimited fields ({}) not supported by "
|
||||
"CoNLL(10) or Malt-Tab(4) format".format(cell_number)
|
||||
) from e
|
||||
|
||||
try:
|
||||
index, word, lemma, ctag, tag, feats, head, rel = cell_extractor(
|
||||
cells, index
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
# cell_extractor doesn't take 2 arguments or doesn't return 8
|
||||
# values; assume the cell_extractor is an older external
|
||||
# extractor and doesn't accept or return an index.
|
||||
word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)
|
||||
|
||||
if head == "_":
|
||||
continue
|
||||
|
||||
head = int(head)
|
||||
if zero_based:
|
||||
head += 1
|
||||
|
||||
self.nodes[index].update(
|
||||
{
|
||||
"address": index,
|
||||
"word": word,
|
||||
"lemma": lemma,
|
||||
"ctag": ctag,
|
||||
"tag": tag,
|
||||
"feats": feats,
|
||||
"head": head,
|
||||
"rel": rel,
|
||||
}
|
||||
)
|
||||
|
||||
# Make sure that the fake root node has labeled dependencies.
|
||||
if (cell_number == 3) and (head == 0):
|
||||
rel = top_relation_label
|
||||
self.nodes[head]["deps"][rel].append(index)
|
||||
|
||||
if self.nodes[0]["deps"][top_relation_label]:
|
||||
root_address = self.nodes[0]["deps"][top_relation_label][0]
|
||||
self.root = self.nodes[root_address]
|
||||
self.top_relation_label = top_relation_label
|
||||
else:
|
||||
warnings.warn(
|
||||
"The graph doesn't contain a node " "that depends on the root element."
|
||||
)
|
||||
|
||||
def _word(self, node, filter=True):
|
||||
w = node["word"]
|
||||
if filter:
|
||||
if w != ",":
|
||||
return w
|
||||
return w
|
||||
|
||||
def _tree(self, i):
|
||||
"""Turn dependency graphs into NLTK trees.
|
||||
|
||||
:param int i: index of a node
|
||||
:return: either a word (if the indexed node is a leaf) or a ``Tree``.
|
||||
"""
|
||||
node = self.get_by_address(i)
|
||||
word = node["word"]
|
||||
deps = sorted(chain.from_iterable(node["deps"].values()))
|
||||
|
||||
if deps:
|
||||
return Tree(word, [self._tree(dep) for dep in deps])
|
||||
else:
|
||||
return word
|
||||
|
||||
def tree(self):
|
||||
"""
|
||||
Starting with the ``root`` node, build a dependency tree using the NLTK
|
||||
``Tree`` constructor. Dependency labels are omitted.
|
||||
"""
|
||||
node = self.root
|
||||
|
||||
word = node["word"]
|
||||
deps = sorted(chain.from_iterable(node["deps"].values()))
|
||||
return Tree(word, [self._tree(dep) for dep in deps])
|
||||
|
||||
def triples(self, node=None):
|
||||
"""
|
||||
Extract dependency triples of the form:
|
||||
((head word, head tag), rel, (dep word, dep tag))
|
||||
"""
|
||||
|
||||
if not node:
|
||||
node = self.root
|
||||
|
||||
head = (node["word"], node["ctag"])
|
||||
for i in sorted(chain.from_iterable(node["deps"].values())):
|
||||
dep = self.get_by_address(i)
|
||||
yield (head, dep["rel"], (dep["word"], dep["ctag"]))
|
||||
yield from self.triples(node=dep)
|
||||
|
||||
def _hd(self, i):
|
||||
try:
|
||||
return self.nodes[i]["head"]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
def _rel(self, i):
|
||||
try:
|
||||
return self.nodes[i]["rel"]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
# what's the return type? Boolean or list?
|
||||
def contains_cycle(self):
|
||||
"""Check whether there are cycles.
|
||||
|
||||
>>> dg = DependencyGraph(treebank_data)
|
||||
>>> dg.contains_cycle()
|
||||
False
|
||||
|
||||
>>> cyclic_dg = DependencyGraph()
|
||||
>>> top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
|
||||
>>> child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
|
||||
>>> child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
|
||||
>>> child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
|
||||
>>> child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
|
||||
>>> cyclic_dg.nodes = {
|
||||
... 0: top,
|
||||
... 1: child1,
|
||||
... 2: child2,
|
||||
... 3: child3,
|
||||
... 4: child4,
|
||||
... }
|
||||
>>> cyclic_dg.root = top
|
||||
|
||||
>>> cyclic_dg.contains_cycle()
|
||||
[1, 2, 4, 3]
|
||||
|
||||
"""
|
||||
distances = {}
|
||||
|
||||
for node in self.nodes.values():
|
||||
for dep in node["deps"]:
|
||||
key = tuple([node["address"], dep])
|
||||
distances[key] = 1
|
||||
|
||||
for _ in self.nodes:
|
||||
new_entries = {}
|
||||
|
||||
for pair1 in distances:
|
||||
for pair2 in distances:
|
||||
if pair1[1] == pair2[0]:
|
||||
key = tuple([pair1[0], pair2[1]])
|
||||
new_entries[key] = distances[pair1] + distances[pair2]
|
||||
|
||||
for pair in new_entries:
|
||||
distances[pair] = new_entries[pair]
|
||||
if pair[0] == pair[1]:
|
||||
path = self.get_cycle_path(self.get_by_address(pair[0]), pair[0])
|
||||
return path
|
||||
|
||||
return False # return []?
|
||||
|
||||
def get_cycle_path(self, curr_node, goal_node_index):
|
||||
for dep in curr_node["deps"]:
|
||||
if dep == goal_node_index:
|
||||
return [curr_node["address"]]
|
||||
for dep in curr_node["deps"]:
|
||||
path = self.get_cycle_path(self.get_by_address(dep), goal_node_index)
|
||||
if len(path) > 0:
|
||||
path.insert(0, curr_node["address"])
|
||||
return path
|
||||
return []
|
||||
|
||||
def to_conll(self, style):
|
||||
"""
|
||||
The dependency graph in CoNLL format.
|
||||
|
||||
:param style: the style to use for the format (3, 4, 10 columns)
|
||||
:type style: int
|
||||
:rtype: str
|
||||
"""
|
||||
|
||||
if style == 3:
|
||||
template = "{word}\t{tag}\t{head}\n"
|
||||
elif style == 4:
|
||||
template = "{word}\t{tag}\t{head}\t{rel}\n"
|
||||
elif style == 10:
|
||||
template = (
|
||||
"{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n"
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Number of tab-delimited fields ({}) not supported by "
|
||||
"CoNLL(10) or Malt-Tab(4) format".format(style)
|
||||
)
|
||||
|
||||
return "".join(
|
||||
template.format(i=i, **node)
|
||||
for i, node in sorted(self.nodes.items())
|
||||
if node["tag"] != "TOP"
|
||||
)
|
||||
|
||||
def nx_graph(self):
|
||||
"""Convert the data in a ``nodelist`` into a networkx labeled directed graph."""
|
||||
import networkx
|
||||
|
||||
nx_nodelist = list(range(1, len(self.nodes)))
|
||||
nx_edgelist = [
|
||||
(n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n)
|
||||
]
|
||||
self.nx_labels = {}
|
||||
for n in nx_nodelist:
|
||||
self.nx_labels[n] = self.nodes[n]["word"]
|
||||
|
||||
g = networkx.MultiDiGraph()
|
||||
g.add_nodes_from(nx_nodelist)
|
||||
g.add_edges_from(nx_edgelist)
|
||||
|
||||
return g
|
||||
|
||||
|
||||
def dot2img(dot_string, t="svg"):
|
||||
"""
|
||||
Create image representation from dot_string, using the 'dot' program
|
||||
from the Graphviz package.
|
||||
|
||||
Use the 't' argument to specify the image file format, e.g. 'jpeg', 'eps',
|
||||
'json', 'png' or 'webp' (Running 'dot -T:' lists all available formats).
|
||||
|
||||
Note that the "capture_output" option of subprocess.run() is only available
|
||||
with text formats (like svg), but not with binary image formats (like png).
|
||||
"""
|
||||
|
||||
try:
|
||||
find_binary("dot")
|
||||
try:
|
||||
if t in ["dot", "dot_json", "json", "svg"]:
|
||||
proc = subprocess.run(
|
||||
["dot", "-T%s" % t],
|
||||
capture_output=True,
|
||||
input=dot_string,
|
||||
text=True,
|
||||
)
|
||||
else:
|
||||
proc = subprocess.run(
|
||||
["dot", "-T%s" % t],
|
||||
input=bytes(dot_string, encoding="utf8"),
|
||||
)
|
||||
return proc.stdout
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
"Cannot create image representation by running dot from string: {}"
|
||||
"".format(dot_string)
|
||||
) from e
|
||||
except OSError as e:
|
||||
raise Exception("Cannot find the dot binary from Graphviz package") from e
|
||||
|
||||
|
||||
class DependencyGraphError(Exception):
|
||||
"""Dependency graph exception."""
|
||||
|
||||
|
||||
def demo():
|
||||
malt_demo()
|
||||
conll_demo()
|
||||
conll_file_demo()
|
||||
cycle_finding_demo()
|
||||
|
||||
|
||||
def malt_demo(nx=False):
|
||||
"""
|
||||
A demonstration of the result of reading a dependency
|
||||
version of the first sentence of the Penn Treebank.
|
||||
"""
|
||||
dg = DependencyGraph(
|
||||
"""Pierre NNP 2 NMOD
|
||||
Vinken NNP 8 SUB
|
||||
, , 2 P
|
||||
61 CD 5 NMOD
|
||||
years NNS 6 AMOD
|
||||
old JJ 2 NMOD
|
||||
, , 2 P
|
||||
will MD 0 ROOT
|
||||
join VB 8 VC
|
||||
the DT 11 NMOD
|
||||
board NN 9 OBJ
|
||||
as IN 9 VMOD
|
||||
a DT 15 NMOD
|
||||
nonexecutive JJ 15 NMOD
|
||||
director NN 12 PMOD
|
||||
Nov. NNP 9 VMOD
|
||||
29 CD 16 NMOD
|
||||
. . 9 VMOD
|
||||
"""
|
||||
)
|
||||
tree = dg.tree()
|
||||
tree.pprint()
|
||||
if nx:
|
||||
# currently doesn't work
|
||||
import networkx
|
||||
from matplotlib import pylab
|
||||
|
||||
g = dg.nx_graph()
|
||||
g.info()
|
||||
pos = networkx.spring_layout(g, dim=1)
|
||||
networkx.draw_networkx_nodes(g, pos, node_size=50)
|
||||
# networkx.draw_networkx_edges(g, pos, edge_color='k', width=8)
|
||||
networkx.draw_networkx_labels(g, pos, dg.nx_labels)
|
||||
pylab.xticks([])
|
||||
pylab.yticks([])
|
||||
pylab.savefig("tree.png")
|
||||
pylab.show()
|
||||
|
||||
|
||||
def conll_demo():
|
||||
"""
|
||||
A demonstration of how to read a string representation of
|
||||
a CoNLL format dependency tree.
|
||||
"""
|
||||
dg = DependencyGraph(conll_data1)
|
||||
tree = dg.tree()
|
||||
tree.pprint()
|
||||
print(dg)
|
||||
print(dg.to_conll(4))
|
||||
|
||||
|
||||
def conll_file_demo():
|
||||
print("Mass conll_read demo...")
|
||||
graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
|
||||
for graph in graphs:
|
||||
tree = graph.tree()
|
||||
print("\n")
|
||||
tree.pprint()
|
||||
|
||||
|
||||
def cycle_finding_demo():
|
||||
dg = DependencyGraph(treebank_data)
|
||||
print(dg.contains_cycle())
|
||||
cyclic_dg = DependencyGraph()
|
||||
cyclic_dg.add_node({"word": None, "deps": [1], "rel": "TOP", "address": 0})
|
||||
cyclic_dg.add_node({"word": None, "deps": [2], "rel": "NTOP", "address": 1})
|
||||
cyclic_dg.add_node({"word": None, "deps": [4], "rel": "NTOP", "address": 2})
|
||||
cyclic_dg.add_node({"word": None, "deps": [1], "rel": "NTOP", "address": 3})
|
||||
cyclic_dg.add_node({"word": None, "deps": [3], "rel": "NTOP", "address": 4})
|
||||
print(cyclic_dg.contains_cycle())
|
||||
|
||||
|
||||
treebank_data = """Pierre NNP 2 NMOD
|
||||
Vinken NNP 8 SUB
|
||||
, , 2 P
|
||||
61 CD 5 NMOD
|
||||
years NNS 6 AMOD
|
||||
old JJ 2 NMOD
|
||||
, , 2 P
|
||||
will MD 0 ROOT
|
||||
join VB 8 VC
|
||||
the DT 11 NMOD
|
||||
board NN 9 OBJ
|
||||
as IN 9 VMOD
|
||||
a DT 15 NMOD
|
||||
nonexecutive JJ 15 NMOD
|
||||
director NN 12 PMOD
|
||||
Nov. NNP 9 VMOD
|
||||
29 CD 16 NMOD
|
||||
. . 9 VMOD
|
||||
"""
|
||||
|
||||
conll_data1 = """
|
||||
1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
|
||||
2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
|
||||
3 met met Prep Prep voor 8 mod _ _
|
||||
4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
|
||||
5 moeder moeder N N soort|ev|neut 3 obj1 _ _
|
||||
6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
|
||||
7 gaan ga V V hulp|inf 6 vc _ _
|
||||
8 winkelen winkel V V intrans|inf 11 cnj _ _
|
||||
9 , , Punc Punc komma 8 punct _ _
|
||||
10 zwemmen zwem V V intrans|inf 11 cnj _ _
|
||||
11 of of Conj Conj neven 7 vc _ _
|
||||
12 terrassen terras N N soort|mv|neut 11 cnj _ _
|
||||
13 . . Punc Punc punt 12 punct _ _
|
||||
"""
|
||||
|
||||
conll_data2 = """1 Cathy Cathy N N eigen|ev|neut 2 su _ _
|
||||
2 zag zie V V trans|ovt|1of2of3|ev 0 ROOT _ _
|
||||
3 hen hen Pron Pron per|3|mv|datofacc 2 obj1 _ _
|
||||
4 wild wild Adj Adj attr|stell|onverv 5 mod _ _
|
||||
5 zwaaien zwaai N N soort|mv|neut 2 vc _ _
|
||||
6 . . Punc Punc punt 5 punct _ _
|
||||
|
||||
1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
|
||||
2 had heb V V trans|ovt|1of2of3|ev 0 ROOT _ _
|
||||
3 met met Prep Prep voor 8 mod _ _
|
||||
4 haar haar Pron Pron bez|3|ev|neut|attr 5 det _ _
|
||||
5 moeder moeder N N soort|ev|neut 3 obj1 _ _
|
||||
6 kunnen kan V V hulp|ott|1of2of3|mv 2 vc _ _
|
||||
7 gaan ga V V hulp|inf 6 vc _ _
|
||||
8 winkelen winkel V V intrans|inf 11 cnj _ _
|
||||
9 , , Punc Punc komma 8 punct _ _
|
||||
10 zwemmen zwem V V intrans|inf 11 cnj _ _
|
||||
11 of of Conj Conj neven 7 vc _ _
|
||||
12 terrassen terras N N soort|mv|neut 11 cnj _ _
|
||||
13 . . Punc Punc punt 12 punct _ _
|
||||
|
||||
1 Dat dat Pron Pron aanw|neut|attr 2 det _ _
|
||||
2 werkwoord werkwoord N N soort|ev|neut 6 obj1 _ _
|
||||
3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
|
||||
4 ze ze Pron Pron per|3|evofmv|nom 6 su _ _
|
||||
5 zelf zelf Pron Pron aanw|neut|attr|wzelf 3 predm _ _
|
||||
6 uitgevonden vind V V trans|verldw|onverv 3 vc _ _
|
||||
7 . . Punc Punc punt 6 punct _ _
|
||||
|
||||
1 Het het Pron Pron onbep|neut|zelfst 2 su _ _
|
||||
2 hoorde hoor V V trans|ovt|1of2of3|ev 0 ROOT _ _
|
||||
3 bij bij Prep Prep voor 2 ld _ _
|
||||
4 de de Art Art bep|zijdofmv|neut 6 det _ _
|
||||
5 warme warm Adj Adj attr|stell|vervneut 6 mod _ _
|
||||
6 zomerdag zomerdag N N soort|ev|neut 3 obj1 _ _
|
||||
7 die die Pron Pron betr|neut|zelfst 6 mod _ _
|
||||
8 ze ze Pron Pron per|3|evofmv|nom 12 su _ _
|
||||
9 ginds ginds Adv Adv gew|aanw 12 mod _ _
|
||||
10 achter achter Adv Adv gew|geenfunc|stell|onverv 12 svp _ _
|
||||
11 had heb V V hulp|ovt|1of2of3|ev 7 body _ _
|
||||
12 gelaten laat V V trans|verldw|onverv 11 vc _ _
|
||||
13 . . Punc Punc punt 12 punct _ _
|
||||
|
||||
1 Ze ze Pron Pron per|3|evofmv|nom 2 su _ _
|
||||
2 hadden heb V V trans|ovt|1of2of3|mv 0 ROOT _ _
|
||||
3 languit languit Adv Adv gew|geenfunc|stell|onverv 11 mod _ _
|
||||
4 naast naast Prep Prep voor 11 mod _ _
|
||||
5 elkaar elkaar Pron Pron rec|neut 4 obj1 _ _
|
||||
6 op op Prep Prep voor 11 ld _ _
|
||||
7 de de Art Art bep|zijdofmv|neut 8 det _ _
|
||||
8 strandstoelen strandstoel N N soort|mv|neut 6 obj1 _ _
|
||||
9 kunnen kan V V hulp|inf 2 vc _ _
|
||||
10 gaan ga V V hulp|inf 9 vc _ _
|
||||
11 liggen lig V V intrans|inf 10 vc _ _
|
||||
12 . . Punc Punc punt 11 punct _ _
|
||||
|
||||
1 Zij zij Pron Pron per|3|evofmv|nom 2 su _ _
|
||||
2 zou zal V V hulp|ovt|1of2of3|ev 7 cnj _ _
|
||||
3 mams mams N N soort|ev|neut 4 det _ _
|
||||
4 rug rug N N soort|ev|neut 5 obj1 _ _
|
||||
5 ingewreven wrijf V V trans|verldw|onverv 6 vc _ _
|
||||
6 hebben heb V V hulp|inf 2 vc _ _
|
||||
7 en en Conj Conj neven 0 ROOT _ _
|
||||
8 mam mam V V trans|ovt|1of2of3|ev 7 cnj _ _
|
||||
9 de de Art Art bep|zijdofmv|neut 10 det _ _
|
||||
10 hare hare Pron Pron bez|3|ev|neut|attr 8 obj1 _ _
|
||||
11 . . Punc Punc punt 10 punct _ _
|
||||
|
||||
1 Of of Conj Conj onder|metfin 0 ROOT _ _
|
||||
2 ze ze Pron Pron per|3|evofmv|nom 3 su _ _
|
||||
3 had heb V V hulp|ovt|1of2of3|ev 0 ROOT _ _
|
||||
4 gewoon gewoon Adj Adj adv|stell|onverv 10 mod _ _
|
||||
5 met met Prep Prep voor 10 mod _ _
|
||||
6 haar haar Pron Pron bez|3|ev|neut|attr 7 det _ _
|
||||
7 vriendinnen vriendin N N soort|mv|neut 5 obj1 _ _
|
||||
8 rond rond Adv Adv deelv 10 svp _ _
|
||||
9 kunnen kan V V hulp|inf 3 vc _ _
|
||||
10 slenteren slenter V V intrans|inf 9 vc _ _
|
||||
11 in in Prep Prep voor 10 mod _ _
|
||||
12 de de Art Art bep|zijdofmv|neut 13 det _ _
|
||||
13 buurt buurt N N soort|ev|neut 11 obj1 _ _
|
||||
14 van van Prep Prep voor 13 mod _ _
|
||||
15 Trafalgar_Square Trafalgar_Square MWU N_N eigen|ev|neut_eigen|ev|neut 14 obj1 _ _
|
||||
16 . . Punc Punc punt 15 punct _ _
|
||||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
552
backend/venv/Lib/site-packages/nltk/parse/earleychart.py
Normal file
@@ -0,0 +1,552 @@
|
||||
# Natural Language Toolkit: An Incremental Earley Chart Parser
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
||||
# Rob Speer <rspeer@mit.edu>
|
||||
# Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# Jean Mark Gawron <gawron@mail.sdsu.edu>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Data classes and parser implementations for *incremental* chart
|
||||
parsers, which use dynamic programming to efficiently parse a text.
|
||||
A "chart parser" derives parse trees for a text by iteratively adding
|
||||
\"edges\" to a \"chart\". Each "edge" represents a hypothesis about the tree
|
||||
structure for a subsequence of the text. The "chart" is a
|
||||
\"blackboard\" for composing and combining these hypotheses.
|
||||
|
||||
A parser is "incremental", if it guarantees that for all i, j where i < j,
|
||||
all edges ending at i are built before any edges ending at j.
|
||||
This is appealing for, say, speech recognizer hypothesis filtering.
|
||||
|
||||
The main parser class is ``EarleyChartParser``, which is a top-down
|
||||
algorithm, originally formulated by Jay Earley (1970).
|
||||
"""
|
||||
|
||||
from time import perf_counter
|
||||
|
||||
from nltk.parse.chart import (
|
||||
BottomUpPredictCombineRule,
|
||||
BottomUpPredictRule,
|
||||
CachedTopDownPredictRule,
|
||||
Chart,
|
||||
ChartParser,
|
||||
EdgeI,
|
||||
EmptyPredictRule,
|
||||
FilteredBottomUpPredictCombineRule,
|
||||
FilteredSingleEdgeFundamentalRule,
|
||||
LeafEdge,
|
||||
LeafInitRule,
|
||||
SingleEdgeFundamentalRule,
|
||||
TopDownInitRule,
|
||||
)
|
||||
from nltk.parse.featurechart import (
|
||||
FeatureBottomUpPredictCombineRule,
|
||||
FeatureBottomUpPredictRule,
|
||||
FeatureChart,
|
||||
FeatureChartParser,
|
||||
FeatureEmptyPredictRule,
|
||||
FeatureSingleEdgeFundamentalRule,
|
||||
FeatureTopDownInitRule,
|
||||
FeatureTopDownPredictRule,
|
||||
)
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Incremental Chart
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class IncrementalChart(Chart):
|
||||
def initialize(self):
|
||||
# A sequence of edge lists contained in this chart.
|
||||
self._edgelists = tuple([] for x in self._positions())
|
||||
|
||||
# The set of child pointer lists associated with each edge.
|
||||
self._edge_to_cpls = {}
|
||||
|
||||
# Indexes mapping attribute values to lists of edges
|
||||
# (used by select()).
|
||||
self._indexes = {}
|
||||
|
||||
def edges(self):
|
||||
return list(self.iteredges())
|
||||
|
||||
def iteredges(self):
|
||||
return (edge for edgelist in self._edgelists for edge in edgelist)
|
||||
|
||||
def select(self, end, **restrictions):
|
||||
edgelist = self._edgelists[end]
|
||||
|
||||
# If there are no restrictions, then return all edges.
|
||||
if restrictions == {}:
|
||||
return iter(edgelist)
|
||||
|
||||
# Find the index corresponding to the given restrictions.
|
||||
restr_keys = sorted(restrictions.keys())
|
||||
restr_keys = tuple(restr_keys)
|
||||
|
||||
# If it doesn't exist, then create it.
|
||||
if restr_keys not in self._indexes:
|
||||
self._add_index(restr_keys)
|
||||
|
||||
vals = tuple(restrictions[key] for key in restr_keys)
|
||||
return iter(self._indexes[restr_keys][end].get(vals, []))
|
||||
|
||||
def _add_index(self, restr_keys):
|
||||
# Make sure it's a valid index.
|
||||
for key in restr_keys:
|
||||
if not hasattr(EdgeI, key):
|
||||
raise ValueError("Bad restriction: %s" % key)
|
||||
|
||||
# Create the index.
|
||||
index = self._indexes[restr_keys] = tuple({} for x in self._positions())
|
||||
|
||||
# Add all existing edges to the index.
|
||||
for end, edgelist in enumerate(self._edgelists):
|
||||
this_index = index[end]
|
||||
for edge in edgelist:
|
||||
vals = tuple(getattr(edge, key)() for key in restr_keys)
|
||||
this_index.setdefault(vals, []).append(edge)
|
||||
|
||||
def _register_with_indexes(self, edge):
|
||||
end = edge.end()
|
||||
for restr_keys, index in self._indexes.items():
|
||||
vals = tuple(getattr(edge, key)() for key in restr_keys)
|
||||
index[end].setdefault(vals, []).append(edge)
|
||||
|
||||
def _append_edge(self, edge):
|
||||
self._edgelists[edge.end()].append(edge)
|
||||
|
||||
def _positions(self):
|
||||
return range(self.num_leaves() + 1)
|
||||
|
||||
|
||||
class FeatureIncrementalChart(IncrementalChart, FeatureChart):
|
||||
def select(self, end, **restrictions):
|
||||
edgelist = self._edgelists[end]
|
||||
|
||||
# If there are no restrictions, then return all edges.
|
||||
if restrictions == {}:
|
||||
return iter(edgelist)
|
||||
|
||||
# Find the index corresponding to the given restrictions.
|
||||
restr_keys = sorted(restrictions.keys())
|
||||
restr_keys = tuple(restr_keys)
|
||||
|
||||
# If it doesn't exist, then create it.
|
||||
if restr_keys not in self._indexes:
|
||||
self._add_index(restr_keys)
|
||||
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(restrictions[key]) for key in restr_keys
|
||||
)
|
||||
return iter(self._indexes[restr_keys][end].get(vals, []))
|
||||
|
||||
def _add_index(self, restr_keys):
|
||||
# Make sure it's a valid index.
|
||||
for key in restr_keys:
|
||||
if not hasattr(EdgeI, key):
|
||||
raise ValueError("Bad restriction: %s" % key)
|
||||
|
||||
# Create the index.
|
||||
index = self._indexes[restr_keys] = tuple({} for x in self._positions())
|
||||
|
||||
# Add all existing edges to the index.
|
||||
for end, edgelist in enumerate(self._edgelists):
|
||||
this_index = index[end]
|
||||
for edge in edgelist:
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(getattr(edge, key)())
|
||||
for key in restr_keys
|
||||
)
|
||||
this_index.setdefault(vals, []).append(edge)
|
||||
|
||||
def _register_with_indexes(self, edge):
|
||||
end = edge.end()
|
||||
for restr_keys, index in self._indexes.items():
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
|
||||
)
|
||||
index[end].setdefault(vals, []).append(edge)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Incremental CFG Rules
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class CompleteFundamentalRule(SingleEdgeFundamentalRule):
|
||||
def _apply_incomplete(self, chart, grammar, left_edge):
|
||||
end = left_edge.end()
|
||||
# When the chart is incremental, we only have to look for
|
||||
# empty complete edges here.
|
||||
for right_edge in chart.select(
|
||||
start=end, end=end, is_complete=True, lhs=left_edge.nextsym()
|
||||
):
|
||||
new_edge = left_edge.move_dot_forward(right_edge.end())
|
||||
if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class CompleterRule(CompleteFundamentalRule):
|
||||
_fundamental_rule = CompleteFundamentalRule()
|
||||
|
||||
def apply(self, chart, grammar, edge):
|
||||
if not isinstance(edge, LeafEdge):
|
||||
yield from self._fundamental_rule.apply(chart, grammar, edge)
|
||||
|
||||
|
||||
class ScannerRule(CompleteFundamentalRule):
|
||||
_fundamental_rule = CompleteFundamentalRule()
|
||||
|
||||
def apply(self, chart, grammar, edge):
|
||||
if isinstance(edge, LeafEdge):
|
||||
yield from self._fundamental_rule.apply(chart, grammar, edge)
|
||||
|
||||
|
||||
class PredictorRule(CachedTopDownPredictRule):
|
||||
pass
|
||||
|
||||
|
||||
class FilteredCompleteFundamentalRule(FilteredSingleEdgeFundamentalRule):
|
||||
def apply(self, chart, grammar, edge):
|
||||
# Since the Filtered rule only works for grammars without empty productions,
|
||||
# we only have to bother with complete edges here.
|
||||
if edge.is_complete():
|
||||
yield from self._apply_complete(chart, grammar, edge)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Incremental FCFG Rules
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class FeatureCompleteFundamentalRule(FeatureSingleEdgeFundamentalRule):
|
||||
def _apply_incomplete(self, chart, grammar, left_edge):
|
||||
fr = self._fundamental_rule
|
||||
end = left_edge.end()
|
||||
# When the chart is incremental, we only have to look for
|
||||
# empty complete edges here.
|
||||
for right_edge in chart.select(
|
||||
start=end, end=end, is_complete=True, lhs=left_edge.nextsym()
|
||||
):
|
||||
yield from fr.apply(chart, grammar, left_edge, right_edge)
|
||||
|
||||
|
||||
class FeatureCompleterRule(CompleterRule):
|
||||
_fundamental_rule = FeatureCompleteFundamentalRule()
|
||||
|
||||
|
||||
class FeatureScannerRule(ScannerRule):
|
||||
_fundamental_rule = FeatureCompleteFundamentalRule()
|
||||
|
||||
|
||||
class FeaturePredictorRule(FeatureTopDownPredictRule):
|
||||
pass
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Incremental CFG Chart Parsers
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
EARLEY_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
TopDownInitRule(),
|
||||
CompleterRule(),
|
||||
ScannerRule(),
|
||||
PredictorRule(),
|
||||
]
|
||||
TD_INCREMENTAL_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
TopDownInitRule(),
|
||||
CachedTopDownPredictRule(),
|
||||
CompleteFundamentalRule(),
|
||||
]
|
||||
BU_INCREMENTAL_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
EmptyPredictRule(),
|
||||
BottomUpPredictRule(),
|
||||
CompleteFundamentalRule(),
|
||||
]
|
||||
BU_LC_INCREMENTAL_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
EmptyPredictRule(),
|
||||
BottomUpPredictCombineRule(),
|
||||
CompleteFundamentalRule(),
|
||||
]
|
||||
|
||||
LC_INCREMENTAL_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FilteredBottomUpPredictCombineRule(),
|
||||
FilteredCompleteFundamentalRule(),
|
||||
]
|
||||
|
||||
|
||||
class IncrementalChartParser(ChartParser):
|
||||
"""
|
||||
An *incremental* chart parser implementing Jay Earley's
|
||||
parsing algorithm:
|
||||
|
||||
| For each index end in [0, 1, ..., N]:
|
||||
| For each edge such that edge.end = end:
|
||||
| If edge is incomplete and edge.next is not a part of speech:
|
||||
| Apply PredictorRule to edge
|
||||
| If edge is incomplete and edge.next is a part of speech:
|
||||
| Apply ScannerRule to edge
|
||||
| If edge is complete:
|
||||
| Apply CompleterRule to edge
|
||||
| Return any complete parses in the chart
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
grammar,
|
||||
strategy=BU_LC_INCREMENTAL_STRATEGY,
|
||||
trace=0,
|
||||
trace_chart_width=50,
|
||||
chart_class=IncrementalChart,
|
||||
):
|
||||
"""
|
||||
Create a new Earley chart parser, that uses ``grammar`` to
|
||||
parse texts.
|
||||
|
||||
:type grammar: CFG
|
||||
:param grammar: The grammar used to parse texts.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing
|
||||
output.
|
||||
:type trace_chart_width: int
|
||||
:param trace_chart_width: The default total width reserved for
|
||||
the chart in trace output. The remainder of each line will
|
||||
be used to display edges.
|
||||
:param chart_class: The class that should be used to create
|
||||
the charts used by this parser.
|
||||
"""
|
||||
self._grammar = grammar
|
||||
self._trace = trace
|
||||
self._trace_chart_width = trace_chart_width
|
||||
self._chart_class = chart_class
|
||||
|
||||
self._axioms = []
|
||||
self._inference_rules = []
|
||||
for rule in strategy:
|
||||
if rule.NUM_EDGES == 0:
|
||||
self._axioms.append(rule)
|
||||
elif rule.NUM_EDGES == 1:
|
||||
self._inference_rules.append(rule)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Incremental inference rules must have " "NUM_EDGES == 0 or 1"
|
||||
)
|
||||
|
||||
def chart_parse(self, tokens, trace=None):
|
||||
if trace is None:
|
||||
trace = self._trace
|
||||
trace_new_edges = self._trace_new_edges
|
||||
|
||||
tokens = list(tokens)
|
||||
self._grammar.check_coverage(tokens)
|
||||
chart = self._chart_class(tokens)
|
||||
grammar = self._grammar
|
||||
|
||||
# Width, for printing trace edges.
|
||||
trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1)
|
||||
if trace:
|
||||
print(chart.pretty_format_leaves(trace_edge_width))
|
||||
|
||||
for axiom in self._axioms:
|
||||
new_edges = list(axiom.apply(chart, grammar))
|
||||
trace_new_edges(chart, axiom, new_edges, trace, trace_edge_width)
|
||||
|
||||
inference_rules = self._inference_rules
|
||||
for end in range(chart.num_leaves() + 1):
|
||||
if trace > 1:
|
||||
print("\n* Processing queue:", end, "\n")
|
||||
agenda = list(chart.select(end=end))
|
||||
while agenda:
|
||||
edge = agenda.pop()
|
||||
for rule in inference_rules:
|
||||
new_edges = list(rule.apply(chart, grammar, edge))
|
||||
trace_new_edges(chart, rule, new_edges, trace, trace_edge_width)
|
||||
for new_edge in new_edges:
|
||||
if new_edge.end() == end:
|
||||
agenda.append(new_edge)
|
||||
|
||||
return chart
|
||||
|
||||
|
||||
class EarleyChartParser(IncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
IncrementalChartParser.__init__(self, grammar, EARLEY_STRATEGY, **parser_args)
|
||||
|
||||
|
||||
class IncrementalTopDownChartParser(IncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
IncrementalChartParser.__init__(
|
||||
self, grammar, TD_INCREMENTAL_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class IncrementalBottomUpChartParser(IncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
IncrementalChartParser.__init__(
|
||||
self, grammar, BU_INCREMENTAL_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class IncrementalBottomUpLeftCornerChartParser(IncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
IncrementalChartParser.__init__(
|
||||
self, grammar, BU_LC_INCREMENTAL_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class IncrementalLeftCornerChartParser(IncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
if not grammar.is_nonempty():
|
||||
raise ValueError(
|
||||
"IncrementalLeftCornerParser only works for grammars "
|
||||
"without empty productions."
|
||||
)
|
||||
IncrementalChartParser.__init__(
|
||||
self, grammar, LC_INCREMENTAL_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Incremental FCFG Chart Parsers
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
EARLEY_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureTopDownInitRule(),
|
||||
FeatureCompleterRule(),
|
||||
FeatureScannerRule(),
|
||||
FeaturePredictorRule(),
|
||||
]
|
||||
TD_INCREMENTAL_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureTopDownInitRule(),
|
||||
FeatureTopDownPredictRule(),
|
||||
FeatureCompleteFundamentalRule(),
|
||||
]
|
||||
BU_INCREMENTAL_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureEmptyPredictRule(),
|
||||
FeatureBottomUpPredictRule(),
|
||||
FeatureCompleteFundamentalRule(),
|
||||
]
|
||||
BU_LC_INCREMENTAL_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureEmptyPredictRule(),
|
||||
FeatureBottomUpPredictCombineRule(),
|
||||
FeatureCompleteFundamentalRule(),
|
||||
]
|
||||
|
||||
|
||||
class FeatureIncrementalChartParser(IncrementalChartParser, FeatureChartParser):
|
||||
def __init__(
|
||||
self,
|
||||
grammar,
|
||||
strategy=BU_LC_INCREMENTAL_FEATURE_STRATEGY,
|
||||
trace_chart_width=20,
|
||||
chart_class=FeatureIncrementalChart,
|
||||
**parser_args
|
||||
):
|
||||
IncrementalChartParser.__init__(
|
||||
self,
|
||||
grammar,
|
||||
strategy=strategy,
|
||||
trace_chart_width=trace_chart_width,
|
||||
chart_class=chart_class,
|
||||
**parser_args
|
||||
)
|
||||
|
||||
|
||||
class FeatureEarleyChartParser(FeatureIncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureIncrementalChartParser.__init__(
|
||||
self, grammar, EARLEY_FEATURE_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class FeatureIncrementalTopDownChartParser(FeatureIncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureIncrementalChartParser.__init__(
|
||||
self, grammar, TD_INCREMENTAL_FEATURE_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class FeatureIncrementalBottomUpChartParser(FeatureIncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureIncrementalChartParser.__init__(
|
||||
self, grammar, BU_INCREMENTAL_FEATURE_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
class FeatureIncrementalBottomUpLeftCornerChartParser(FeatureIncrementalChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureIncrementalChartParser.__init__(
|
||||
self, grammar, BU_LC_INCREMENTAL_FEATURE_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Demonstration
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo(
|
||||
print_times=True,
|
||||
print_grammar=False,
|
||||
print_trees=True,
|
||||
trace=2,
|
||||
sent="I saw John with a dog with my cookie",
|
||||
numparses=5,
|
||||
):
|
||||
"""
|
||||
A demonstration of the Earley parsers.
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
|
||||
from nltk.parse.chart import demo_grammar
|
||||
|
||||
# The grammar for ChartParser and SteppingChartParser:
|
||||
grammar = demo_grammar()
|
||||
if print_grammar:
|
||||
print("* Grammar")
|
||||
print(grammar)
|
||||
|
||||
# Tokenize the sample sentence.
|
||||
print("* Sentence:")
|
||||
print(sent)
|
||||
tokens = sent.split()
|
||||
print(tokens)
|
||||
print()
|
||||
|
||||
# Do the parsing.
|
||||
earley = EarleyChartParser(grammar, trace=trace)
|
||||
t = perf_counter()
|
||||
chart = earley.chart_parse(tokens)
|
||||
parses = list(chart.parses(grammar.start()))
|
||||
t = perf_counter() - t
|
||||
|
||||
# Print results.
|
||||
if numparses:
|
||||
assert len(parses) == numparses, "Not all parses found"
|
||||
if print_trees:
|
||||
for tree in parses:
|
||||
print(tree)
|
||||
else:
|
||||
print("Nr trees:", len(parses))
|
||||
if print_times:
|
||||
print("Time:", t)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
129
backend/venv/Lib/site-packages/nltk/parse/evaluate.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# Natural Language Toolkit: evaluation of dependency parser
|
||||
#
|
||||
# Author: Long Duong <longdt219@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import unicodedata
|
||||
|
||||
|
||||
class DependencyEvaluator:
|
||||
"""
|
||||
Class for measuring labelled and unlabelled attachment score for
|
||||
dependency parsing. Note that the evaluation ignores punctuation.
|
||||
|
||||
>>> from nltk.parse import DependencyGraph, DependencyEvaluator
|
||||
|
||||
>>> gold_sent = DependencyGraph(\"""
|
||||
... Pierre NNP 2 NMOD
|
||||
... Vinken NNP 8 SUB
|
||||
... , , 2 P
|
||||
... 61 CD 5 NMOD
|
||||
... years NNS 6 AMOD
|
||||
... old JJ 2 NMOD
|
||||
... , , 2 P
|
||||
... will MD 0 ROOT
|
||||
... join VB 8 VC
|
||||
... the DT 11 NMOD
|
||||
... board NN 9 OBJ
|
||||
... as IN 9 VMOD
|
||||
... a DT 15 NMOD
|
||||
... nonexecutive JJ 15 NMOD
|
||||
... director NN 12 PMOD
|
||||
... Nov. NNP 9 VMOD
|
||||
... 29 CD 16 NMOD
|
||||
... . . 9 VMOD
|
||||
... \""")
|
||||
|
||||
>>> parsed_sent = DependencyGraph(\"""
|
||||
... Pierre NNP 8 NMOD
|
||||
... Vinken NNP 1 SUB
|
||||
... , , 3 P
|
||||
... 61 CD 6 NMOD
|
||||
... years NNS 6 AMOD
|
||||
... old JJ 2 NMOD
|
||||
... , , 3 AMOD
|
||||
... will MD 0 ROOT
|
||||
... join VB 8 VC
|
||||
... the DT 11 AMOD
|
||||
... board NN 9 OBJECT
|
||||
... as IN 9 NMOD
|
||||
... a DT 15 NMOD
|
||||
... nonexecutive JJ 15 NMOD
|
||||
... director NN 12 PMOD
|
||||
... Nov. NNP 9 VMOD
|
||||
... 29 CD 16 NMOD
|
||||
... . . 9 VMOD
|
||||
... \""")
|
||||
|
||||
>>> de = DependencyEvaluator([parsed_sent],[gold_sent])
|
||||
>>> las, uas = de.eval()
|
||||
>>> las
|
||||
0.6
|
||||
>>> uas
|
||||
0.8
|
||||
>>> abs(uas - 0.8) < 0.00001
|
||||
True
|
||||
"""
|
||||
|
||||
def __init__(self, parsed_sents, gold_sents):
|
||||
"""
|
||||
:param parsed_sents: the list of parsed sentences, as output by the parser
|
||||
:type parsed_sents: list(DependencyGraph)
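:param gold_sents: the corresponding gold-standard sentences
:type gold_sents: list(DependencyGraph)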
|
||||
"""
|
||||
self._parsed_sents = parsed_sents
|
||||
self._gold_sents = gold_sents
|
||||
|
||||
def _remove_punct(self, inStr):
|
||||
"""
|
||||
Remove punctuation from a Unicode string.
|
||||
:param inStr: the input string
|
||||
:return: Unicode string with all punctuation removed
|
||||
"""
|
||||
punc_cat = {"Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"}
|
||||
return "".join(x for x in inStr if unicodedata.category(x) not in punc_cat)
|
||||
|
||||
def eval(self):
|
||||
"""
|
||||
Return the Labeled Attachment Score (LAS) and Unlabeled Attachment Score (UAS)
|
||||
|
||||
:return: tuple(float, float)
|
||||
"""
|
||||
if len(self._parsed_sents) != len(self._gold_sents):
|
||||
raise ValueError(
|
||||
" Number of parsed sentence is different with number of gold sentence."
|
||||
)
|
||||
|
||||
corr = 0
|
||||
corrL = 0
|
||||
total = 0
|
||||
|
||||
for i in range(len(self._parsed_sents)):
|
||||
parsed_sent_nodes = self._parsed_sents[i].nodes
|
||||
gold_sent_nodes = self._gold_sents[i].nodes
|
||||
|
||||
if len(parsed_sent_nodes) != len(gold_sent_nodes):
|
||||
raise ValueError("Sentences must have equal length.")
|
||||
|
||||
for parsed_node_address, parsed_node in parsed_sent_nodes.items():
|
||||
gold_node = gold_sent_nodes[parsed_node_address]
|
||||
|
||||
if parsed_node["word"] is None:
|
||||
continue
|
||||
if parsed_node["word"] != gold_node["word"]:
|
||||
raise ValueError("Sentence sequence is not matched.")
|
||||
|
||||
# Ignore if word is punctuation by default
|
||||
# if (parsed_sent[j]["word"] in string.punctuation):
|
||||
if self._remove_punct(parsed_node["word"]) == "":
|
||||
continue
|
||||
|
||||
total += 1
|
||||
if parsed_node["head"] == gold_node["head"]:
|
||||
corr += 1
|
||||
if parsed_node["rel"] == gold_node["rel"]:
|
||||
corrL += 1
|
||||
|
||||
return corrL / total, corr / total
|
||||
674
backend/venv/Lib/site-packages/nltk/parse/featurechart.py
Normal file
@@ -0,0 +1,674 @@
|
||||
# Natural Language Toolkit: Chart Parser for Feature-Based Grammars
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Rob Speer <rspeer@mit.edu>
|
||||
# Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Extension of chart parsing implementation to handle grammars with
|
||||
feature structures as nodes.
|
||||
"""
|
||||
from time import perf_counter
|
||||
|
||||
from nltk.featstruct import TYPE, FeatStruct, find_variables, unify
|
||||
from nltk.grammar import (
|
||||
CFG,
|
||||
FeatStructNonterminal,
|
||||
Nonterminal,
|
||||
Production,
|
||||
is_nonterminal,
|
||||
is_terminal,
|
||||
)
|
||||
from nltk.parse.chart import (
|
||||
BottomUpPredictCombineRule,
|
||||
BottomUpPredictRule,
|
||||
CachedTopDownPredictRule,
|
||||
Chart,
|
||||
ChartParser,
|
||||
EdgeI,
|
||||
EmptyPredictRule,
|
||||
FundamentalRule,
|
||||
LeafInitRule,
|
||||
SingleEdgeFundamentalRule,
|
||||
TopDownInitRule,
|
||||
TreeEdge,
|
||||
)
|
||||
from nltk.sem import logic
|
||||
from nltk.tree import Tree
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Tree Edge
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class FeatureTreeEdge(TreeEdge):
|
||||
"""
|
||||
A specialized tree edge that allows shared variable bindings
|
||||
between nonterminals on the left-hand side and right-hand side.
|
||||
|
||||
Each ``FeatureTreeEdge`` contains a set of ``bindings``, i.e., a
|
||||
dictionary mapping from variables to values. If the edge is not
|
||||
complete, then these bindings are simply stored. However, if the
|
||||
edge is complete, then the constructor applies these bindings to
|
||||
every nonterminal in the edge whose symbol implements the
|
||||
interface ``SubstituteBindingsI``.
|
||||
"""
|
||||
|
||||
def __init__(self, span, lhs, rhs, dot=0, bindings=None):
|
||||
"""
|
||||
Construct a new edge. If the edge is incomplete (i.e., if
|
||||
``dot<len(rhs)``), then store the bindings as-is. If the edge
|
||||
is complete (i.e., if ``dot==len(rhs)``), then apply the
|
||||
bindings to all nonterminals in ``lhs`` and ``rhs``, and then
|
||||
clear the bindings. See ``TreeEdge`` for a description of
|
||||
the other arguments.
|
||||
"""
|
||||
if bindings is None:
|
||||
bindings = {}
|
||||
|
||||
# If the edge is complete, then substitute in the bindings,
|
||||
# and then throw them away. (If we didn't throw them away, we
|
||||
# might think that 2 complete edges are different just because
|
||||
# they have different bindings, even though all bindings have
|
||||
# already been applied.)
|
||||
if dot == len(rhs) and bindings:
|
||||
lhs = self._bind(lhs, bindings)
|
||||
rhs = [self._bind(elt, bindings) for elt in rhs]
|
||||
bindings = {}
|
||||
|
||||
# Initialize the edge.
|
||||
TreeEdge.__init__(self, span, lhs, rhs, dot)
|
||||
self._bindings = bindings
|
||||
self._comparison_key = (self._comparison_key, tuple(sorted(bindings.items())))
|
||||
|
||||
@staticmethod
|
||||
def from_production(production, index):
|
||||
"""
|
||||
:return: A new ``TreeEdge`` formed from the given production.
|
||||
The new edge's left-hand side and right-hand side will
|
||||
be taken from ``production``; its span will be
|
||||
``(index,index)``; and its dot position will be ``0``.
|
||||
:rtype: TreeEdge
|
||||
"""
|
||||
return FeatureTreeEdge(
|
||||
span=(index, index), lhs=production.lhs(), rhs=production.rhs(), dot=0
|
||||
)
|
||||
|
||||
def move_dot_forward(self, new_end, bindings=None):
|
||||
"""
|
||||
:return: A new ``FeatureTreeEdge`` formed from this edge.
|
||||
The new edge's dot position is increased by ``1``,
|
||||
and its end index will be replaced by ``new_end``.
|
||||
:rtype: FeatureTreeEdge
|
||||
:param new_end: The new end index.
|
||||
:type new_end: int
|
||||
:param bindings: Bindings for the new edge.
|
||||
:type bindings: dict
|
||||
"""
|
||||
return FeatureTreeEdge(
|
||||
span=(self._span[0], new_end),
|
||||
lhs=self._lhs,
|
||||
rhs=self._rhs,
|
||||
dot=self._dot + 1,
|
||||
bindings=bindings,
|
||||
)
|
||||
|
||||
def _bind(self, nt, bindings):
|
||||
if not isinstance(nt, FeatStructNonterminal):
|
||||
return nt
|
||||
return nt.substitute_bindings(bindings)
|
||||
|
||||
def next_with_bindings(self):
|
||||
return self._bind(self.nextsym(), self._bindings)
|
||||
|
||||
def bindings(self):
|
||||
"""
|
||||
Return a copy of this edge's bindings dictionary.
|
||||
"""
|
||||
return self._bindings.copy()
|
||||
|
||||
def variables(self):
|
||||
"""
|
||||
:return: The set of variables used by this edge.
|
||||
:rtype: set(Variable)
|
||||
"""
|
||||
return find_variables(
|
||||
[self._lhs]
|
||||
+ list(self._rhs)
|
||||
+ list(self._bindings.keys())
|
||||
+ list(self._bindings.values()),
|
||||
fs_class=FeatStruct,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
if self.is_complete():
|
||||
return super().__str__()
|
||||
else:
|
||||
bindings = "{%s}" % ", ".join(
|
||||
"%s: %r" % item for item in sorted(self._bindings.items())
|
||||
)
|
||||
return f"{super().__str__()} {bindings}"
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# A specialized Chart for feature grammars
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
# TODO: subsumes check when adding new edges
|
||||
|
||||
|
||||
class FeatureChart(Chart):
|
||||
"""
|
||||
A Chart for feature grammars.
|
||||
:see: ``Chart`` for more information.
|
||||
"""
|
||||
|
||||
def select(self, **restrictions):
|
||||
"""
|
||||
Returns an iterator over the edges in this chart.
|
||||
See ``Chart.select`` for more information about the
|
||||
``restrictions`` on the edges.
|
||||
"""
|
||||
# If there are no restrictions, then return all edges.
|
||||
if restrictions == {}:
|
||||
return iter(self._edges)
|
||||
|
||||
# Find the index corresponding to the given restrictions.
|
||||
restr_keys = sorted(restrictions.keys())
|
||||
restr_keys = tuple(restr_keys)
|
||||
|
||||
# If it doesn't exist, then create it.
|
||||
if restr_keys not in self._indexes:
|
||||
self._add_index(restr_keys)
|
||||
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(restrictions[key]) for key in restr_keys
|
||||
)
|
||||
return iter(self._indexes[restr_keys].get(vals, []))
|
||||
|
||||
def _add_index(self, restr_keys):
|
||||
"""
|
||||
A helper function for ``select``, which creates a new index for
|
||||
a given set of attributes (aka restriction keys).
|
||||
"""
|
||||
# Make sure it's a valid index.
|
||||
for key in restr_keys:
|
||||
if not hasattr(EdgeI, key):
|
||||
raise ValueError("Bad restriction: %s" % key)
|
||||
|
||||
# Create the index.
|
||||
index = self._indexes[restr_keys] = {}
|
||||
|
||||
# Add all existing edges to the index.
|
||||
for edge in self._edges:
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
|
||||
)
|
||||
index.setdefault(vals, []).append(edge)
|
||||
|
||||
def _register_with_indexes(self, edge):
|
||||
"""
|
||||
A helper function for ``insert``, which registers the new
|
||||
edge with all existing indexes.
|
||||
"""
|
||||
for restr_keys, index in self._indexes.items():
|
||||
vals = tuple(
|
||||
self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
|
||||
)
|
||||
index.setdefault(vals, []).append(edge)
|
||||
|
||||
def _get_type_if_possible(self, item):
|
||||
"""
|
||||
Helper function which returns the ``TYPE`` feature of the ``item``,
|
||||
if it exists, otherwise it returns the ``item`` itself
|
||||
"""
|
||||
if isinstance(item, dict) and TYPE in item:
|
||||
return item[TYPE]
|
||||
else:
|
||||
return item
|
||||
|
||||
def parses(self, start, tree_class=Tree):
|
||||
for edge in self.select(start=0, end=self._num_leaves):
|
||||
if (
|
||||
(isinstance(edge, FeatureTreeEdge))
|
||||
and (edge.lhs()[TYPE] == start[TYPE])
|
||||
and (unify(edge.lhs(), start, rename_vars=True))
|
||||
):
|
||||
yield from self.trees(edge, complete=True, tree_class=tree_class)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Fundamental Rule
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class FeatureFundamentalRule(FundamentalRule):
|
||||
r"""
|
||||
A specialized version of the fundamental rule that operates on
|
||||
nonterminals whose symbols are ``FeatStructNonterminal``s. Rather
|
||||
than simply comparing the nonterminals for equality, they are
|
||||
unified. Variable bindings from these unifications are collected
|
||||
and stored in the chart using a ``FeatureTreeEdge``. When a
|
||||
complete edge is generated, these bindings are applied to all
|
||||
nonterminals in the edge.
|
||||
|
||||
The fundamental rule states that:
|
||||
|
||||
- ``[A -> alpha \* B1 beta][i:j]``
|
||||
- ``[B2 -> gamma \*][j:k]``
|
||||
|
||||
licenses the edge:
|
||||
|
||||
- ``[A -> alpha B3 \* beta][i:k]``
|
||||
|
||||
assuming that B1 and B2 can be unified to generate B3.
|
||||
"""
|
||||
|
||||
def apply(self, chart, grammar, left_edge, right_edge):
|
||||
# Make sure the rule is applicable.
|
||||
if not (
|
||||
left_edge.end() == right_edge.start()
|
||||
and left_edge.is_incomplete()
|
||||
and right_edge.is_complete()
|
||||
and isinstance(left_edge, FeatureTreeEdge)
|
||||
):
|
||||
return
|
||||
found = right_edge.lhs()
|
||||
nextsym = left_edge.nextsym()
|
||||
if isinstance(right_edge, FeatureTreeEdge):
|
||||
if not is_nonterminal(nextsym):
|
||||
return
|
||||
if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]:
|
||||
return
|
||||
# Create a copy of the bindings.
|
||||
bindings = left_edge.bindings()
|
||||
# We rename vars here, because we don't want variables
|
||||
# from the two different productions to match.
|
||||
found = found.rename_variables(used_vars=left_edge.variables())
|
||||
# Unify B1 (left_edge.nextsym) with B2 (right_edge.lhs) to
|
||||
# generate B3 (result).
|
||||
result = unify(nextsym, found, bindings, rename_vars=False)
|
||||
if result is None:
|
||||
return
|
||||
else:
|
||||
if nextsym != found:
|
||||
return
|
||||
# Create a copy of the bindings.
|
||||
bindings = left_edge.bindings()
|
||||
|
||||
# Construct the new edge.
|
||||
new_edge = left_edge.move_dot_forward(right_edge.end(), bindings)
|
||||
|
||||
# Add it to the chart, with appropriate child pointers.
|
||||
if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
|
||||
"""
|
||||
A specialized version of the completer / single edge fundamental rule
|
||||
that operates on nonterminals whose symbols are ``FeatStructNonterminal``.
|
||||
Rather than simply comparing the nonterminals for equality, they are
|
||||
unified.
|
||||
"""
|
||||
|
||||
_fundamental_rule = FeatureFundamentalRule()
|
||||
|
||||
def _apply_complete(self, chart, grammar, right_edge):
|
||||
fr = self._fundamental_rule
|
||||
for left_edge in chart.select(
|
||||
end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs()
|
||||
):
|
||||
yield from fr.apply(chart, grammar, left_edge, right_edge)
|
||||
|
||||
def _apply_incomplete(self, chart, grammar, left_edge):
|
||||
fr = self._fundamental_rule
|
||||
for right_edge in chart.select(
|
||||
start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym()
|
||||
):
|
||||
yield from fr.apply(chart, grammar, left_edge, right_edge)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Top-Down Prediction
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class FeatureTopDownInitRule(TopDownInitRule):
|
||||
def apply(self, chart, grammar):
|
||||
for prod in grammar.productions(lhs=grammar.start()):
|
||||
new_edge = FeatureTreeEdge.from_production(prod, 0)
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class FeatureTopDownPredictRule(CachedTopDownPredictRule):
|
||||
r"""
|
||||
A specialized version of the (cached) top down predict rule that operates
|
||||
on nonterminals whose symbols are ``FeatStructNonterminal``. Rather
|
||||
than simply comparing the nonterminals for equality, they are
|
||||
unified.
|
||||
|
||||
The top down expand rule states that:
|
||||
|
||||
- ``[A -> alpha \* B1 beta][i:j]``
|
||||
|
||||
licenses the edge:
|
||||
|
||||
- ``[B2 -> \* gamma][j:j]``
|
||||
|
||||
for each grammar production ``B2 -> gamma``, assuming that B1
|
||||
and B2 can be unified.
|
||||
"""
|
||||
|
||||
def apply(self, chart, grammar, edge):
|
||||
if edge.is_complete():
|
||||
return
|
||||
nextsym, index = edge.nextsym(), edge.end()
|
||||
if not is_nonterminal(nextsym):
|
||||
return
|
||||
|
||||
# If we've already applied this rule to an edge with the same
|
||||
# next & end, and the chart & grammar have not changed, then
|
||||
# just return (no new edges to add).
|
||||
nextsym_with_bindings = edge.next_with_bindings()
|
||||
done = self._done.get((nextsym_with_bindings, index), (None, None))
|
||||
if done[0] is chart and done[1] is grammar:
|
||||
return
|
||||
|
||||
for prod in grammar.productions(lhs=nextsym):
|
||||
# If the left corner in the predicted production is
|
||||
# leaf, it must match with the input.
|
||||
if prod.rhs():
|
||||
first = prod.rhs()[0]
|
||||
if is_terminal(first):
|
||||
if index >= chart.num_leaves():
|
||||
continue
|
||||
if first != chart.leaf(index):
|
||||
continue
|
||||
|
||||
# We rename vars here, because we don't want variables
|
||||
# from the two different productions to match.
|
||||
if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True):
|
||||
new_edge = FeatureTreeEdge.from_production(prod, edge.end())
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
# Record the fact that we've applied this rule.
|
||||
self._done[nextsym_with_bindings, index] = (chart, grammar)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Bottom-Up Prediction
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class FeatureBottomUpPredictRule(BottomUpPredictRule):
|
||||
def apply(self, chart, grammar, edge):
|
||||
if edge.is_incomplete():
|
||||
return
|
||||
for prod in grammar.productions(rhs=edge.lhs()):
|
||||
if isinstance(edge, FeatureTreeEdge):
|
||||
_next = prod.rhs()[0]
|
||||
if not is_nonterminal(_next):
|
||||
continue
|
||||
|
||||
new_edge = FeatureTreeEdge.from_production(prod, edge.start())
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class FeatureBottomUpPredictCombineRule(BottomUpPredictCombineRule):
|
||||
def apply(self, chart, grammar, edge):
|
||||
if edge.is_incomplete():
|
||||
return
|
||||
found = edge.lhs()
|
||||
for prod in grammar.productions(rhs=found):
|
||||
bindings = {}
|
||||
if isinstance(edge, FeatureTreeEdge):
|
||||
_next = prod.rhs()[0]
|
||||
if not is_nonterminal(_next):
|
||||
continue
|
||||
|
||||
# We rename vars here, because we don't want variables
|
||||
# from the two different productions to match.
|
||||
used_vars = find_variables(
|
||||
(prod.lhs(),) + prod.rhs(), fs_class=FeatStruct
|
||||
)
|
||||
found = found.rename_variables(used_vars=used_vars)
|
||||
|
||||
result = unify(_next, found, bindings, rename_vars=False)
|
||||
if result is None:
|
||||
continue
|
||||
|
||||
new_edge = FeatureTreeEdge.from_production(
|
||||
prod, edge.start()
|
||||
).move_dot_forward(edge.end(), bindings)
|
||||
if chart.insert(new_edge, (edge,)):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class FeatureEmptyPredictRule(EmptyPredictRule):
|
||||
def apply(self, chart, grammar):
|
||||
for prod in grammar.productions(empty=True):
|
||||
for index in range(chart.num_leaves() + 1):
|
||||
new_edge = FeatureTreeEdge.from_production(prod, index)
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Feature Chart Parser
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
TD_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureTopDownInitRule(),
|
||||
FeatureTopDownPredictRule(),
|
||||
FeatureSingleEdgeFundamentalRule(),
|
||||
]
|
||||
BU_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureEmptyPredictRule(),
|
||||
FeatureBottomUpPredictRule(),
|
||||
FeatureSingleEdgeFundamentalRule(),
|
||||
]
|
||||
BU_LC_FEATURE_STRATEGY = [
|
||||
LeafInitRule(),
|
||||
FeatureEmptyPredictRule(),
|
||||
FeatureBottomUpPredictCombineRule(),
|
||||
FeatureSingleEdgeFundamentalRule(),
|
||||
]
|
||||
|
||||
|
||||
class FeatureChartParser(ChartParser):
|
||||
def __init__(
|
||||
self,
|
||||
grammar,
|
||||
strategy=BU_LC_FEATURE_STRATEGY,
|
||||
trace_chart_width=20,
|
||||
chart_class=FeatureChart,
|
||||
**parser_args,
|
||||
):
|
||||
ChartParser.__init__(
|
||||
self,
|
||||
grammar,
|
||||
strategy=strategy,
|
||||
trace_chart_width=trace_chart_width,
|
||||
chart_class=chart_class,
|
||||
**parser_args,
|
||||
)
|
||||
|
||||
|
||||
class FeatureTopDownChartParser(FeatureChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureChartParser.__init__(self, grammar, TD_FEATURE_STRATEGY, **parser_args)
|
||||
|
||||
|
||||
class FeatureBottomUpChartParser(FeatureChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureChartParser.__init__(self, grammar, BU_FEATURE_STRATEGY, **parser_args)
|
||||
|
||||
|
||||
class FeatureBottomUpLeftCornerChartParser(FeatureChartParser):
|
||||
def __init__(self, grammar, **parser_args):
|
||||
FeatureChartParser.__init__(
|
||||
self, grammar, BU_LC_FEATURE_STRATEGY, **parser_args
|
||||
)
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Instantiate Variable Chart
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class InstantiateVarsChart(FeatureChart):
|
||||
"""
|
||||
A specialized chart that 'instantiates' variables whose names
|
||||
start with '@', by replacing them with unique new variables.
|
||||
In particular, whenever a complete edge is added to the chart, any
|
||||
variables in the edge's ``lhs`` whose names start with '@' will be
|
||||
replaced by unique new ``Variable``.
|
||||
"""
|
||||
|
||||
def __init__(self, tokens):
|
||||
FeatureChart.__init__(self, tokens)
|
||||
|
||||
def initialize(self):
|
||||
self._instantiated = set()
|
||||
FeatureChart.initialize(self)
|
||||
|
||||
def insert(self, edge, child_pointer_list):
|
||||
if edge in self._instantiated:
|
||||
return False
|
||||
self.instantiate_edge(edge)
|
||||
return FeatureChart.insert(self, edge, child_pointer_list)
|
||||
|
||||
def instantiate_edge(self, edge):
|
||||
"""
|
||||
If the edge is a ``FeatureTreeEdge``, and it is complete,
|
||||
then instantiate all variables whose names start with '@',
|
||||
by replacing them with unique new variables.
|
||||
|
||||
Note that instantiation is done in-place, since the
|
||||
parsing algorithms might already hold a reference to
|
||||
the edge for future use.
|
||||
"""
|
||||
# If the edge is a leaf, or is not complete, or is
|
||||
# already in the chart, then just return it as-is.
|
||||
if not isinstance(edge, FeatureTreeEdge):
|
||||
return
|
||||
if not edge.is_complete():
|
||||
return
|
||||
if edge in self._edge_to_cpls:
|
||||
return
|
||||
|
||||
# Get a list of variables that need to be instantiated.
|
||||
# If there are none, then return as-is.
|
||||
inst_vars = self.inst_vars(edge)
|
||||
if not inst_vars:
|
||||
return
|
||||
|
||||
# Instantiate the edge!
|
||||
self._instantiated.add(edge)
|
||||
edge._lhs = edge.lhs().substitute_bindings(inst_vars)
|
||||
|
||||
def inst_vars(self, edge):
|
||||
return {
|
||||
var: logic.unique_variable()
|
||||
for var in edge.lhs().variables()
|
||||
if var.name.startswith("@")
|
||||
}
|
||||
|
||||
|
||||
# ////////////////////////////////////////////////////////////
|
||||
# Demo
|
||||
# ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo_grammar():
|
||||
from nltk.grammar import FeatureGrammar
|
||||
|
||||
return FeatureGrammar.fromstring(
|
||||
"""
|
||||
S -> NP VP
|
||||
PP -> Prep NP
|
||||
NP -> NP PP
|
||||
VP -> VP PP
|
||||
VP -> Verb NP
|
||||
VP -> Verb
|
||||
NP -> Det[pl=?x] Noun[pl=?x]
|
||||
NP -> "John"
|
||||
NP -> "I"
|
||||
Det -> "the"
|
||||
Det -> "my"
|
||||
Det[-pl] -> "a"
|
||||
Noun[-pl] -> "dog"
|
||||
Noun[-pl] -> "cookie"
|
||||
Verb -> "ate"
|
||||
Verb -> "saw"
|
||||
Prep -> "with"
|
||||
Prep -> "under"
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def demo(
|
||||
print_times=True,
|
||||
print_grammar=True,
|
||||
print_trees=True,
|
||||
print_sentence=True,
|
||||
trace=1,
|
||||
parser=FeatureChartParser,
|
||||
sent="I saw John with a dog with my cookie",
|
||||
):
|
||||
import sys
|
||||
import time
|
||||
|
||||
print()
|
||||
grammar = demo_grammar()
|
||||
if print_grammar:
|
||||
print(grammar)
|
||||
print()
|
||||
print("*", parser.__name__)
|
||||
if print_sentence:
|
||||
print("Sentence:", sent)
|
||||
tokens = sent.split()
|
||||
t = perf_counter()
|
||||
cp = parser(grammar, trace=trace)
|
||||
chart = cp.chart_parse(tokens)
|
||||
trees = list(chart.parses(grammar.start()))
|
||||
if print_times:
|
||||
print("Time: %s" % (perf_counter() - t))
|
||||
if print_trees:
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
else:
|
||||
print("Nr trees:", len(trees))
|
||||
|
||||
|
||||
def run_profile():
|
||||
import profile
|
||||
|
||||
profile.run("for i in range(1): demo()", "/tmp/profile.out")
|
||||
import pstats
|
||||
|
||||
p = pstats.Stats("/tmp/profile.out")
|
||||
p.strip_dirs().sort_stats("time", "cum").print_stats(60)
|
||||
p.strip_dirs().sort_stats("cum", "time").print_stats(60)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from nltk.data import load
|
||||
|
||||
demo()
|
||||
print()
|
||||
grammar = load("grammars/book_grammars/feat0.fcfg")
|
||||
cp = FeatureChartParser(grammar, trace=2)
|
||||
sent = "Kim likes children"
|
||||
tokens = sent.split()
|
||||
trees = cp.parse(tokens)
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
88
backend/venv/Lib/site-packages/nltk/parse/generate.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# Natural Language Toolkit: Generating from a CFG
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Steven Bird <stevenbird1@gmail.com>
|
||||
# Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
||||
# Eric Kafe <kafe.eric@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
import itertools
|
||||
import sys
|
||||
|
||||
from nltk.grammar import Nonterminal
|
||||
|
||||
|
||||
def generate(grammar, start=None, depth=None, n=None):
|
||||
"""
|
||||
Generates an iterator of all sentences from a CFG.
|
||||
|
||||
:param grammar: The Grammar used to generate sentences.
|
||||
:param start: The Nonterminal from which to start generating sentences.
|
||||
:param depth: The maximal depth of the generated tree.
|
||||
:param n: The maximum number of sentences to return.
|
||||
:return: An iterator of lists of terminal tokens.
|
||||
"""
|
||||
if not start:
|
||||
start = grammar.start()
|
||||
if depth is None:
|
||||
# Safe default, assuming the grammar may be recursive:
|
||||
depth = (sys.getrecursionlimit() // 3) - 3
|
||||
|
||||
iter = _generate_all(grammar, [start], depth)
|
||||
|
||||
if n:
|
||||
iter = itertools.islice(iter, n)
|
||||
|
||||
return iter
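# Minimal usage sketch (illustrative; see demo() below for a fuller example):
#
#     >>> from nltk.grammar import CFG
#     >>> g = CFG.fromstring("S -> 'a' S | 'b'")
#     >>> list(generate(g, depth=3))
#     [['a', 'b'], ['b']]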
|
||||
|
||||
|
||||
def _generate_all(grammar, items, depth):
|
||||
if items:
|
||||
try:
|
||||
for frag1 in _generate_one(grammar, items[0], depth):
|
||||
for frag2 in _generate_all(grammar, items[1:], depth):
|
||||
yield frag1 + frag2
|
||||
except RecursionError as error:
|
||||
# Helpful error message while still showing the recursion stack.
|
||||
raise RuntimeError(
|
||||
"The grammar has rule(s) that yield infinite recursion!\n\
|
||||
Eventually use a lower 'depth', or a higher 'sys.setrecursionlimit()'."
|
||||
) from error
|
||||
else:
|
||||
yield []
|
||||
|
||||
|
||||
def _generate_one(grammar, item, depth):
|
||||
if depth > 0:
|
||||
if isinstance(item, Nonterminal):
|
||||
for prod in grammar.productions(lhs=item):
|
||||
yield from _generate_all(grammar, prod.rhs(), depth - 1)
|
||||
else:
|
||||
yield [item]
|
||||
|
||||
|
||||
demo_grammar = """
|
||||
S -> NP VP
|
||||
NP -> Det N
|
||||
PP -> P NP
|
||||
VP -> 'slept' | 'saw' NP | 'walked' PP
|
||||
Det -> 'the' | 'a'
|
||||
N -> 'man' | 'park' | 'dog'
|
||||
P -> 'in' | 'with'
|
||||
"""
|
||||
|
||||
|
||||
def demo(N=23):
|
||||
from nltk.grammar import CFG
|
||||
|
||||
print("Generating the first %d sentences for demo grammar:" % (N,))
|
||||
print(demo_grammar)
|
||||
grammar = CFG.fromstring(demo_grammar)
|
||||
for n, sent in enumerate(generate(grammar, n=N), 1):
|
||||
print("%3d. %s" % (n, " ".join(sent)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
393
backend/venv/Lib/site-packages/nltk/parse/malt.py
Normal file
@@ -0,0 +1,393 @@
|
||||
# Natural Language Toolkit: Interface to MaltParser
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
# Contributor: Liling Tan, Mustufain, osamamukhtar11
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import inspect
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from nltk.data import ZipFilePathPointer
|
||||
from nltk.internals import find_dir, find_file, find_jars_within_path
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
from nltk.parse.util import taggedsents_to_conll
|
||||
|
||||
|
||||
def malt_regex_tagger():
|
||||
from nltk.tag import RegexpTagger
|
||||
|
||||
_tagger = RegexpTagger(
|
||||
[
|
||||
(r"\.$", "."),
|
||||
(r"\,$", ","),
|
||||
(r"\?$", "?"), # fullstop, comma, Qmark
|
||||
(r"\($", "("),
|
||||
(r"\)$", ")"), # round brackets
|
||||
(r"\[$", "["),
|
||||
(r"\]$", "]"), # square brackets
|
||||
(r"^-?[0-9]+(\.[0-9]+)?$", "CD"), # cardinal numbers
|
||||
(r"(The|the|A|a|An|an)$", "DT"), # articles
|
||||
(r"(He|he|She|she|It|it|I|me|Me|You|you)$", "PRP"), # pronouns
|
||||
(r"(His|his|Her|her|Its|its)$", "PRP$"), # possessive
|
||||
(r"(my|Your|your|Yours|yours)$", "PRP$"), # possessive
|
||||
(r"(on|On|in|In|at|At|since|Since)$", "IN"), # time prepopsitions
|
||||
(r"(for|For|ago|Ago|before|Before)$", "IN"), # time prepopsitions
|
||||
(r"(till|Till|until|Until)$", "IN"), # time prepopsitions
|
||||
(r"(by|By|beside|Beside)$", "IN"), # space prepopsitions
|
||||
(r"(under|Under|below|Below)$", "IN"), # space prepopsitions
|
||||
(r"(over|Over|above|Above)$", "IN"), # space prepopsitions
|
||||
(r"(across|Across|through|Through)$", "IN"), # space prepopsitions
|
||||
(r"(into|Into|towards|Towards)$", "IN"), # space prepopsitions
|
||||
(r"(onto|Onto|from|From)$", "IN"), # space prepopsitions
|
||||
(r".*able$", "JJ"), # adjectives
|
||||
(r".*ness$", "NN"), # nouns formed from adjectives
|
||||
(r".*ly$", "RB"), # adverbs
|
||||
(r".*s$", "NNS"), # plural nouns
|
||||
(r".*ing$", "VBG"), # gerunds
|
||||
(r".*ed$", "VBD"), # past tense verbs
|
||||
(r".*", "NN"), # nouns (default)
|
||||
]
|
||||
)
|
||||
return _tagger.tag
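# Illustrative sketch (not part of the original module): the returned callable
# behaves like nltk.pos_tag, mapping a token list to (word, tag) pairs.
#
#     >>> tag = malt_regex_tagger()
#     >>> tag("The dog walked quickly .".split())
#     [('The', 'DT'), ('dog', 'NN'), ('walked', 'VBD'), ('quickly', 'RB'), ('.', '.')]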
|
||||
|
||||
|
||||
def find_maltparser(parser_dirname):
|
||||
"""
|
||||
A function to find the MaltParser .jar file and its dependencies.
|
||||
"""
|
||||
if os.path.exists(parser_dirname): # If a full path is given.
|
||||
_malt_dir = parser_dirname
|
||||
else: # Try to find path to maltparser directory in environment variables.
|
||||
_malt_dir = find_dir(parser_dirname, env_vars=("MALT_PARSER",))
|
||||
# Check that the found directory contains all the necessary .jar files.
|
||||
malt_dependencies = ["", "", ""]
|
||||
_malt_jars = set(find_jars_within_path(_malt_dir))
|
||||
_jars = {os.path.split(jar)[1] for jar in _malt_jars}
|
||||
malt_dependencies = {"log4j.jar", "libsvm.jar", "liblinear-1.8.jar"}
|
||||
|
||||
assert malt_dependencies.issubset(_jars)
|
||||
assert any(
|
||||
filter(lambda i: i.startswith("maltparser-") and i.endswith(".jar"), _jars)
|
||||
)
|
||||
return list(_malt_jars)
|
||||
|
||||
|
||||
def find_malt_model(model_filename):
|
||||
"""
|
||||
A function to find a pre-trained MaltParser model.
|
||||
"""
|
||||
if model_filename is None:
|
||||
return "malt_temp.mco"
|
||||
elif os.path.exists(model_filename): # If a full path is given.
|
||||
return model_filename
|
||||
else: # Try to find path to malt model in environment variables.
|
||||
return find_file(model_filename, env_vars=("MALT_MODEL",), verbose=False)
|
||||
|
||||
|
||||
class MaltParser(ParserI):
|
||||
"""
|
||||
A class for dependency parsing with MaltParser. The input is the paths to:
|
||||
- (optionally) a maltparser directory
|
||||
- (optionally) the path to a pre-trained MaltParser .mco model file
|
||||
- (optionally) the tagger to use for POS tagging before parsing
|
||||
- (optionally) additional Java arguments
|
||||
|
||||
Example:
|
||||
>>> from nltk.parse import malt
|
||||
>>> # With MALT_PARSER and MALT_MODEL environment set.
|
||||
>>> mp = malt.MaltParser(model_filename='engmalt.linear-1.7.mco') # doctest: +SKIP
|
||||
>>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
|
||||
(shot I (elephant an) (in (pajamas my)) .)
|
||||
>>> # Without MALT_PARSER and MALT_MODEL environment.
|
||||
>>> mp = malt.MaltParser('/home/user/maltparser-1.9.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
|
||||
>>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
|
||||
(shot I (elephant an) (in (pajamas my)) .)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
parser_dirname="",
|
||||
model_filename=None,
|
||||
tagger=None,
|
||||
additional_java_args=None,
|
||||
):
|
||||
"""
|
||||
An interface for parsing with the Malt Parser.
|
||||
|
||||
:param parser_dirname: The path to the maltparser directory that
|
||||
contains the maltparser-1.x.jar
|
||||
:type parser_dirname: str
|
||||
:param model_filename: The name of the pre-trained model with .mco file
|
||||
extension. If provided, training will not be required.
|
||||
(see http://www.maltparser.org/mco/mco.html and
|
||||
see http://www.patful.com/chalk/node/185)
|
||||
:type model_filename: str
|
||||
:param tagger: The tagger used to POS tag the raw string before
|
||||
formatting to CONLL format. It should behave like `nltk.pos_tag`
|
||||
:type tagger: function
|
||||
:param additional_java_args: These are the additional Java arguments that
one can use when calling MaltParser, usually the heap size
limits, e.g. `additional_java_args=['-Xmx1024m']`
|
||||
(see https://javarevisited.blogspot.com/2011/05/java-heap-space-memory-size-jvm.html)
|
||||
:type additional_java_args: list
|
||||
"""
|
||||
|
||||
# Find all the necessary jar files for MaltParser.
|
||||
self.malt_jars = find_maltparser(parser_dirname)
|
||||
# Initialize additional java arguments.
|
||||
self.additional_java_args = (
|
||||
additional_java_args if additional_java_args is not None else []
|
||||
)
|
||||
# Initialize model.
|
||||
self.model = find_malt_model(model_filename)
|
||||
self._trained = self.model != "malt_temp.mco"
|
||||
# Set the working_dir parameters i.e. `-w` from MaltParser's option.
|
||||
self.working_dir = tempfile.gettempdir()
|
||||
# Initialize POS tagger.
|
||||
self.tagger = tagger if tagger is not None else malt_regex_tagger()
|
||||
|
||||
def parse_tagged_sents(self, sentences, verbose=False, top_relation_label="null"):
|
||||
"""
|
||||
Use MaltParser to parse multiple POS tagged sentences. Takes multiple
|
||||
sentences where each sentence is a list of (word, tag) tuples.
|
||||
The sentences must have already been tokenized and tagged.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(tuple(str, str)))
|
||||
:return: iter(iter(``DependencyGraph``)) the dependency graph
|
||||
representation of each sentence
|
||||
"""
|
||||
if not self._trained:
|
||||
raise Exception("Parser has not been trained. Call train() first.")
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
prefix="malt_input.conll.", dir=self.working_dir, mode="w", delete=False
|
||||
) as input_file:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
prefix="malt_output.conll.",
|
||||
dir=self.working_dir,
|
||||
mode="w",
|
||||
delete=False,
|
||||
) as output_file:
|
||||
# Convert list of sentences to CONLL format.
|
||||
for line in taggedsents_to_conll(sentences):
|
||||
input_file.write(str(line))
|
||||
input_file.close()
|
||||
|
||||
# Generate command to run maltparser.
|
||||
cmd = self.generate_malt_command(
|
||||
input_file.name, output_file.name, mode="parse"
|
||||
)
|
||||
|
||||
# This is a MaltParser quirk: it needs to be run from the directory
# where the model file is; otherwise it runs into awkward
# missing-.jar or strange -w working_dir problems.
|
||||
_current_path = os.getcwd() # Remembers the current path.
|
||||
try: # Change to modelfile path
|
||||
os.chdir(os.path.split(self.model)[0])
|
||||
except:
|
||||
pass
|
||||
ret = self._execute(cmd, verbose) # Run command.
|
||||
os.chdir(_current_path) # Change back to current path.
|
||||
|
||||
if ret != 0:
|
||||
raise Exception(
|
||||
"MaltParser parsing (%s) failed with exit "
|
||||
"code %d" % (" ".join(cmd), ret)
|
||||
)
|
||||
|
||||
# Must return iter(iter(Tree))
|
||||
with open(output_file.name) as infile:
|
||||
for tree_str in infile.read().split("\n\n"):
|
||||
yield (
|
||||
iter(
|
||||
[
|
||||
DependencyGraph(
|
||||
tree_str, top_relation_label=top_relation_label
|
||||
)
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
os.remove(input_file.name)
|
||||
os.remove(output_file.name)
|
||||
|
||||
def parse_sents(self, sentences, verbose=False, top_relation_label="null"):
|
||||
"""
|
||||
Use MaltParser to parse multiple sentences.
|
||||
Takes a list of sentences, where each sentence is a list of words.
|
||||
Each sentence will be automatically tagged with this
|
||||
MaltParser instance's tagger.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(str))
|
||||
:return: iter(DependencyGraph)
|
||||
"""
|
||||
tagged_sentences = (self.tagger(sentence) for sentence in sentences)
|
||||
return self.parse_tagged_sents(
|
||||
tagged_sentences, verbose, top_relation_label=top_relation_label
|
||||
)
|
||||
|
||||
def generate_malt_command(self, inputfilename, outputfilename=None, mode=None):
|
||||
"""
|
||||
This function generates the maltparser command used at the terminal.
|
||||
|
||||
:param inputfilename: path to the input file
|
||||
:type inputfilename: str
|
||||
:param outputfilename: path to the output file
|
||||
:type outputfilename: str
|
||||
"""
|
||||
|
||||
cmd = ["java"]
|
||||
cmd += self.additional_java_args # Adds additional java arguments
|
||||
# Joins classpaths with ";" if on Windows and on Linux/Mac use ":"
|
||||
classpaths_separator = ";" if sys.platform.startswith("win") else ":"
|
||||
cmd += [
|
||||
"-cp",
|
||||
classpaths_separator.join(self.malt_jars),
|
||||
] # Adds classpaths for jars
|
||||
cmd += ["org.maltparser.Malt"] # Adds the main function.
|
||||
|
||||
# Adds the model file.
|
||||
if os.path.exists(self.model): # when parsing
|
||||
cmd += ["-c", os.path.split(self.model)[-1]]
|
||||
else: # when learning
|
||||
cmd += ["-c", self.model]
|
||||
|
||||
cmd += ["-i", inputfilename]
|
||||
if mode == "parse":
|
||||
cmd += ["-o", outputfilename]
|
||||
cmd += ["-m", mode] # mode use to generate parses.
|
||||
return cmd
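# Illustrative sketch (assumed file names, not part of the original module):
# for a parse run the generated command has roughly this shape, joining the
# classpath with ';' on Windows and ':' elsewhere:
#
#     java -cp maltparser-1.9.2.jar:liblinear-1.8.jar:... org.maltparser.Malt \
#          -c engmalt.linear-1.7.mco -i /tmp/malt_input.conll.XXX -o /tmp/malt_output.conll.XXX -m parse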
|
||||
|
||||
@staticmethod
|
||||
def _execute(cmd, verbose=False):
|
||||
output = None if verbose else subprocess.PIPE
|
||||
p = subprocess.Popen(cmd, stdout=output, stderr=output)
|
||||
return p.wait()
|
||||
|
||||
def train(self, depgraphs, verbose=False):
|
||||
"""
|
||||
Train MaltParser from a list of ``DependencyGraph`` objects
|
||||
|
||||
:param depgraphs: list of ``DependencyGraph`` objects for training input data
|
||||
:type depgraphs: DependencyGraph
|
||||
"""
|
||||
|
||||
# Write the conll_str to malt_train.conll file in /tmp/
|
||||
with tempfile.NamedTemporaryFile(
|
||||
prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False
|
||||
) as input_file:
|
||||
input_str = "\n".join(dg.to_conll(10) for dg in depgraphs)
|
||||
input_file.write(str(input_str))
|
||||
# Trains the model with the malt_train.conll
|
||||
self.train_from_file(input_file.name, verbose=verbose)
|
||||
# Removes the malt_train.conll once training finishes.
|
||||
os.remove(input_file.name)
|
||||
|
||||
def train_from_file(self, conll_file, verbose=False):
|
||||
"""
|
||||
Train MaltParser from a file
|
||||
:param conll_file: str for the filename of the training input data
|
||||
:type conll_file: str
|
||||
"""
|
||||
|
||||
# If conll_file is a ZipFilePathPointer,
|
||||
# then we need to do some extra massaging
|
||||
if isinstance(conll_file, ZipFilePathPointer):
|
||||
with tempfile.NamedTemporaryFile(
|
||||
prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False
|
||||
) as input_file:
|
||||
with conll_file.open() as conll_input_file:
|
||||
conll_str = conll_input_file.read()
|
||||
input_file.write(str(conll_str))
|
||||
return self.train_from_file(input_file.name, verbose=verbose)
|
||||
|
||||
# Generate command to run maltparser.
|
||||
cmd = self.generate_malt_command(conll_file, mode="learn")
|
||||
ret = self._execute(cmd, verbose)
|
||||
if ret != 0:
|
||||
raise Exception(
|
||||
"MaltParser training (%s) failed with exit "
|
||||
"code %d" % (" ".join(cmd), ret)
|
||||
)
|
||||
self._trained = True
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
A demonstration function to show how NLTK users can use the malt parser API.
|
||||
|
||||
>>> from nltk import pos_tag
|
||||
>>> assert 'MALT_PARSER' in os.environ, str(
|
||||
... "Please set MALT_PARSER in your global environment, e.g.:\n"
|
||||
... "$ export MALT_PARSER='/home/user/maltparser-1.9.2/'")
|
||||
>>>
|
||||
>>> assert 'MALT_MODEL' in os.environ, str(
|
||||
... "Please set MALT_MODEL in your global environment, e.g.:\n"
|
||||
... "$ export MALT_MODEL='/home/user/engmalt.linear-1.7.mco'")
|
||||
>>>
|
||||
>>> _dg1_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
|
||||
... "2 sees _ VB _ _ 0 ROOT _ _\n"
|
||||
... "3 a _ DT _ _ 4 SPEC _ _\n"
|
||||
... "4 dog _ NN _ _ 2 OBJ _ _\n"
|
||||
... "5 . _ . _ _ 2 PUNCT _ _\n")
|
||||
>>>
|
||||
>>>
|
||||
>>> _dg2_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
|
||||
... "2 walks _ VB _ _ 0 ROOT _ _\n"
|
||||
... "3 . _ . _ _ 2 PUNCT _ _\n")
|
||||
>>> dg1 = DependencyGraph(_dg1_str)
|
||||
>>> dg2 = DependencyGraph(_dg2_str)
|
||||
>>> # Initialize a MaltParser object
|
||||
>>> mp = MaltParser()
|
||||
>>>
|
||||
>>> # Trains a model.
|
||||
>>> mp.train([dg1,dg2], verbose=False)
|
||||
>>> sent1 = ['John','sees','Mary', '.']
|
||||
>>> sent2 = ['John', 'walks', 'a', 'dog', '.']
|
||||
>>>
|
||||
>>> # Parse a single sentence.
|
||||
>>> parsed_sent1 = mp.parse_one(sent1)
|
||||
>>> parsed_sent2 = mp.parse_one(sent2)
|
||||
>>> print(parsed_sent1.tree())
|
||||
(sees John Mary .)
|
||||
>>> print(parsed_sent2.tree())
|
||||
(walks John (dog a) .)
|
||||
>>>
|
||||
>>> # Parsing multiple sentences.
|
||||
>>> sentences = [sent1,sent2]
|
||||
>>> parsed_sents = mp.parse_sents(sentences)
|
||||
>>> print(next(next(parsed_sents)).tree())
|
||||
(sees John Mary .)
|
||||
>>> print(next(next(parsed_sents)).tree())
|
||||
(walks John (dog a) .)
|
||||
>>>
|
||||
>>> # Initialize a MaltParser object with an English pre-trained model.
|
||||
>>> parser_dirname = 'maltparser-1.9.2'
|
||||
>>> model_name = 'engmalt.linear-1.7.mco'
|
||||
>>> mp = MaltParser(parser_dirname=parser_dirname, model_filename=model_name, tagger=pos_tag)
|
||||
>>> sent1 = 'I shot an elephant in my pajamas .'.split()
|
||||
>>> sent2 = 'Time flies like banana .'.split()
|
||||
>>> # Parse a single sentence.
|
||||
>>> print(mp.parse_one(sent1).tree())
|
||||
(shot I (elephant an) (in (pajamas my)) .)
|
||||
# Parsing multiple sentences
|
||||
>>> sentences = [sent1,sent2]
|
||||
>>> parsed_sents = mp.parse_sents(sentences)
|
||||
>>> print(next(next(parsed_sents)).tree())
|
||||
(shot I (elephant an) (in (pajamas my)) .)
|
||||
>>> print(next(next(parsed_sents)).tree())
|
||||
(flies Time (like banana) .)
|
||||
"""
|
||||
|
||||
import doctest
|
||||
|
||||
doctest.testmod()
|
||||
@@ -0,0 +1,772 @@
|
||||
# Natural Language Toolkit: Dependency Grammars
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Jason Narad <jason.narad@gmail.com>
|
||||
#
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
import logging
|
||||
import math
|
||||
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
#################################################################
|
||||
# DependencyScorerI - Interface for Graph-Edge Weight Calculation
|
||||
#################################################################
|
||||
|
||||
|
||||
class DependencyScorerI:
|
||||
"""
|
||||
A scorer for calculating the weights on the edges of a weighted
|
||||
dependency graph. This is used by a
|
||||
``ProbabilisticNonprojectiveParser`` to initialize the edge
|
||||
weights of a ``DependencyGraph``. While typically this would be done
|
||||
by training a binary classifier, any class that can return a
|
||||
multidimensional list representation of the edge weights can
|
||||
implement this interface. As such, it has no necessary
|
||||
fields.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
if self.__class__ == DependencyScorerI:
|
||||
raise TypeError("DependencyScorerI is an abstract interface")
|
||||
|
||||
def train(self, graphs):
|
||||
"""
|
||||
:type graphs: list(DependencyGraph)
|
||||
:param graphs: A list of dependency graphs to train the scorer.
|
||||
Typically the edges present in the graphs can be used as
|
||||
positive training examples, and the edges not present as negative
|
||||
examples.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def score(self, graph):
|
||||
"""
|
||||
:type graph: DependencyGraph
|
||||
:param graph: A dependency graph whose set of edges need to be
|
||||
scored.
|
||||
:rtype: A three-dimensional list of numbers.
|
||||
:return: The score is returned in a multidimensional(3) list, such
|
||||
that the outer-dimension refers to the head, and the
|
||||
inner-dimension refers to the dependencies. For instance,
|
||||
scores[0][1] would reference the list of scores corresponding to
|
||||
arcs from node 0 to node 1. The node's 'address' field can be used
|
||||
to determine its number identification.
|
||||
|
||||
For further illustration, a score list corresponding to Fig.2 of
|
||||
Keith Hall's 'K-best Spanning Tree Parsing' paper::
|
||||
|
||||
scores = [[[], [5], [1], [1]],
|
||||
[[], [], [11], [4]],
|
||||
[[], [10], [], [5]],
|
||||
[[], [8], [8], []]]
|
||||
|
||||
When used in conjunction with a MaxEntClassifier, each score would
|
||||
correspond to the confidence of a particular edge being classified
|
||||
with the positive training examples.
|
||||
"""
|
||||
raise NotImplementedError()
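# Illustrative note (not part of the original module): with the example matrix
# in the docstring above, scores[0][1] == [5] is the score list for an arc from
# node 0 (the root) to node 1, and scores[2][1] == [10] for an arc from node 2
# to node 1.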
|
||||
|
||||
|
||||
#################################################################
|
||||
# NaiveBayesDependencyScorer
|
||||
#################################################################
|
||||
|
||||
|
||||
class NaiveBayesDependencyScorer(DependencyScorerI):
|
||||
"""
|
||||
A dependency scorer built around a MaxEnt classifier. In this
|
||||
particular class that classifier is a ``NaiveBayesClassifier``.
|
||||
It uses head-word, head-tag, child-word, and child-tag features
|
||||
for classification.
|
||||
|
||||
>>> from nltk.parse.dependencygraph import DependencyGraph, conll_data2
|
||||
|
||||
>>> graphs = [DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry]
|
||||
>>> npp = ProbabilisticNonprojectiveParser()
|
||||
>>> npp.train(graphs, NaiveBayesDependencyScorer())
|
||||
>>> parses = npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'])
|
||||
>>> len(list(parses))
|
||||
1
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
pass # Do nothing without throwing error
|
||||
|
||||
def train(self, graphs):
|
||||
"""
|
||||
Trains a ``NaiveBayesClassifier`` using the edges present in
|
||||
graphs list as positive examples, the edges not present as
|
||||
negative examples. Uses a feature vector of head-word,
|
||||
head-tag, child-word, and child-tag.
|
||||
|
||||
:type graphs: list(DependencyGraph)
|
||||
:param graphs: A list of dependency graphs to train the scorer.
|
||||
"""
|
||||
|
||||
from nltk.classify import NaiveBayesClassifier
|
||||
|
||||
# Create labeled training examples
|
||||
labeled_examples = []
|
||||
for graph in graphs:
|
||||
for head_node in graph.nodes.values():
|
||||
for child_index, child_node in graph.nodes.items():
|
||||
if child_index in head_node["deps"]:
|
||||
label = "T"
|
||||
else:
|
||||
label = "F"
|
||||
labeled_examples.append(
|
||||
(
|
||||
dict(
|
||||
a=head_node["word"],
|
||||
b=head_node["tag"],
|
||||
c=child_node["word"],
|
||||
d=child_node["tag"],
|
||||
),
|
||||
label,
|
||||
)
|
||||
)
|
||||
|
||||
self.classifier = NaiveBayesClassifier.train(labeled_examples)
|
||||
|
||||
def score(self, graph):
|
||||
"""
|
||||
Converts the graph into a feature-based representation of
|
||||
each edge, and then assigns a score to each based on the
|
||||
confidence of the classifier in assigning it to the
|
||||
positive label. Scores are returned in a multidimensional list.
|
||||
|
||||
:type graph: DependencyGraph
|
||||
:param graph: A dependency graph to score.
|
||||
:rtype: 3 dimensional list
|
||||
:return: Edge scores for the graph parameter.
|
||||
"""
|
||||
# Convert graph to feature representation
|
||||
edges = []
|
||||
for head_node in graph.nodes.values():
|
||||
for child_node in graph.nodes.values():
|
||||
edges.append(
|
||||
dict(
|
||||
a=head_node["word"],
|
||||
b=head_node["tag"],
|
||||
c=child_node["word"],
|
||||
d=child_node["tag"],
|
||||
)
|
||||
)
|
||||
|
||||
# Score edges
|
||||
edge_scores = []
|
||||
row = []
|
||||
count = 0
|
||||
for pdist in self.classifier.prob_classify_many(edges):
|
||||
logger.debug("%.4f %.4f", pdist.prob("T"), pdist.prob("F"))
|
||||
# smoothing in case the probability = 0
|
||||
row.append([math.log(pdist.prob("T") + 0.00000000001)])
|
||||
count += 1
|
||||
if count == len(graph.nodes):
|
||||
edge_scores.append(row)
|
||||
row = []
|
||||
count = 0
|
||||
return edge_scores
|
||||
|
||||
|
||||
#################################################################
|
||||
# A Scorer for Demo Purposes
|
||||
#################################################################
|
||||
# A short class necessary to show parsing example from paper
|
||||
class DemoScorer(DependencyScorerI):
|
||||
def train(self, graphs):
|
||||
print("Training...")
|
||||
|
||||
def score(self, graph):
|
||||
# scores for Keith Hall 'K-best Spanning Tree Parsing' paper
|
||||
return [
|
||||
[[], [5], [1], [1]],
|
||||
[[], [], [11], [4]],
|
||||
[[], [10], [], [5]],
|
||||
[[], [8], [8], []],
|
||||
]
|
||||
|
||||
|
||||
#################################################################
|
||||
# Non-Projective Probabilistic Parsing
|
||||
#################################################################
|
||||
|
||||
|
||||
class ProbabilisticNonprojectiveParser:
|
||||
"""A probabilistic non-projective dependency parser.
|
||||
|
||||
Nonprojective dependencies allow for "crossing branches" in the parse tree,
which is necessary for representing particular linguistic phenomena, or even
typical parses in some languages. This parser follows the MST parsing
algorithm, outlined in McDonald (2005), which likens the search for the best
|
||||
non-projective parse to finding the maximum spanning tree in a weighted
|
||||
directed graph.
|
||||
|
||||
>>> class Scorer(DependencyScorerI):
|
||||
... def train(self, graphs):
|
||||
... pass
|
||||
...
|
||||
... def score(self, graph):
|
||||
... return [
|
||||
... [[], [5], [1], [1]],
|
||||
... [[], [], [11], [4]],
|
||||
... [[], [10], [], [5]],
|
||||
... [[], [8], [8], []],
|
||||
... ]
|
||||
|
||||
|
||||
>>> npp = ProbabilisticNonprojectiveParser()
|
||||
>>> npp.train([], Scorer())
|
||||
|
||||
>>> parses = npp.parse(['v1', 'v2', 'v3'], [None, None, None])
|
||||
>>> len(list(parses))
|
||||
1
|
||||
|
||||
Rule based example
|
||||
|
||||
>>> from nltk.grammar import DependencyGrammar
|
||||
|
||||
>>> grammar = DependencyGrammar.fromstring('''
|
||||
... 'taught' -> 'play' | 'man'
|
||||
... 'man' -> 'the' | 'in'
|
||||
... 'in' -> 'corner'
|
||||
... 'corner' -> 'the'
|
||||
... 'play' -> 'golf' | 'dachshund' | 'to'
|
||||
... 'dachshund' -> 'his'
|
||||
... ''')
|
||||
|
||||
>>> ndp = NonprojectiveDependencyParser(grammar)
|
||||
>>> parses = ndp.parse(['the', 'man', 'in', 'the', 'corner', 'taught', 'his', 'dachshund', 'to', 'play', 'golf'])
|
||||
>>> len(list(parses))
|
||||
4
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Creates a new non-projective parser.
|
||||
"""
|
||||
logging.debug("initializing prob. nonprojective...")
|
||||
|
||||
def train(self, graphs, dependency_scorer):
|
||||
"""
|
||||
Trains a ``DependencyScorerI`` from a set of ``DependencyGraph`` objects,
|
||||
and establishes this as the parser's scorer. This is used to
|
||||
initialize the scores on a ``DependencyGraph`` during the parsing
|
||||
procedure.
|
||||
|
||||
:type graphs: list(DependencyGraph)
|
||||
:param graphs: A list of dependency graphs to train the scorer.
|
||||
:type dependency_scorer: DependencyScorerI
|
||||
:param dependency_scorer: A scorer which implements the
|
||||
``DependencyScorerI`` interface.
|
||||
"""
|
||||
self._scorer = dependency_scorer
|
||||
self._scorer.train(graphs)
|
||||
|
||||
def initialize_edge_scores(self, graph):
|
||||
"""
|
||||
Assigns a score to every edge in the ``DependencyGraph`` graph.
|
||||
These scores are generated via the parser's scorer which
|
||||
was assigned during the training process.
|
||||
|
||||
:type graph: DependencyGraph
|
||||
:param graph: A dependency graph to assign scores to.
|
||||
"""
|
||||
self.scores = self._scorer.score(graph)
|
||||
|
||||
def collapse_nodes(self, new_node, cycle_path, g_graph, b_graph, c_graph):
|
||||
"""
|
||||
Takes a list of nodes that have been identified to belong to a cycle,
|
||||
and collapses them into one larger node. The arcs of all nodes in
|
||||
the graph must be updated to account for this.
|
||||
|
||||
:type new_node: Node.
|
||||
:param new_node: A Node (Dictionary) to collapse the cycle nodes into.
|
||||
:type cycle_path: A list of integers.
|
||||
:param cycle_path: A list of node addresses, each of which is in the cycle.
|
||||
:type g_graph, b_graph, c_graph: DependencyGraph
|
||||
:param g_graph, b_graph, c_graph: Graphs which need to be updated.
|
||||
"""
|
||||
logger.debug("Collapsing nodes...")
|
||||
# Collapse all cycle nodes into v_n+1 in G_Graph
|
||||
for cycle_node_index in cycle_path:
|
||||
g_graph.remove_by_address(cycle_node_index)
|
||||
g_graph.add_node(new_node)
|
||||
g_graph.redirect_arcs(cycle_path, new_node["address"])
|
||||
|
||||
def update_edge_scores(self, new_node, cycle_path):
|
||||
"""
|
||||
Updates the edge scores to reflect a collapse operation into
|
||||
new_node.
|
||||
|
||||
:type new_node: A Node.
|
||||
:param new_node: The node which cycle nodes are collapsed into.
|
||||
:type cycle_path: A list of integers.
|
||||
:param cycle_path: A list of node addresses that belong to the cycle.
|
||||
"""
|
||||
logger.debug("cycle %s", cycle_path)
|
||||
|
||||
cycle_path = self.compute_original_indexes(cycle_path)
|
||||
|
||||
logger.debug("old cycle %s", cycle_path)
|
||||
logger.debug("Prior to update: %s", self.scores)
|
||||
|
||||
for i, row in enumerate(self.scores):
|
||||
for j, column in enumerate(self.scores[i]):
|
||||
logger.debug(self.scores[i][j])
|
||||
if j in cycle_path and i not in cycle_path and self.scores[i][j]:
|
||||
subtract_val = self.compute_max_subtract_score(j, cycle_path)
|
||||
|
||||
logger.debug("%s - %s", self.scores[i][j], subtract_val)
|
||||
|
||||
new_vals = []
|
||||
for cur_val in self.scores[i][j]:
|
||||
new_vals.append(cur_val - subtract_val)
|
||||
|
||||
self.scores[i][j] = new_vals
|
||||
|
||||
for i, row in enumerate(self.scores):
|
||||
for j, cell in enumerate(self.scores[i]):
|
||||
if i in cycle_path and j in cycle_path:
|
||||
self.scores[i][j] = []
|
||||
|
||||
logger.debug("After update: %s", self.scores)
|
||||
|
||||
def compute_original_indexes(self, new_indexes):
|
||||
"""
|
||||
As nodes are collapsed into others, they are replaced
|
||||
by the new node in the graph, but it's still necessary
|
||||
to keep track of what these original nodes were. This
|
||||
takes a list of node addresses and replaces any collapsed
|
||||
node addresses with their original addresses.
|
||||
|
||||
:type new_indexes: A list of integers.
|
||||
:param new_indexes: A list of node addresses to check for
|
||||
subsumed nodes.
|
||||
"""
|
||||
swapped = True
|
||||
while swapped:
|
||||
originals = []
|
||||
swapped = False
|
||||
for new_index in new_indexes:
|
||||
if new_index in self.inner_nodes:
|
||||
for old_val in self.inner_nodes[new_index]:
|
||||
if old_val not in originals:
|
||||
originals.append(old_val)
|
||||
swapped = True
|
||||
else:
|
||||
originals.append(new_index)
|
||||
new_indexes = originals
|
||||
return new_indexes
|
||||
|
||||
def compute_max_subtract_score(self, column_index, cycle_indexes):
|
||||
"""
|
||||
When updating scores the score of the highest-weighted incoming
|
||||
arc is subtracted upon collapse. This returns the correct
|
||||
amount to subtract from that edge.
|
||||
|
||||
:type column_index: integer.
|
||||
:param column_index: An index representing the column of incoming arcs
|
||||
to a particular node being updated
|
||||
:type cycle_indexes: A list of integers.
|
||||
:param cycle_indexes: Only arcs from cycle nodes are considered. This
|
||||
is a list of such node addresses.
|
||||
"""
|
||||
max_score = -100000
|
||||
for row_index in cycle_indexes:
|
||||
for subtract_val in self.scores[row_index][column_index]:
|
||||
if subtract_val > max_score:
|
||||
max_score = subtract_val
|
||||
return max_score
|
||||
|
||||
def best_incoming_arc(self, node_index):
|
||||
"""
|
||||
Returns the source of the best incoming arc to the
|
||||
node with address: node_index
|
||||
|
||||
:type node_index: integer.
|
||||
:param node_index: The address of the 'destination' node,
|
||||
the node that is arced to.
|
||||
"""
|
||||
originals = self.compute_original_indexes([node_index])
|
||||
logger.debug("originals: %s", originals)
|
||||
|
||||
max_arc = None
|
||||
max_score = None
|
||||
for row_index in range(len(self.scores)):
|
||||
for col_index in range(len(self.scores[row_index])):
|
||||
if col_index in originals and (
|
||||
max_score is None or self.scores[row_index][col_index] > max_score
|
||||
):
|
||||
max_score = self.scores[row_index][col_index]
|
||||
max_arc = row_index
|
||||
logger.debug("%s, %s", row_index, col_index)
|
||||
|
||||
logger.debug(max_score)
|
||||
|
||||
for key in self.inner_nodes:
|
||||
replaced_nodes = self.inner_nodes[key]
|
||||
if max_arc in replaced_nodes:
|
||||
return key
|
||||
|
||||
return max_arc
|
||||
|
||||
def original_best_arc(self, node_index):
|
||||
originals = self.compute_original_indexes([node_index])
|
||||
max_arc = None
|
||||
max_score = None
|
||||
max_orig = None
|
||||
for row_index in range(len(self.scores)):
|
||||
for col_index in range(len(self.scores[row_index])):
|
||||
if col_index in originals and (
|
||||
max_score is None or self.scores[row_index][col_index] > max_score
|
||||
):
|
||||
max_score = self.scores[row_index][col_index]
|
||||
max_arc = row_index
|
||||
max_orig = col_index
|
||||
return [max_arc, max_orig]
|
||||
|
||||
def parse(self, tokens, tags):
|
||||
"""
|
||||
Parses a list of tokens in accordance to the MST parsing algorithm
|
||||
for non-projective dependency parses. Assumes that the tokens to
|
||||
be parsed have already been tagged and those tags are provided. Various
|
||||
scoring methods can be used by implementing the ``DependencyScorerI``
|
||||
interface and passing it to the training algorithm.
|
||||
|
||||
:type tokens: list(str)
|
||||
:param tokens: A list of words or punctuation to be parsed.
|
||||
:type tags: list(str)
|
||||
:param tags: A list of tags corresponding by index to the words in the tokens list.
|
||||
:return: An iterator of non-projective parses.
|
||||
:rtype: iter(DependencyGraph)
|
||||
"""
|
||||
self.inner_nodes = {}
|
||||
|
||||
# Initialize g_graph
|
||||
g_graph = DependencyGraph()
|
||||
for index, token in enumerate(tokens):
|
||||
g_graph.nodes[index + 1].update(
|
||||
{"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
|
||||
)
|
||||
|
||||
# Fully connect non-root nodes in g_graph
|
||||
g_graph.connect_graph()
|
||||
original_graph = DependencyGraph()
|
||||
for index, token in enumerate(tokens):
|
||||
original_graph.nodes[index + 1].update(
|
||||
{"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
|
||||
)
|
||||
|
||||
b_graph = DependencyGraph()
|
||||
c_graph = DependencyGraph()
|
||||
|
||||
for index, token in enumerate(tokens):
|
||||
c_graph.nodes[index + 1].update(
|
||||
{"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
|
||||
)
|
||||
|
||||
# Assign initial scores to g_graph edges
|
||||
self.initialize_edge_scores(g_graph)
|
||||
logger.debug(self.scores)
|
||||
# Initialize a list of unvisited vertices (by node address)
|
||||
unvisited_vertices = [vertex["address"] for vertex in c_graph.nodes.values()]
|
||||
# Iterate over unvisited vertices
|
||||
nr_vertices = len(tokens)
|
||||
betas = {}
|
||||
while unvisited_vertices:
|
||||
# Mark current node as visited
|
||||
current_vertex = unvisited_vertices.pop(0)
|
||||
logger.debug("current_vertex: %s", current_vertex)
|
||||
# Get corresponding node n_i to vertex v_i
|
||||
current_node = g_graph.get_by_address(current_vertex)
|
||||
logger.debug("current_node: %s", current_node)
|
||||
# Get best in-edge node b for current node
|
||||
best_in_edge = self.best_incoming_arc(current_vertex)
|
||||
betas[current_vertex] = self.original_best_arc(current_vertex)
|
||||
logger.debug("best in arc: %s --> %s", best_in_edge, current_vertex)
|
||||
# b_graph = Union(b_graph, b)
|
||||
for new_vertex in [current_vertex, best_in_edge]:
|
||||
b_graph.nodes[new_vertex].update(
|
||||
{"word": "TEMP", "rel": "NTOP", "address": new_vertex}
|
||||
)
|
||||
b_graph.add_arc(best_in_edge, current_vertex)
|
||||
# Beta(current node) = b - stored for parse recovery
|
||||
# If b_graph contains a cycle, collapse it
|
||||
cycle_path = b_graph.contains_cycle()
|
||||
if cycle_path:
|
||||
# Create a new node v_n+1 with address = len(nodes) + 1
|
||||
new_node = {"word": "NONE", "rel": "NTOP", "address": nr_vertices + 1}
|
||||
# c_graph = Union(c_graph, v_n+1)
|
||||
c_graph.add_node(new_node)
|
||||
# Collapse all nodes in cycle C into v_n+1
|
||||
self.update_edge_scores(new_node, cycle_path)
|
||||
self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph)
|
||||
for cycle_index in cycle_path:
|
||||
c_graph.add_arc(new_node["address"], cycle_index)
|
||||
# self.replaced_by[cycle_index] = new_node['address']
|
||||
|
||||
self.inner_nodes[new_node["address"]] = cycle_path
|
||||
|
||||
# Add v_n+1 to list of unvisited vertices
|
||||
unvisited_vertices.insert(0, nr_vertices + 1)
|
||||
|
||||
# increment # of nodes counter
|
||||
nr_vertices += 1
|
||||
|
||||
# Remove cycle nodes from b_graph; B = B - cycle c
|
||||
for cycle_node_address in cycle_path:
|
||||
b_graph.remove_by_address(cycle_node_address)
|
||||
|
||||
logger.debug("g_graph: %s", g_graph)
|
||||
logger.debug("b_graph: %s", b_graph)
|
||||
logger.debug("c_graph: %s", c_graph)
|
||||
logger.debug("Betas: %s", betas)
|
||||
logger.debug("replaced nodes %s", self.inner_nodes)
|
||||
|
||||
# Recover parse tree
|
||||
logger.debug("Final scores: %s", self.scores)
|
||||
|
||||
logger.debug("Recovering parse...")
|
||||
for i in range(len(tokens) + 1, nr_vertices + 1):
|
||||
betas[betas[i][1]] = betas[i]
|
||||
|
||||
logger.debug("Betas: %s", betas)
|
||||
for node in original_graph.nodes.values():
|
||||
# TODO: It's dangerous to assume that deps is a dictionary
# because it's a default dictionary. Ideally, here we should not
# be concerned with how dependencies are stored inside of a
# dependency graph.
|
||||
node["deps"] = {}
|
||||
for i in range(1, len(tokens) + 1):
|
||||
original_graph.add_arc(betas[i][0], betas[i][1])
|
||||
|
||||
logger.debug("Done.")
|
||||
yield original_graph
|
||||
|
||||
|
||||
#################################################################
|
||||
# Rule-based Non-Projective Parser
|
||||
#################################################################
|
||||
|
||||
|
||||
class NonprojectiveDependencyParser:
|
||||
"""
|
||||
A non-projective, rule-based, dependency parser. This parser
|
||||
will return the set of all possible non-projective parses based on
|
||||
the word-to-word relations defined in the parser's dependency
|
||||
grammar, and will allow the branches of the parse tree to cross
|
||||
in order to capture a variety of linguistic phenomena that a
|
||||
projective parser will not.
|
||||
"""
|
||||
|
||||
def __init__(self, dependency_grammar):
|
||||
"""
|
||||
Creates a new ``NonprojectiveDependencyParser``.
|
||||
|
||||
:param dependency_grammar: a grammar of word-to-word relations.
|
||||
:type dependency_grammar: DependencyGrammar
|
||||
"""
|
||||
self._grammar = dependency_grammar
|
||||
|
||||
def parse(self, tokens):
|
||||
"""
|
||||
Parses the input tokens with respect to the parser's grammar. Parsing
|
||||
is accomplished by representing the search-space of possible parses as
|
||||
a fully-connected directed graph. Arcs that would lead to ungrammatical
|
||||
parses are removed and a lattice is constructed of length n, where n is
|
||||
the number of input tokens, to represent all possible grammatical
|
||||
traversals. All possible paths through the lattice are then enumerated
|
||||
to produce the set of non-projective parses.
|
||||
|
||||
:param tokens: A list of tokens to parse.
:type tokens: list(str)
:return: An iterator of non-projective parses.
:rtype: iter(DependencyGraph)
|
||||
"""
|
||||
# Create graph representation of tokens
|
||||
self._graph = DependencyGraph()
|
||||
|
||||
for index, token in enumerate(tokens):
|
||||
self._graph.nodes[index] = {
|
||||
"word": token,
|
||||
"deps": [],
|
||||
"rel": "NTOP",
|
||||
"address": index,
|
||||
}
|
||||
|
||||
for head_node in self._graph.nodes.values():
|
||||
deps = []
|
||||
for dep_node in self._graph.nodes.values():
|
||||
if (
|
||||
self._grammar.contains(head_node["word"], dep_node["word"])
|
||||
and head_node["word"] != dep_node["word"]
|
||||
):
|
||||
deps.append(dep_node["address"])
|
||||
head_node["deps"] = deps
|
||||
|
||||
# Create lattice of possible heads
|
||||
roots = []
|
||||
possible_heads = []
|
||||
for i, word in enumerate(tokens):
|
||||
heads = []
|
||||
for j, head in enumerate(tokens):
|
||||
if (i != j) and self._grammar.contains(head, word):
|
||||
heads.append(j)
|
||||
if len(heads) == 0:
|
||||
roots.append(i)
|
||||
possible_heads.append(heads)
|
||||
|
||||
# Set roots to attempt
|
||||
if len(roots) < 2:
|
||||
if len(roots) == 0:
|
||||
for i in range(len(tokens)):
|
||||
roots.append(i)
|
||||
|
||||
# Traverse lattice
|
||||
analyses = []
|
||||
for _ in roots:
|
||||
stack = []
|
||||
analysis = [[] for i in range(len(possible_heads))]
|
||||
i = 0
|
||||
forward = True
|
||||
while i >= 0:
|
||||
if forward:
|
||||
if len(possible_heads[i]) == 1:
|
||||
analysis[i] = possible_heads[i][0]
|
||||
elif len(possible_heads[i]) == 0:
|
||||
analysis[i] = -1
|
||||
else:
|
||||
head = possible_heads[i].pop()
|
||||
analysis[i] = head
|
||||
stack.append([i, head])
|
||||
if not forward:
|
||||
index_on_stack = False
|
||||
for stack_item in stack:
|
||||
if stack_item[0] == i:
|
||||
index_on_stack = True
|
||||
orig_length = len(possible_heads[i])
|
||||
|
||||
if index_on_stack and orig_length == 0:
|
||||
for j in range(len(stack) - 1, -1, -1):
|
||||
stack_item = stack[j]
|
||||
if stack_item[0] == i:
|
||||
possible_heads[i].append(stack.pop(j)[1])
|
||||
|
||||
elif index_on_stack and orig_length > 0:
|
||||
head = possible_heads[i].pop()
|
||||
analysis[i] = head
|
||||
stack.append([i, head])
|
||||
forward = True
|
||||
|
||||
if i + 1 == len(possible_heads):
|
||||
analyses.append(analysis[:])
|
||||
forward = False
|
||||
if forward:
|
||||
i += 1
|
||||
else:
|
||||
i -= 1
|
||||
|
||||
# Filter parses
|
||||
# ensure 1 root, everything has 1 head
|
||||
for analysis in analyses:
|
||||
if analysis.count(-1) > 1:
|
||||
# there are several root elements!
|
||||
continue
|
||||
|
||||
graph = DependencyGraph()
|
||||
graph.root = graph.nodes[analysis.index(-1) + 1]
|
||||
|
||||
for address, (token, head_index) in enumerate(
|
||||
zip(tokens, analysis), start=1
|
||||
):
|
||||
head_address = head_index + 1
|
||||
|
||||
node = graph.nodes[address]
|
||||
node.update({"word": token, "address": address})
|
||||
|
||||
if head_address == 0:
|
||||
rel = "ROOT"
|
||||
else:
|
||||
rel = ""
|
||||
graph.nodes[head_index + 1]["deps"][rel].append(address)
|
||||
|
||||
# TODO: check for cycles
|
||||
yield graph
|
||||
|
||||
|
||||
#################################################################
|
||||
# Demos
|
||||
#################################################################
|
||||
|
||||
|
||||
def demo():
|
||||
# hall_demo()
|
||||
nonprojective_conll_parse_demo()
|
||||
rule_based_demo()
|
||||
|
||||
|
||||
def hall_demo():
|
||||
npp = ProbabilisticNonprojectiveParser()
|
||||
npp.train([], DemoScorer())
|
||||
for parse_graph in npp.parse(["v1", "v2", "v3"], [None, None, None]):
|
||||
print(parse_graph)
|
||||
|
||||
|
||||
def nonprojective_conll_parse_demo():
|
||||
from nltk.parse.dependencygraph import conll_data2
|
||||
|
||||
graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
|
||||
npp = ProbabilisticNonprojectiveParser()
|
||||
npp.train(graphs, NaiveBayesDependencyScorer())
|
||||
for parse_graph in npp.parse(
|
||||
["Cathy", "zag", "hen", "zwaaien", "."], ["N", "V", "Pron", "Adj", "N", "Punc"]
|
||||
):
|
||||
print(parse_graph)
|
||||
|
||||
|
||||
def rule_based_demo():
|
||||
from nltk.grammar import DependencyGrammar
|
||||
|
||||
grammar = DependencyGrammar.fromstring(
|
||||
"""
|
||||
'taught' -> 'play' | 'man'
|
||||
'man' -> 'the' | 'in'
|
||||
'in' -> 'corner'
|
||||
'corner' -> 'the'
|
||||
'play' -> 'golf' | 'dachshund' | 'to'
|
||||
'dachshund' -> 'his'
|
||||
"""
|
||||
)
|
||||
print(grammar)
|
||||
ndp = NonprojectiveDependencyParser(grammar)
|
||||
graphs = ndp.parse(
|
||||
[
|
||||
"the",
|
||||
"man",
|
||||
"in",
|
||||
"the",
|
||||
"corner",
|
||||
"taught",
|
||||
"his",
|
||||
"dachshund",
|
||||
"to",
|
||||
"play",
|
||||
"golf",
|
||||
]
|
||||
)
|
||||
print("Graphs:")
|
||||
for graph in graphs:
|
||||
print(graph)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
579
backend/venv/Lib/site-packages/nltk/parse/pchart.py
Normal file
@@ -0,0 +1,579 @@
|
||||
# Natural Language Toolkit: Probabilistic Chart Parsers
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Classes and interfaces for associating probabilities with tree
|
||||
structures that represent the internal organization of a text. The
|
||||
probabilistic parser module defines ``BottomUpProbabilisticChartParser``.
|
||||
|
||||
``BottomUpProbabilisticChartParser`` is an abstract class that implements
|
||||
a bottom-up chart parser for ``PCFG`` grammars. It maintains a queue of edges,
|
||||
and adds them to the chart one at a time. The ordering of this queue
|
||||
is based on the probabilities associated with the edges, allowing the
|
||||
parser to expand more likely edges before less likely ones. Each
|
||||
subclass implements a different queue ordering, producing different
|
||||
search strategies. Currently the following subclasses are defined:
|
||||
|
||||
- ``InsideChartParser`` searches edges in decreasing order of
|
||||
their trees' inside probabilities.
|
||||
- ``RandomChartParser`` searches edges in random order.
|
||||
- ``LongestChartParser`` searches edges in decreasing order of their
|
||||
location's length.
|
||||
|
||||
The ``BottomUpProbabilisticChartParser`` constructor has an optional
|
||||
argument beam_size. If non-zero, this controls the size of the beam
|
||||
(aka the edge queue). This option is most useful with InsideChartParser.
|
||||
"""
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Bottom-Up PCFG Chart Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
# [XX] This might not be implemented quite right -- it would be better
|
||||
# to associate probabilities with child pointer lists.
|
||||
|
||||
import random
|
||||
from functools import reduce
|
||||
|
||||
from nltk.grammar import PCFG, Nonterminal
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.parse.chart import AbstractChartRule, Chart, LeafEdge, TreeEdge
|
||||
from nltk.tree import ProbabilisticTree, Tree
|
||||
|
||||
|
||||
# Probabilistic edges
|
||||
class ProbabilisticLeafEdge(LeafEdge):
|
||||
def prob(self):
|
||||
return 1.0
|
||||
|
||||
|
||||
class ProbabilisticTreeEdge(TreeEdge):
|
||||
def __init__(self, prob, *args, **kwargs):
|
||||
TreeEdge.__init__(self, *args, **kwargs)
|
||||
self._prob = prob
|
||||
# two edges with different probabilities are not equal.
|
||||
self._comparison_key = (self._comparison_key, prob)
|
||||
|
||||
def prob(self):
|
||||
return self._prob
|
||||
|
||||
@staticmethod
|
||||
def from_production(production, index, p):
|
||||
return ProbabilisticTreeEdge(
|
||||
p, (index, index), production.lhs(), production.rhs(), 0
|
||||
)
|
||||
|
||||
|
||||
# Rules using probabilistic edges
|
||||
class ProbabilisticBottomUpInitRule(AbstractChartRule):
|
||||
NUM_EDGES = 0
|
||||
|
||||
def apply(self, chart, grammar):
|
||||
for index in range(chart.num_leaves()):
|
||||
new_edge = ProbabilisticLeafEdge(chart.leaf(index), index)
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class ProbabilisticBottomUpPredictRule(AbstractChartRule):
|
||||
NUM_EDGES = 1
|
||||
|
||||
def apply(self, chart, grammar, edge):
|
||||
if edge.is_incomplete():
|
||||
return
|
||||
for prod in grammar.productions():
|
||||
if edge.lhs() == prod.rhs()[0]:
|
||||
new_edge = ProbabilisticTreeEdge.from_production(
|
||||
prod, edge.start(), prod.prob()
|
||||
)
|
||||
if chart.insert(new_edge, ()):
|
||||
yield new_edge
|
||||
|
||||
|
||||
class ProbabilisticFundamentalRule(AbstractChartRule):
|
||||
NUM_EDGES = 2
|
||||
|
||||
def apply(self, chart, grammar, left_edge, right_edge):
|
||||
# Make sure the rule is applicable.
|
||||
if not (
|
||||
left_edge.end() == right_edge.start()
|
||||
and left_edge.nextsym() == right_edge.lhs()
|
||||
and left_edge.is_incomplete()
|
||||
and right_edge.is_complete()
|
||||
):
|
||||
return
|
||||
|
||||
# Construct the new edge.
|
||||
p = left_edge.prob() * right_edge.prob()
|
||||
new_edge = ProbabilisticTreeEdge(
|
||||
p,
|
||||
span=(left_edge.start(), right_edge.end()),
|
||||
lhs=left_edge.lhs(),
|
||||
rhs=left_edge.rhs(),
|
||||
dot=left_edge.dot() + 1,
|
||||
)
|
||||
|
||||
# Add it to the chart, with appropriate child pointers.
|
||||
changed_chart = False
|
||||
for cpl1 in chart.child_pointer_lists(left_edge):
|
||||
if chart.insert(new_edge, cpl1 + (right_edge,)):
|
||||
changed_chart = True
|
||||
|
||||
# If we changed the chart, then generate the edge.
|
||||
if changed_chart:
|
||||
yield new_edge
|
||||
|
||||
|
||||
class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule):
|
||||
NUM_EDGES = 1
|
||||
|
||||
_fundamental_rule = ProbabilisticFundamentalRule()
|
||||
|
||||
def apply(self, chart, grammar, edge1):
|
||||
fr = self._fundamental_rule
|
||||
if edge1.is_incomplete():
|
||||
# edge1 = left_edge; edge2 = right_edge
|
||||
for edge2 in chart.select(
|
||||
start=edge1.end(), is_complete=True, lhs=edge1.nextsym()
|
||||
):
|
||||
yield from fr.apply(chart, grammar, edge1, edge2)
|
||||
else:
|
||||
# edge2 = left_edge; edge1 = right_edge
|
||||
for edge2 in chart.select(
|
||||
end=edge1.start(), is_complete=False, nextsym=edge1.lhs()
|
||||
):
|
||||
yield from fr.apply(chart, grammar, edge2, edge1)
|
||||
|
||||
def __str__(self):
|
||||
return "Fundamental Rule"
|
||||
|
||||
|
||||
class BottomUpProbabilisticChartParser(ParserI):
|
||||
"""
|
||||
An abstract bottom-up parser for ``PCFG`` grammars that uses a ``Chart`` to
|
||||
record partial results. ``BottomUpProbabilisticChartParser`` maintains
|
||||
a queue of edges that can be added to the chart. This queue is
|
||||
initialized with edges for each token in the text that is being
|
||||
parsed. ``BottomUpProbabilisticChartParser`` inserts these edges into
|
||||
the chart one at a time, starting with the most likely edges, and
|
||||
proceeding to less likely edges. For each edge that is added to
|
||||
the chart, it may become possible to insert additional edges into
|
||||
the chart; these are added to the queue. This process continues
|
||||
until enough complete parses have been generated, or until the
|
||||
queue is empty.
|
||||
|
||||
The sorting order for the queue is not specified by
|
||||
``BottomUpProbabilisticChartParser``. Different sorting orders will
|
||||
result in different search strategies. The sorting order for the
|
||||
queue is defined by the method ``sort_queue``; subclasses are required
|
||||
to provide a definition for this method.
|
||||
|
||||
:type _grammar: PCFG
|
||||
:ivar _grammar: The grammar used to parse sentences.
|
||||
:type _trace: int
|
||||
:ivar _trace: The level of tracing output that should be generated
|
||||
when parsing a text.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, beam_size=0, trace=0):
|
||||
"""
|
||||
Create a new ``BottomUpProbabilisticChartParser``, that uses
|
||||
``grammar`` to parse texts.
|
||||
|
||||
:type grammar: PCFG
|
||||
:param grammar: The grammar used to parse texts.
|
||||
:type beam_size: int
|
||||
:param beam_size: The maximum length for the parser's edge queue.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing
|
||||
output.
|
||||
"""
|
||||
if not isinstance(grammar, PCFG):
|
||||
raise ValueError("The grammar must be probabilistic PCFG")
|
||||
self._grammar = grammar
|
||||
self.beam_size = beam_size
|
||||
self._trace = trace
|
||||
|
||||
def grammar(self):
|
||||
return self._grammar
|
||||
|
||||
def trace(self, trace=2):
|
||||
"""
|
||||
Set the level of tracing output that should be generated when
|
||||
parsing a text.
|
||||
|
||||
:type trace: int
|
||||
:param trace: The trace level. A trace level of ``0`` will
|
||||
generate no tracing output; and higher trace levels will
|
||||
produce more verbose tracing output.
|
||||
:rtype: None
|
||||
"""
|
||||
self._trace = trace
|
||||
|
||||
# TODO: change this to conform more with the standard ChartParser
|
||||
def parse(self, tokens):
|
||||
self._grammar.check_coverage(tokens)
|
||||
chart = Chart(list(tokens))
|
||||
grammar = self._grammar
|
||||
|
||||
# Chart parser rules.
|
||||
bu_init = ProbabilisticBottomUpInitRule()
|
||||
bu = ProbabilisticBottomUpPredictRule()
|
||||
fr = SingleEdgeProbabilisticFundamentalRule()
|
||||
|
||||
# Our queue
|
||||
queue = []
|
||||
|
||||
# Initialize the chart.
|
||||
for edge in bu_init.apply(chart, grammar):
|
||||
if self._trace > 1:
|
||||
print(
|
||||
" %-50s [%s]"
|
||||
% (chart.pretty_format_edge(edge, width=2), edge.prob())
|
||||
)
|
||||
queue.append(edge)
|
||||
|
||||
while len(queue) > 0:
|
||||
# Re-sort the queue.
|
||||
self.sort_queue(queue, chart)
|
||||
|
||||
# Prune the queue to the correct size if a beam was defined
|
||||
if self.beam_size:
|
||||
self._prune(queue, chart)
|
||||
|
||||
# Get the best edge.
|
||||
edge = queue.pop()
|
||||
if self._trace > 0:
|
||||
print(
|
||||
" %-50s [%s]"
|
||||
% (chart.pretty_format_edge(edge, width=2), edge.prob())
|
||||
)
|
||||
|
||||
# Apply BU & FR to it.
|
||||
queue.extend(bu.apply(chart, grammar, edge))
|
||||
queue.extend(fr.apply(chart, grammar, edge))
|
||||
|
||||
# Get a list of complete parses.
|
||||
parses = list(chart.parses(grammar.start(), ProbabilisticTree))
|
||||
|
||||
# Assign probabilities to the trees.
|
||||
prod_probs = {}
|
||||
for prod in grammar.productions():
|
||||
prod_probs[prod.lhs(), prod.rhs()] = prod.prob()
|
||||
for parse in parses:
|
||||
self._setprob(parse, prod_probs)
|
||||
|
||||
# Sort by probability
|
||||
parses.sort(reverse=True, key=lambda tree: tree.prob())
|
||||
|
||||
return iter(parses)
|
||||
|
||||
def _setprob(self, tree, prod_probs):
|
||||
if tree.prob() is not None:
|
||||
return
|
||||
|
||||
# Get the prob of the CFG production.
|
||||
lhs = Nonterminal(tree.label())
|
||||
rhs = []
|
||||
for child in tree:
|
||||
if isinstance(child, Tree):
|
||||
rhs.append(Nonterminal(child.label()))
|
||||
else:
|
||||
rhs.append(child)
|
||||
prob = prod_probs[lhs, tuple(rhs)]
|
||||
|
||||
# Get the probs of children.
|
||||
for child in tree:
|
||||
if isinstance(child, Tree):
|
||||
self._setprob(child, prod_probs)
|
||||
prob *= child.prob()
|
||||
|
||||
tree.set_prob(prob)
|
||||
|
||||
def sort_queue(self, queue, chart):
|
||||
"""
|
||||
Sort the given queue of ``Edge`` objects, placing the edge that should
be tried first at the end of the queue (edges are popped off the end of
the queue). This method will be called after each ``Edge`` is added to
the queue.
|
||||
|
||||
:param queue: The queue of ``Edge`` objects to sort. Each edge in
|
||||
this queue is an edge that could be added to the chart by
|
||||
the fundamental rule; but that has not yet been added.
|
||||
:type queue: list(Edge)
|
||||
:param chart: The chart being used to parse the text. This
|
||||
chart can be used to provide extra information for sorting
|
||||
the queue.
|
||||
:type chart: Chart
|
||||
:rtype: None
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def _prune(self, queue, chart):
|
||||
"""Discard items in the queue if the queue is longer than the beam."""
|
||||
if len(queue) > self.beam_size:
|
||||
split = len(queue) - self.beam_size
|
||||
if self._trace > 2:
|
||||
for edge in queue[:split]:
|
||||
print(" %-50s [DISCARDED]" % chart.pretty_format_edge(edge, 2))
|
||||
del queue[:split]
|
||||
|
||||
|
||||
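
# Illustrative sketch (not part of the NLTK source): a new search strategy can
# be defined by subclassing BottomUpProbabilisticChartParser and overriding
# sort_queue().  Edges are popped off the *end* of the queue, so the edge that
# should be tried next must sort last.  The class name, toy grammar and helper
# function below are invented for this example.
class ShortestFirstChartParser(BottomUpProbabilisticChartParser):
    """Tries shorter edges before longer ones (the reverse of LongestChartParser)."""

    def sort_queue(self, queue, chart):
        # Longest edges sort first, shortest last; queue.pop() then takes the shortest.
        queue.sort(key=lambda edge: -edge.length())


def _shortest_first_example():
    toy_grammar = PCFG.fromstring(
        """
        S -> 'a' S 'b' [0.4] | 'a' 'b' [0.6]
        """
    )
    parser = ShortestFirstChartParser(toy_grammar)
    for tree in parser.parse("a a b b".split()):
        print(tree.prob(), tree)
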
class InsideChartParser(BottomUpProbabilisticChartParser):
|
||||
"""
|
||||
A bottom-up parser for ``PCFG`` grammars that tries edges in descending
|
||||
order of the inside probabilities of their trees. The "inside
|
||||
probability" of a tree is simply the
|
||||
probability of the entire tree, ignoring its context. In
|
||||
particular, the inside probability of a tree generated by
|
||||
production *p* with children *c[1], c[2], ..., c[n]* is
|
||||
*P(p)P(c[1])P(c[2])...P(c[n])*; and the inside
|
||||
probability of a token is 1 if it is present in the text, and 0 if
|
||||
it is absent.
|
||||
|
||||
This sorting order results in a type of lowest-cost-first search
|
||||
strategy.
|
||||
"""
|
||||
|
||||
# Inherit constructor.
|
||||
def sort_queue(self, queue, chart):
|
||||
"""
|
||||
Sort the given queue of edges, in ascending order of the inside
probabilities of the edges' trees, so that the most probable edge is
at the end of the queue and is popped first.
|
||||
|
||||
:param queue: The queue of ``Edge`` objects to sort. Each edge in
|
||||
this queue is an edge that could be added to the chart by
|
||||
the fundamental rule; but that has not yet been added.
|
||||
:type queue: list(Edge)
|
||||
:param chart: The chart being used to parse the text. This
|
||||
chart can be used to provide extra information for sorting
|
||||
the queue.
|
||||
:type chart: Chart
|
||||
:rtype: None
|
||||
"""
|
||||
queue.sort(key=lambda edge: edge.prob())
|
||||
|
||||
|
||||
# Eventually, this will become some sort of inside-outside parser:
|
||||
# class InsideOutsideParser(BottomUpProbabilisticChartParser):
|
||||
# def __init__(self, grammar, trace=0):
|
||||
# # Inherit docs.
|
||||
# BottomUpProbabilisticChartParser.__init__(self, grammar, trace)
|
||||
#
|
||||
# # Find the best path from S to each nonterminal
|
||||
# bestp = {}
|
||||
# for production in grammar.productions(): bestp[production.lhs()]=0
|
||||
# bestp[grammar.start()] = 1.0
|
||||
#
|
||||
# for i in range(len(grammar.productions())):
|
||||
# for production in grammar.productions():
|
||||
# lhs = production.lhs()
|
||||
# for elt in production.rhs():
|
||||
# bestp[elt] = max(bestp[lhs]*production.prob(),
|
||||
# bestp.get(elt,0))
|
||||
#
|
||||
# self._bestp = bestp
|
||||
# for (k,v) in self._bestp.items(): print(k,v)
|
||||
#
|
||||
# def _sortkey(self, edge):
|
||||
# return edge.structure()[PROB] * self._bestp[edge.lhs()]
|
||||
#
|
||||
# def sort_queue(self, queue, chart):
|
||||
# queue.sort(key=self._sortkey)
|
||||
|
||||
|
||||
class RandomChartParser(BottomUpProbabilisticChartParser):
|
||||
"""
|
||||
A bottom-up parser for ``PCFG`` grammars that tries edges in random order.
|
||||
This sorting order results in a random search strategy.
|
||||
"""
|
||||
|
||||
# Inherit constructor
|
||||
def sort_queue(self, queue, chart):
|
||||
i = random.randint(0, len(queue) - 1)
|
||||
(queue[-1], queue[i]) = (queue[i], queue[-1])
|
||||
|
||||
|
||||
class UnsortedChartParser(BottomUpProbabilisticChartParser):
|
||||
"""
|
||||
A bottom-up parser for ``PCFG`` grammars that tries edges in whatever order they happen to appear in the queue (no sorting is performed).
|
||||
"""
|
||||
|
||||
# Inherit constructor
|
||||
def sort_queue(self, queue, chart):
|
||||
return
|
||||
|
||||
|
||||
class LongestChartParser(BottomUpProbabilisticChartParser):
|
||||
"""
|
||||
A bottom-up parser for ``PCFG`` grammars that tries longer edges before
|
||||
shorter ones. This sorting order results in a type of best-first
|
||||
search strategy.
|
||||
"""
|
||||
|
||||
# Inherit constructor
|
||||
def sort_queue(self, queue, chart):
|
||||
queue.sort(key=lambda edge: edge.length())
|
||||
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Test Code
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo(choice=None, draw_parses=None, print_parses=None):
|
||||
"""
|
||||
A demonstration of the probabilistic parsers. The user is
|
||||
prompted to select which demo to run, and how many parses should
|
||||
be found; and then each parser is run on the same demo, and a
|
||||
summary of the results are displayed.
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
|
||||
from nltk import tokenize
|
||||
from nltk.parse import pchart
|
||||
|
||||
# Define two demos. Each demo has a sentence and a grammar.
|
||||
toy_pcfg1 = PCFG.fromstring(
|
||||
"""
|
||||
S -> NP VP [1.0]
|
||||
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
|
||||
Det -> 'the' [0.8] | 'my' [0.2]
|
||||
N -> 'man' [0.5] | 'telescope' [0.5]
|
||||
VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
|
||||
V -> 'ate' [0.35] | 'saw' [0.65]
|
||||
PP -> P NP [1.0]
|
||||
P -> 'with' [0.61] | 'under' [0.39]
|
||||
"""
|
||||
)
|
||||
|
||||
toy_pcfg2 = PCFG.fromstring(
|
||||
"""
|
||||
S -> NP VP [1.0]
|
||||
VP -> V NP [.59]
|
||||
VP -> V [.40]
|
||||
VP -> VP PP [.01]
|
||||
NP -> Det N [.41]
|
||||
NP -> Name [.28]
|
||||
NP -> NP PP [.31]
|
||||
PP -> P NP [1.0]
|
||||
V -> 'saw' [.21]
|
||||
V -> 'ate' [.51]
|
||||
V -> 'ran' [.28]
|
||||
N -> 'boy' [.11]
|
||||
N -> 'cookie' [.12]
|
||||
N -> 'table' [.13]
|
||||
N -> 'telescope' [.14]
|
||||
N -> 'hill' [.5]
|
||||
Name -> 'Jack' [.52]
|
||||
Name -> 'Bob' [.48]
|
||||
P -> 'with' [.61]
|
||||
P -> 'under' [.39]
|
||||
Det -> 'the' [.41]
|
||||
Det -> 'a' [.31]
|
||||
Det -> 'my' [.28]
|
||||
"""
|
||||
)
|
||||
|
||||
demos = [
|
||||
("I saw John with my telescope", toy_pcfg1),
|
||||
("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
|
||||
]
|
||||
|
||||
if choice is None:
|
||||
# Ask the user which demo they want to use.
|
||||
print()
|
||||
for i in range(len(demos)):
|
||||
print(f"{i + 1:>3}: {demos[i][0]}")
|
||||
print(" %r" % demos[i][1])
|
||||
print()
|
||||
print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
|
||||
choice = int(sys.stdin.readline().strip()) - 1
|
||||
try:
|
||||
sent, grammar = demos[choice]
|
||||
except (IndexError, TypeError):
|
||||
print("Bad sentence number")
|
||||
return
|
||||
|
||||
# Tokenize the sentence.
|
||||
tokens = sent.split()
|
||||
|
||||
# Define a list of parsers. We'll use all parsers.
|
||||
parsers = [
|
||||
pchart.InsideChartParser(grammar),
|
||||
pchart.RandomChartParser(grammar),
|
||||
pchart.UnsortedChartParser(grammar),
|
||||
pchart.LongestChartParser(grammar),
|
||||
pchart.InsideChartParser(grammar, beam_size=len(tokens) + 1), # was BeamParser
|
||||
]
|
||||
|
||||
# Run the parsers on the tokenized sentence.
|
||||
times = []
|
||||
average_p = []
|
||||
num_parses = []
|
||||
all_parses = {}
|
||||
for parser in parsers:
|
||||
print(f"\ns: {sent}\nparser: {parser}\ngrammar: {grammar}")
|
||||
parser.trace(3)
|
||||
t = time.time()
|
||||
parses = list(parser.parse(tokens))
|
||||
times.append(time.time() - t)
|
||||
p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0
|
||||
average_p.append(p)
|
||||
num_parses.append(len(parses))
|
||||
for p in parses:
|
||||
all_parses[p.freeze()] = 1
|
||||
|
||||
# Print some summary statistics
|
||||
print()
|
||||
print(" Parser Beam | Time (secs) # Parses Average P(parse)")
|
||||
print("------------------------+------------------------------------------")
|
||||
for i in range(len(parsers)):
|
||||
print(
|
||||
"%18s %4d |%11.4f%11d%19.14f"
|
||||
% (
|
||||
parsers[i].__class__.__name__,
|
||||
parsers[i].beam_size,
|
||||
times[i],
|
||||
num_parses[i],
|
||||
average_p[i],
|
||||
)
|
||||
)
|
||||
parses = all_parses.keys()
|
||||
if parses:
|
||||
p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
|
||||
else:
|
||||
p = 0
|
||||
print("------------------------+------------------------------------------")
|
||||
print("%18s |%11s%11d%19.14f" % ("(All Parses)", "n/a", len(parses), p))
|
||||
|
||||
if draw_parses is None:
|
||||
# Ask the user if we should draw the parses.
|
||||
print()
|
||||
print("Draw parses (y/n)? ", end=" ")
|
||||
draw_parses = sys.stdin.readline().strip().lower().startswith("y")
|
||||
if draw_parses:
|
||||
from nltk.draw.tree import draw_trees
|
||||
|
||||
print(" please wait...")
|
||||
draw_trees(*parses)
|
||||
|
||||
if print_parses is None:
|
||||
# Ask the user if we should print the parses.
|
||||
print()
|
||||
print("Print parses (y/n)? ", end=" ")
|
||||
print_parses = sys.stdin.readline().strip().lower().startswith("y")
|
||||
if print_parses:
|
||||
for parse in parses:
|
||||
print(parse)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
@@ -0,0 +1,716 @@
|
||||
# Natural Language Toolkit: Dependency Grammars
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Jason Narad <jason.narad@gmail.com>
|
||||
#
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
from collections import defaultdict
|
||||
from functools import total_ordering
|
||||
from itertools import chain
|
||||
|
||||
from nltk.grammar import (
|
||||
DependencyGrammar,
|
||||
DependencyProduction,
|
||||
ProbabilisticDependencyGrammar,
|
||||
)
|
||||
from nltk.internals import raise_unorderable_types
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
|
||||
#################################################################
|
||||
# Dependency Span
|
||||
#################################################################
|
||||
|
||||
|
||||
@total_ordering
|
||||
class DependencySpan:
|
||||
"""
|
||||
A contiguous span over some part of the input string representing
|
||||
dependency (head -> modifier) relationships amongst words. An atomic
|
||||
span corresponds to only one word so it isn't a 'span' in the conventional
|
||||
sense, as its _start_index = _end_index = _head_index for concatenation
|
||||
purposes. All other spans are assumed to have arcs between all nodes
|
||||
within the start and end indexes of the span, and one head index corresponding
|
||||
to the head word for the entire span. This is the same as the root node if
|
||||
the dependency structure were depicted as a graph.
|
||||
"""
|
||||
|
||||
def __init__(self, start_index, end_index, head_index, arcs, tags):
|
||||
self._start_index = start_index
|
||||
self._end_index = end_index
|
||||
self._head_index = head_index
|
||||
self._arcs = arcs
|
||||
self._tags = tags
|
||||
self._comparison_key = (start_index, end_index, head_index, tuple(arcs))
|
||||
self._hash = hash(self._comparison_key)
|
||||
|
||||
def head_index(self):
|
||||
"""
|
||||
:return: A value indexing the head of the entire ``DependencySpan``.
|
||||
:rtype: int
|
||||
"""
|
||||
return self._head_index
|
||||
|
||||
def __repr__(self):
|
||||
"""
|
||||
:return: A concise string representation of the ``DependencySpan``.
|
||||
:rtype: str.
|
||||
"""
|
||||
return "Span %d-%d; Head Index: %d" % (
|
||||
self._start_index,
|
||||
self._end_index,
|
||||
self._head_index,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
:return: A verbose string representation of the ``DependencySpan``.
|
||||
:rtype: str
|
||||
"""
|
||||
str = "Span %d-%d; Head Index: %d" % (
|
||||
self._start_index,
|
||||
self._end_index,
|
||||
self._head_index,
|
||||
)
|
||||
for i in range(len(self._arcs)):
|
||||
str += "\n%d <- %d, %s" % (i, self._arcs[i], self._tags[i])
|
||||
return str
|
||||
|
||||
def __eq__(self, other):
|
||||
return (
|
||||
type(self) == type(other) and self._comparison_key == other._comparison_key
|
||||
)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __lt__(self, other):
|
||||
if not isinstance(other, DependencySpan):
|
||||
raise_unorderable_types("<", self, other)
|
||||
return self._comparison_key < other._comparison_key
|
||||
|
||||
def __hash__(self):
|
||||
"""
|
||||
:return: The hash value of this ``DependencySpan``.
|
||||
"""
|
||||
return self._hash
|
||||
|
||||
|
||||
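
# Illustrative sketch (not part of the NLTK source): how a DependencySpan
# encodes a small analysis.  For the two words "the" (index 0) and "cats"
# (index 1), arcs[i] gives the index of the head of word (start_index + i),
# with -1 marking a word whose head has not been assigned yet.  The values
# below are invented for the example.
def _dependency_span_example():
    # Atomic, single-word spans: each covers exactly one word, which is also its head.
    the = DependencySpan(0, 1, 0, [-1], ["null"])
    cats = DependencySpan(1, 2, 1, [-1], ["null"])
    print(the)
    print(cats)
    # A concatenated span covering words 0-1 whose head is word 1 ("cats");
    # word 0 ("the") now has word 1 recorded as its head.
    the_cats = DependencySpan(0, 2, 1, [1, -1], ["null", "null"])
    print(the_cats)
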
#################################################################
|
||||
# Chart Cell
|
||||
#################################################################
|
||||
|
||||
|
||||
class ChartCell:
|
||||
"""
|
||||
A cell from the parse chart formed when performing the CYK algorithm.
|
||||
Each cell keeps track of its x and y coordinates (though this will probably
|
||||
be discarded), and a set of spans serving as the cell's entries.
|
||||
"""
|
||||
|
||||
def __init__(self, x, y):
|
||||
"""
|
||||
:param x: This cell's x coordinate.
|
||||
:type x: int.
|
||||
:param y: This cell's y coordinate.
|
||||
:type y: int.
|
||||
"""
|
||||
self._x = x
|
||||
self._y = y
|
||||
self._entries = set()
|
||||
|
||||
def add(self, span):
|
||||
"""
|
||||
Adds the given span to the set of spans
|
||||
representing the chart cell's entries.
|
||||
|
||||
:param span: The span to add.
|
||||
:type span: DependencySpan
|
||||
"""
|
||||
self._entries.add(span)
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
:return: A verbose string representation of this ``ChartCell``.
|
||||
:rtype: str.
|
||||
"""
|
||||
return "CC[%d,%d]: %s" % (self._x, self._y, self._entries)
|
||||
|
||||
def __repr__(self):
|
||||
"""
|
||||
:return: A concise string representation of this ``ChartCell``.
|
||||
:rtype: str.
|
||||
"""
|
||||
return "%s" % self
|
||||
|
||||
|
||||
#################################################################
|
||||
# Parsing with Dependency Grammars
|
||||
#################################################################
|
||||
|
||||
|
||||
class ProjectiveDependencyParser:
|
||||
"""
|
||||
A projective, rule-based, dependency parser. A ProjectiveDependencyParser
|
||||
is created with a DependencyGrammar, a set of productions specifying
|
||||
word-to-word dependency relations. The parse() method will then
|
||||
return the set of all parses, in tree representation, for a given input
|
||||
sequence of tokens. Each parse must meet the requirements of both
|
||||
the grammar and the projectivity constraint which specifies that the
|
||||
branches of the dependency tree are not allowed to cross. Alternatively,
|
||||
this can be understood as stating that each parent node and its children
|
||||
in the parse tree form a continuous substring of the input sequence.
|
||||
"""
|
||||
|
||||
def __init__(self, dependency_grammar):
|
||||
"""
|
||||
Create a new ProjectiveDependencyParser, from a word-to-word
|
||||
dependency grammar ``DependencyGrammar``.
|
||||
|
||||
:param dependency_grammar: A word-to-word relation dependency grammar.
|
||||
:type dependency_grammar: DependencyGrammar
|
||||
"""
|
||||
self._grammar = dependency_grammar
|
||||
|
||||
def parse(self, tokens):
|
||||
"""
|
||||
Performs a projective dependency parse on the list of tokens using
|
||||
a chart-based, span-concatenation algorithm similar to Eisner (1996).
|
||||
|
||||
:param tokens: The list of input tokens.
|
||||
:type tokens: list(str)
|
||||
:return: An iterator over parse trees.
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
self._tokens = list(tokens)
|
||||
chart = []
|
||||
for i in range(0, len(self._tokens) + 1):
|
||||
chart.append([])
|
||||
for j in range(0, len(self._tokens) + 1):
|
||||
chart[i].append(ChartCell(i, j))
|
||||
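# Cells where i == j + 1 lie just off the diagonal and hold the atomic,
# single-word spans; an arc value of -1 marks a word whose head has not
# been assigned yet.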
if i == j + 1:
|
||||
chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ["null"]))
|
||||
|
||||
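# Fill in the rest of the chart bottom-up: for every span (j, i) of
# increasing width, try every split point k and combine the entries of the
# two sub-spans with concatenate(), keeping any span the grammar licenses.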
for i in range(1, len(self._tokens) + 1):
|
||||
for j in range(i - 2, -1, -1):
|
||||
for k in range(i - 1, j, -1):
|
||||
for span1 in chart[k][j]._entries:
|
||||
for span2 in chart[i][k]._entries:
|
||||
for newspan in self.concatenate(span1, span2):
|
||||
chart[i][j].add(newspan)
|
||||
|
||||
for parse in chart[len(self._tokens)][0]._entries:
|
||||
conll_format = ""
|
||||
# malt_format = ""
|
||||
for i in range(len(tokens)):
|
||||
# malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
|
||||
# conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-')
|
||||
# Modified to comply with the new DependencyGraph requirement that there be at least one ROOT element.
|
||||
conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
|
||||
i + 1,
|
||||
tokens[i],
|
||||
tokens[i],
|
||||
"null",
|
||||
"null",
|
||||
"null",
|
||||
parse._arcs[i] + 1,
|
||||
"ROOT",
|
||||
"-",
|
||||
"-",
|
||||
)
|
||||
dg = DependencyGraph(conll_format)
|
||||
# if self.meets_arity(dg):
|
||||
yield dg.tree()
|
||||
|
||||
def concatenate(self, span1, span2):
|
||||
"""
|
||||
Concatenates the two spans in whichever way possible. This
|
||||
includes rightward concatenation (from the leftmost word of the
|
||||
leftmost span to the rightmost word of the rightmost span) and
|
||||
leftward concatenation (vice-versa) between adjacent spans. Unlike
|
||||
Eisner's presentation of span concatenation, these spans do not
|
||||
share or pivot on a particular word/word-index.
|
||||
|
||||
:return: A list of new spans formed through concatenation.
|
||||
:rtype: list(DependencySpan)
|
||||
"""
|
||||
spans = []
|
||||
if span1._start_index == span2._start_index:
|
||||
print("Error: Mismatched spans - replace this with thrown error")
|
||||
if span1._start_index > span2._start_index:
|
||||
temp_span = span1
|
||||
span1 = span2
|
||||
span2 = temp_span
|
||||
# adjacent rightward covered concatenation
|
||||
new_arcs = span1._arcs + span2._arcs
|
||||
new_tags = span1._tags + span2._tags
|
||||
if self._grammar.contains(
|
||||
self._tokens[span1._head_index], self._tokens[span2._head_index]
|
||||
):
|
||||
# print('Performing rightward cover %d to %d' % (span1._head_index, span2._head_index))
|
||||
new_arcs[span2._head_index - span1._start_index] = span1._head_index
|
||||
spans.append(
|
||||
DependencySpan(
|
||||
span1._start_index,
|
||||
span2._end_index,
|
||||
span1._head_index,
|
||||
new_arcs,
|
||||
new_tags,
|
||||
)
|
||||
)
|
||||
# adjacent leftward covered concatenation
|
||||
new_arcs = span1._arcs + span2._arcs
|
||||
if self._grammar.contains(
|
||||
self._tokens[span2._head_index], self._tokens[span1._head_index]
|
||||
):
|
||||
# print('performing leftward cover %d to %d' % (span2._head_index, span1._head_index))
|
||||
new_arcs[span1._head_index - span1._start_index] = span2._head_index
|
||||
spans.append(
|
||||
DependencySpan(
|
||||
span1._start_index,
|
||||
span2._end_index,
|
||||
span2._head_index,
|
||||
new_arcs,
|
||||
new_tags,
|
||||
)
|
||||
)
|
||||
return spans
|
||||
|
||||
|
||||
#################################################################
|
||||
# Parsing with Probabilistic Dependency Grammars
|
||||
#################################################################
|
||||
|
||||
|
||||
class ProbabilisticProjectiveDependencyParser:
|
||||
"""A probabilistic, projective dependency parser.
|
||||
|
||||
This parser returns the most probable projective parse derived from the
|
||||
probabilistic dependency grammar derived from the train() method. The
|
||||
probabilistic model is an implementation of Eisner's (1996) Model C, which
|
||||
conditions on head-word, head-tag, child-word, and child-tag. The decoding
|
||||
uses a bottom-up chart-based span concatenation algorithm that's identical
|
||||
to the one utilized by the rule-based projective parser.
|
||||
|
||||
Usage example
|
||||
|
||||
>>> from nltk.parse.dependencygraph import conll_data2
|
||||
|
||||
>>> graphs = [
|
||||
... DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry
|
||||
... ]
|
||||
|
||||
>>> ppdp = ProbabilisticProjectiveDependencyParser()
|
||||
>>> ppdp.train(graphs)
|
||||
|
||||
>>> sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.']
|
||||
>>> list(ppdp.parse(sent))
|
||||
[Tree('zag', ['Cathy', 'hen', Tree('zwaaien', ['wild', '.'])])]
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Create a new probabilistic dependency parser. No additional
|
||||
operations are necessary.
|
||||
"""
|
||||
|
||||
def parse(self, tokens):
|
||||
"""
|
||||
Parses the list of tokens subject to the projectivity constraint
|
||||
and the productions in the parser's grammar. This uses a method
|
||||
similar to the span-concatenation algorithm defined in Eisner (1996).
|
||||
It returns the most probable parse derived from the parser's
|
||||
probabilistic dependency grammar.
|
||||
"""
|
||||
self._tokens = list(tokens)
|
||||
chart = []
|
||||
for i in range(0, len(self._tokens) + 1):
|
||||
chart.append([])
|
||||
for j in range(0, len(self._tokens) + 1):
|
||||
chart[i].append(ChartCell(i, j))
|
||||
if i == j + 1:
|
||||
if tokens[i - 1] in self._grammar._tags:
|
||||
for tag in self._grammar._tags[tokens[i - 1]]:
|
||||
chart[i][j].add(
|
||||
DependencySpan(i - 1, i, i - 1, [-1], [tag])
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"No tag found for input token '%s', parse is impossible."
|
||||
% tokens[i - 1]
|
||||
)
|
||||
return []
|
||||
for i in range(1, len(self._tokens) + 1):
|
||||
for j in range(i - 2, -1, -1):
|
||||
for k in range(i - 1, j, -1):
|
||||
for span1 in chart[k][j]._entries:
|
||||
for span2 in chart[i][k]._entries:
|
||||
for newspan in self.concatenate(span1, span2):
|
||||
chart[i][j].add(newspan)
|
||||
trees = []
|
||||
max_parse = None
|
||||
max_score = 0
|
||||
for parse in chart[len(self._tokens)][0]._entries:
|
||||
conll_format = ""
|
||||
malt_format = ""
|
||||
for i in range(len(tokens)):
|
||||
malt_format += "%s\t%s\t%d\t%s\n" % (
|
||||
tokens[i],
|
||||
"null",
|
||||
parse._arcs[i] + 1,
|
||||
"null",
|
||||
)
|
||||
# conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-')
|
||||
# Modified to comply with a recent change in DependencyGraph requiring a ROOT element.
|
||||
conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
|
||||
i + 1,
|
||||
tokens[i],
|
||||
tokens[i],
|
||||
parse._tags[i],
|
||||
parse._tags[i],
|
||||
"null",
|
||||
parse._arcs[i] + 1,
|
||||
"ROOT",
|
||||
"-",
|
||||
"-",
|
||||
)
|
||||
dg = DependencyGraph(conll_format)
|
||||
score = self.compute_prob(dg)
|
||||
trees.append((score, dg.tree()))
|
||||
trees.sort()
|
||||
return (tree for (score, tree) in trees)
|
||||
|
||||
def concatenate(self, span1, span2):
|
||||
"""
|
||||
Concatenates the two spans in whichever way possible. This
|
||||
includes rightward concatenation (from the leftmost word of the
|
||||
leftmost span to the rightmost word of the rightmost span) and
|
||||
leftward concatenation (vice-versa) between adjacent spans. Unlike
|
||||
Eisner's presentation of span concatenation, these spans do not
|
||||
share or pivot on a particular word/word-index.
|
||||
|
||||
:return: A list of new spans formed through concatenation.
|
||||
:rtype: list(DependencySpan)
|
||||
"""
|
||||
spans = []
|
||||
if span1._start_index == span2._start_index:
|
||||
print("Error: Mismatched spans - replace this with thrown error")
|
||||
if span1._start_index > span2._start_index:
|
||||
temp_span = span1
|
||||
span1 = span2
|
||||
span2 = temp_span
|
||||
# adjacent rightward covered concatenation
|
||||
new_arcs = span1._arcs + span2._arcs
|
||||
new_tags = span1._tags + span2._tags
|
||||
if self._grammar.contains(
|
||||
self._tokens[span1._head_index], self._tokens[span2._head_index]
|
||||
):
|
||||
new_arcs[span2._head_index - span1._start_index] = span1._head_index
|
||||
spans.append(
|
||||
DependencySpan(
|
||||
span1._start_index,
|
||||
span2._end_index,
|
||||
span1._head_index,
|
||||
new_arcs,
|
||||
new_tags,
|
||||
)
|
||||
)
|
||||
# adjacent leftward covered concatenation
|
||||
new_arcs = span1._arcs + span2._arcs
|
||||
new_tags = span1._tags + span2._tags
|
||||
if self._grammar.contains(
|
||||
self._tokens[span2._head_index], self._tokens[span1._head_index]
|
||||
):
|
||||
new_arcs[span1._head_index - span1._start_index] = span2._head_index
|
||||
spans.append(
|
||||
DependencySpan(
|
||||
span1._start_index,
|
||||
span2._end_index,
|
||||
span2._head_index,
|
||||
new_arcs,
|
||||
new_tags,
|
||||
)
|
||||
)
|
||||
return spans
|
||||
|
||||
def train(self, graphs):
|
||||
"""
|
||||
Trains a ProbabilisticDependencyGrammar based on the list of input
|
||||
DependencyGraphs. This model is an implementation of Eisner's (1996)
|
||||
Model C, which derives its statistics from head-word, head-tag,
|
||||
child-word, and child-tag relationships.
|
||||
|
||||
:param graphs: A list of dependency graphs to train from.
|
||||
:type: list(DependencyGraph)
|
||||
"""
|
||||
productions = []
|
||||
events = defaultdict(int)
|
||||
tags = {}
|
||||
for dg in graphs:
|
||||
for node_index in range(1, len(dg.nodes)):
|
||||
# children = dg.nodes[node_index]['deps']
|
||||
children = list(
|
||||
chain.from_iterable(dg.nodes[node_index]["deps"].values())
|
||||
)
|
||||
|
||||
nr_left_children = dg.left_children(node_index)
|
||||
nr_right_children = dg.right_children(node_index)
|
||||
nr_children = nr_left_children + nr_right_children
|
||||
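# child_index enumerates dependent positions around the head: negative
# values are positions to the left of the head, positive values positions
# to the right; the extra position at each end generates a STOP event,
# and child_index == 0 (the head itself) records nothing.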
for child_index in range(
|
||||
0 - (nr_left_children + 1), nr_right_children + 2
|
||||
):
|
||||
head_word = dg.nodes[node_index]["word"]
|
||||
head_tag = dg.nodes[node_index]["tag"]
|
||||
if head_word in tags:
|
||||
tags[head_word].add(head_tag)
|
||||
else:
|
||||
tags[head_word] = {head_tag}
|
||||
child = "STOP"
|
||||
child_tag = "STOP"
|
||||
prev_word = "START"
|
||||
prev_tag = "START"
|
||||
if child_index < 0:
|
||||
array_index = child_index + nr_left_children
|
||||
if array_index >= 0:
|
||||
child = dg.nodes[children[array_index]]["word"]
|
||||
child_tag = dg.nodes[children[array_index]]["tag"]
|
||||
if child_index != -1:
|
||||
prev_word = dg.nodes[children[array_index + 1]]["word"]
|
||||
prev_tag = dg.nodes[children[array_index + 1]]["tag"]
|
||||
if child != "STOP":
|
||||
productions.append(DependencyProduction(head_word, [child]))
|
||||
head_event = "(head ({} {}) (mods ({}, {}, {}) left))".format(
|
||||
child,
|
||||
child_tag,
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
mod_event = "(mods ({}, {}, {}) left))".format(
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
events[head_event] += 1
|
||||
events[mod_event] += 1
|
||||
elif child_index > 0:
|
||||
array_index = child_index + nr_left_children - 1
|
||||
if array_index < nr_children:
|
||||
child = dg.nodes[children[array_index]]["word"]
|
||||
child_tag = dg.nodes[children[array_index]]["tag"]
|
||||
if child_index != 1:
|
||||
prev_word = dg.nodes[children[array_index - 1]]["word"]
|
||||
prev_tag = dg.nodes[children[array_index - 1]]["tag"]
|
||||
if child != "STOP":
|
||||
productions.append(DependencyProduction(head_word, [child]))
|
||||
head_event = "(head ({} {}) (mods ({}, {}, {}) right))".format(
|
||||
child,
|
||||
child_tag,
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
mod_event = "(mods ({}, {}, {}) right))".format(
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
events[head_event] += 1
|
||||
events[mod_event] += 1
|
||||
self._grammar = ProbabilisticDependencyGrammar(productions, events, tags)
|
||||
|
||||
def compute_prob(self, dg):
|
||||
"""
|
||||
Computes the probability of a dependency graph based
|
||||
on the parser's probability model (defined by the parser's
|
||||
statistical dependency grammar).
|
||||
|
||||
:param dg: A dependency graph to score.
|
||||
:type dg: DependencyGraph
|
||||
:return: The probability of the dependency graph.
|
||||
:rtype: float
|
||||
"""
|
||||
prob = 1.0
|
||||
for node_index in range(1, len(dg.nodes)):
|
||||
# children = dg.nodes[node_index]['deps']
|
||||
children = list(chain.from_iterable(dg.nodes[node_index]["deps"].values()))
|
||||
|
||||
nr_left_children = dg.left_children(node_index)
|
||||
nr_right_children = dg.right_children(node_index)
|
||||
nr_children = nr_left_children + nr_right_children
|
||||
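# Mirror of the loop in train(): visit the same dependent positions for
# this head and multiply in each head event's relative frequency,
# h_count / m_count; if the modifier event was never observed in training,
# the probability is reset to a very small constant instead.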
for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2):
|
||||
head_word = dg.nodes[node_index]["word"]
|
||||
head_tag = dg.nodes[node_index]["tag"]
|
||||
child = "STOP"
|
||||
child_tag = "STOP"
|
||||
prev_word = "START"
|
||||
prev_tag = "START"
|
||||
if child_index < 0:
|
||||
array_index = child_index + nr_left_children
|
||||
if array_index >= 0:
|
||||
child = dg.nodes[children[array_index]]["word"]
|
||||
child_tag = dg.nodes[children[array_index]]["tag"]
|
||||
if child_index != -1:
|
||||
prev_word = dg.nodes[children[array_index + 1]]["word"]
|
||||
prev_tag = dg.nodes[children[array_index + 1]]["tag"]
|
||||
head_event = "(head ({} {}) (mods ({}, {}, {}) left))".format(
|
||||
child,
|
||||
child_tag,
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
mod_event = "(mods ({}, {}, {}) left))".format(
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
h_count = self._grammar._events[head_event]
|
||||
m_count = self._grammar._events[mod_event]
|
||||
|
||||
# If the grammar is not covered
|
||||
if m_count != 0:
|
||||
prob *= h_count / m_count
|
||||
else:
|
||||
prob = 0.00000001 # Very small number
|
||||
|
||||
elif child_index > 0:
|
||||
array_index = child_index + nr_left_children - 1
|
||||
if array_index < nr_children:
|
||||
child = dg.nodes[children[array_index]]["word"]
|
||||
child_tag = dg.nodes[children[array_index]]["tag"]
|
||||
if child_index != 1:
|
||||
prev_word = dg.nodes[children[array_index - 1]]["word"]
|
||||
prev_tag = dg.nodes[children[array_index - 1]]["tag"]
|
||||
head_event = "(head ({} {}) (mods ({}, {}, {}) right))".format(
|
||||
child,
|
||||
child_tag,
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
mod_event = "(mods ({}, {}, {}) right))".format(
|
||||
prev_tag,
|
||||
head_word,
|
||||
head_tag,
|
||||
)
|
||||
h_count = self._grammar._events[head_event]
|
||||
m_count = self._grammar._events[mod_event]
|
||||
|
||||
if m_count != 0:
|
||||
prob *= h_count / m_count
|
||||
else:
|
||||
prob = 0.00000001 # Very small number
|
||||
|
||||
return prob
|
||||
|
||||
|
||||
#################################################################
|
||||
# Demos
|
||||
#################################################################
|
||||
|
||||
|
||||
def demo():
|
||||
projective_rule_parse_demo()
|
||||
# arity_parse_demo()
|
||||
projective_prob_parse_demo()
|
||||
|
||||
|
||||
def projective_rule_parse_demo():
|
||||
"""
|
||||
A demonstration showing the creation and use of a
|
||||
``DependencyGrammar`` to perform a projective dependency
|
||||
parse.
|
||||
"""
|
||||
grammar = DependencyGrammar.fromstring(
|
||||
"""
|
||||
'scratch' -> 'cats' | 'walls'
|
||||
'walls' -> 'the'
|
||||
'cats' -> 'the'
|
||||
"""
|
||||
)
|
||||
print(grammar)
|
||||
pdp = ProjectiveDependencyParser(grammar)
|
||||
trees = pdp.parse(["the", "cats", "scratch", "the", "walls"])
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
|
||||
|
||||
def arity_parse_demo():
|
||||
"""
|
||||
A demonstration showing the creation of a ``DependencyGrammar``
|
||||
in which a specific number of modifiers is listed for a given
|
||||
head. This can further constrain the number of possible parses
|
||||
created by a ``ProjectiveDependencyParser``.
|
||||
"""
|
||||
print()
|
||||
print("A grammar with no arity constraints. Each DependencyProduction")
|
||||
print("specifies a relationship between one head word and only one")
|
||||
print("modifier word.")
|
||||
grammar = DependencyGrammar.fromstring(
|
||||
"""
|
||||
'fell' -> 'price' | 'stock'
|
||||
'price' -> 'of' | 'the'
|
||||
'of' -> 'stock'
|
||||
'stock' -> 'the'
|
||||
"""
|
||||
)
|
||||
print(grammar)
|
||||
|
||||
print()
|
||||
print("For the sentence 'The price of the stock fell', this grammar")
|
||||
print("will produce the following three parses:")
|
||||
pdp = ProjectiveDependencyParser(grammar)
|
||||
trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"])
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
|
||||
print()
|
||||
print("By contrast, the following grammar contains a ")
|
||||
print("DependencyProduction that specifies a relationship")
|
||||
print("between a single head word, 'price', and two modifier")
|
||||
print("words, 'of' and 'the'.")
|
||||
grammar = DependencyGrammar.fromstring(
|
||||
"""
|
||||
'fell' -> 'price' | 'stock'
|
||||
'price' -> 'of' 'the'
|
||||
'of' -> 'stock'
|
||||
'stock' -> 'the'
|
||||
"""
|
||||
)
|
||||
print(grammar)
|
||||
|
||||
print()
|
||||
print(
|
||||
"This constrains the number of possible parses to just one:"
|
||||
) # unimplemented, soon to replace
|
||||
pdp = ProjectiveDependencyParser(grammar)
|
||||
trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"])
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
|
||||
|
||||
def projective_prob_parse_demo():
|
||||
"""
|
||||
A demo showing the training and use of a projective
|
||||
dependency parser.
|
||||
"""
|
||||
from nltk.parse.dependencygraph import conll_data2
|
||||
|
||||
graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
|
||||
ppdp = ProbabilisticProjectiveDependencyParser()
|
||||
print("Training Probabilistic Projective Dependency Parser...")
|
||||
ppdp.train(graphs)
|
||||
|
||||
sent = ["Cathy", "zag", "hen", "wild", "zwaaien", "."]
|
||||
print("Parsing '", " ".join(sent), "'...")
|
||||
print("Parse:")
|
||||
for tree in ppdp.parse(sent):
|
||||
print(tree)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
684
backend/venv/Lib/site-packages/nltk/parse/recursivedescent.py
Normal file
@@ -0,0 +1,684 @@
|
||||
# Natural Language Toolkit: Recursive Descent Parser
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.grammar import Nonterminal
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.tree import ImmutableTree, Tree
|
||||
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Recursive Descent Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
class RecursiveDescentParser(ParserI):
|
||||
"""
|
||||
A simple top-down CFG parser that parses texts by recursively
|
||||
expanding the fringe of a Tree, and matching it against a
|
||||
text.
|
||||
|
||||
``RecursiveDescentParser`` uses a list of tree locations called a
|
||||
"frontier" to remember which subtrees have not yet been expanded
|
||||
and which leaves have not yet been matched against the text. Each
|
||||
tree location consists of a list of child indices specifying the
|
||||
path from the root of the tree to a subtree or a leaf; see the
|
||||
reference documentation for Tree for more information
|
||||
about tree locations.
|
||||
|
||||
When the parser begins parsing a text, it constructs a tree
|
||||
containing only the start symbol, and a frontier containing the
|
||||
location of the tree's root node. It then extends the tree to
|
||||
cover the text, using the following recursive procedure:
|
||||
|
||||
- If the frontier is empty, and the text is covered by the tree,
|
||||
then return the tree as a possible parse.
|
||||
- If the frontier is empty, and the text is not covered by the
|
||||
tree, then return no parses.
|
||||
- If the first element of the frontier is a subtree, then
|
||||
use CFG productions to "expand" it. For each applicable
|
||||
production, add the expanded subtree's children to the
|
||||
frontier, and recursively find all parses that can be
|
||||
generated by the new tree and frontier.
|
||||
- If the first element of the frontier is a token, then "match"
|
||||
it against the next token from the text. Remove the token
|
||||
from the frontier, and recursively find all parses that can be
|
||||
generated by the new tree and frontier.
|
||||
|
||||
:see: ``nltk.grammar``
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, trace=0):
|
||||
"""
|
||||
Create a new ``RecursiveDescentParser``, that uses ``grammar``
|
||||
to parse texts.
|
||||
|
||||
:type grammar: CFG
|
||||
:param grammar: The grammar used to parse texts.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing
|
||||
output.
|
||||
"""
|
||||
self._grammar = grammar
|
||||
self._trace = trace
|
||||
|
||||
def grammar(self):
|
||||
return self._grammar
|
||||
|
||||
def parse(self, tokens):
|
||||
# Inherit docs from ParserI
|
||||
|
||||
tokens = list(tokens)
|
||||
self._grammar.check_coverage(tokens)
|
||||
|
||||
# Start a recursive descent parse, with an initial tree
|
||||
# containing just the start symbol.
|
||||
start = self._grammar.start().symbol()
|
||||
initial_tree = Tree(start, [])
|
||||
frontier = [()]
|
||||
if self._trace:
|
||||
self._trace_start(initial_tree, frontier, tokens)
|
||||
return self._parse(tokens, initial_tree, frontier)
|
||||
|
||||
def _parse(self, remaining_text, tree, frontier):
|
||||
"""
|
||||
Recursively expand and match each element of ``tree``
|
||||
specified by ``frontier``, to cover ``remaining_text``. Return
|
||||
a list of all parses found.
|
||||
|
||||
:return: An iterator of all parses that can be generated by
|
||||
matching and expanding the elements of ``tree``
|
||||
specified by ``frontier``.
|
||||
:rtype: iter(Tree)
|
||||
:type tree: Tree
|
||||
:param tree: A partial structure for the text that is
|
||||
currently being parsed. The elements of ``tree``
|
||||
that are specified by ``frontier`` have not yet been
|
||||
expanded or matched.
|
||||
:type remaining_text: list(str)
|
||||
:param remaining_text: The portion of the text that is not yet
|
||||
covered by ``tree``.
|
||||
:type frontier: list(tuple(int))
|
||||
:param frontier: A list of the locations within ``tree`` of
|
||||
all subtrees that have not yet been expanded, and all
|
||||
leaves that have not yet been matched. This list is sorted
|
||||
in left-to-right order of location within the tree.
|
||||
"""
|
||||
|
||||
# If the tree covers the text, and there's nothing left to
|
||||
# expand, then we've found a complete parse; return it.
|
||||
if len(remaining_text) == 0 and len(frontier) == 0:
|
||||
if self._trace:
|
||||
self._trace_succeed(tree, frontier)
|
||||
yield tree
|
||||
|
||||
# If there's still text, but nothing left to expand, we failed.
|
||||
elif len(frontier) == 0:
|
||||
if self._trace:
|
||||
self._trace_backtrack(tree, frontier)
|
||||
|
||||
# If the next element on the frontier is a tree, expand it.
|
||||
elif isinstance(tree[frontier[0]], Tree):
|
||||
yield from self._expand(remaining_text, tree, frontier)
|
||||
|
||||
# If the next element on the frontier is a token, match it.
|
||||
else:
|
||||
yield from self._match(remaining_text, tree, frontier)
|
||||
|
||||
def _match(self, rtext, tree, frontier):
|
||||
"""
|
||||
:rtype: iter(Tree)
|
||||
:return: an iterator of all parses that can be generated by
|
||||
matching the first element of ``frontier`` against the
|
||||
first token in ``rtext``. In particular, if the first
|
||||
element of ``frontier`` has the same type as the first
|
||||
token in ``rtext``, then substitute the token into
|
||||
``tree``; and return all parses that can be generated by
|
||||
matching and expanding the remaining elements of
|
||||
``frontier``. If the first element of ``frontier`` does not
|
||||
have the same type as the first token in ``rtext``, then
|
||||
return an empty list.
|
||||
|
||||
:type tree: Tree
|
||||
:param tree: A partial structure for the text that is
|
||||
currently being parsed. The elements of ``tree``
|
||||
that are specified by ``frontier`` have not yet been
|
||||
expanded or matched.
|
||||
:type rtext: list(str)
|
||||
:param rtext: The portion of the text that is not yet
|
||||
covered by ``tree``.
|
||||
:type frontier: list of tuple of int
|
||||
:param frontier: A list of the locations within ``tree`` of
|
||||
all subtrees that have not yet been expanded, and all
|
||||
leaves that have not yet been matched.
|
||||
"""
|
||||
|
||||
tree_leaf = tree[frontier[0]]
|
||||
if len(rtext) > 0 and tree_leaf == rtext[0]:
|
||||
# If it's a terminal that matches rtext[0], then substitute
|
||||
# in the token, and continue parsing.
|
||||
newtree = tree.copy(deep=True)
|
||||
newtree[frontier[0]] = rtext[0]
|
||||
if self._trace:
|
||||
self._trace_match(newtree, frontier[1:], rtext[0])
|
||||
yield from self._parse(rtext[1:], newtree, frontier[1:])
|
||||
else:
|
||||
# If it's a non-matching terminal, fail.
|
||||
if self._trace:
|
||||
self._trace_backtrack(tree, frontier, rtext[:1])
|
||||
|
||||
def _expand(self, remaining_text, tree, frontier, production=None):
|
||||
"""
|
||||
:rtype: iter(Tree)
|
||||
:return: An iterator of all parses that can be generated by
|
||||
expanding the first element of ``frontier`` with
|
||||
``production``. In particular, if the first element of
|
||||
``frontier`` is a subtree whose node type is equal to
|
||||
``production``'s left hand side, then add a child to that
|
||||
subtree for each element of ``production``'s right hand
|
||||
side; and return all parses that can be generated by
|
||||
matching and expanding the remaining elements of
|
||||
``frontier``. If the first element of ``frontier`` is not a
|
||||
subtree whose node type is equal to ``production``'s left
|
||||
hand side, then return an empty list. If ``production`` is
|
||||
not specified, then return a list of all parses that can
|
||||
be generated by expanding the first element of ``frontier``
|
||||
with *any* CFG production.
|
||||
|
||||
:type tree: Tree
|
||||
:param tree: A partial structure for the text that is
|
||||
currently being parsed. The elements of ``tree``
|
||||
that are specified by ``frontier`` have not yet been
|
||||
expanded or matched.
|
||||
:type remaining_text: list(str)
|
||||
:param remaining_text: The portion of the text that is not yet
|
||||
covered by ``tree``.
|
||||
:type frontier: list(tuple(int))
|
||||
:param frontier: A list of the locations within ``tree`` of
|
||||
all subtrees that have not yet been expanded, and all
|
||||
leaves that have not yet been matched.
|
||||
"""
|
||||
|
||||
if production is None:
|
||||
productions = self._grammar.productions()
|
||||
else:
|
||||
productions = [production]
|
||||
|
||||
for production in productions:
|
||||
lhs = production.lhs().symbol()
|
||||
if lhs == tree[frontier[0]].label():
|
||||
subtree = self._production_to_tree(production)
|
||||
if frontier[0] == ():
|
||||
newtree = subtree
|
||||
else:
|
||||
newtree = tree.copy(deep=True)
|
||||
newtree[frontier[0]] = subtree
|
||||
new_frontier = [
|
||||
frontier[0] + (i,) for i in range(len(production.rhs()))
|
||||
]
|
||||
if self._trace:
|
||||
self._trace_expand(newtree, new_frontier, production)
|
||||
yield from self._parse(
|
||||
remaining_text, newtree, new_frontier + frontier[1:]
|
||||
)
|
||||
|
||||
def _production_to_tree(self, production):
|
||||
"""
|
||||
:rtype: Tree
|
||||
:return: The Tree that is licensed by ``production``.
|
||||
In particular, given the production ``[lhs -> elt[1] ... elt[n]]``
|
||||
return a tree that has a node ``lhs.symbol``, and
|
||||
``n`` children. For each nonterminal element
|
||||
``elt[i]`` in the production, the tree token has a
|
||||
childless subtree with node value ``elt[i].symbol``; and
|
||||
for each terminal element ``elt[j]``, the tree token has
|
||||
a leaf token with type ``elt[j]``.
|
||||
|
||||
:param production: The CFG production that licenses the tree
|
||||
token that should be returned.
|
||||
:type production: Production
|
||||
"""
|
||||
children = []
|
||||
for elt in production.rhs():
|
||||
if isinstance(elt, Nonterminal):
|
||||
children.append(Tree(elt.symbol(), []))
|
||||
else:
|
||||
# This will be matched.
|
||||
children.append(elt)
|
||||
return Tree(production.lhs().symbol(), children)
|
||||
|
||||
def trace(self, trace=2):
|
||||
"""
|
||||
Set the level of tracing output that should be generated when
|
||||
parsing a text.
|
||||
|
||||
:type trace: int
|
||||
:param trace: The trace level. A trace level of ``0`` will
|
||||
generate no tracing output; and higher trace levels will
|
||||
produce more verbose tracing output.
|
||||
:rtype: None
|
||||
"""
|
||||
self._trace = trace
|
||||
|
||||
def _trace_fringe(self, tree, treeloc=None):
|
||||
"""
|
||||
Print trace output displaying the fringe of ``tree``. The
|
||||
fringe of ``tree`` consists of all of its leaves and all of
|
||||
its childless subtrees.
|
||||
|
||||
:rtype: None
|
||||
"""
|
||||
|
||||
if treeloc == ():
|
||||
print("*", end=" ")
|
||||
if isinstance(tree, Tree):
|
||||
if len(tree) == 0:
|
||||
print(repr(Nonterminal(tree.label())), end=" ")
|
||||
for i in range(len(tree)):
|
||||
if treeloc is not None and i == treeloc[0]:
|
||||
self._trace_fringe(tree[i], treeloc[1:])
|
||||
else:
|
||||
self._trace_fringe(tree[i])
|
||||
else:
|
||||
print(repr(tree), end=" ")
|
||||
|
||||
def _trace_tree(self, tree, frontier, operation):
|
||||
"""
|
||||
Print trace output displaying the parser's current state.
|
||||
|
||||
:param operation: A character identifying the operation that
|
||||
generated the current state.
|
||||
:rtype: None
|
||||
"""
|
||||
if self._trace == 2:
|
||||
print(" %c [" % operation, end=" ")
|
||||
else:
|
||||
print(" [", end=" ")
|
||||
if len(frontier) > 0:
|
||||
self._trace_fringe(tree, frontier[0])
|
||||
else:
|
||||
self._trace_fringe(tree)
|
||||
print("]")
|
||||
|
||||
def _trace_start(self, tree, frontier, text):
|
||||
print("Parsing %r" % " ".join(text))
|
||||
if self._trace > 2:
|
||||
print("Start:")
|
||||
if self._trace > 1:
|
||||
self._trace_tree(tree, frontier, " ")
|
||||
|
||||
def _trace_expand(self, tree, frontier, production):
|
||||
if self._trace > 2:
|
||||
print("Expand: %s" % production)
|
||||
if self._trace > 1:
|
||||
self._trace_tree(tree, frontier, "E")
|
||||
|
||||
def _trace_match(self, tree, frontier, tok):
|
||||
if self._trace > 2:
|
||||
print("Match: %r" % tok)
|
||||
if self._trace > 1:
|
||||
self._trace_tree(tree, frontier, "M")
|
||||
|
||||
def _trace_succeed(self, tree, frontier):
|
||||
if self._trace > 2:
|
||||
print("GOOD PARSE:")
|
||||
if self._trace == 1:
|
||||
print("Found a parse:\n%s" % tree)
|
||||
if self._trace > 1:
|
||||
self._trace_tree(tree, frontier, "+")
|
||||
|
||||
def _trace_backtrack(self, tree, frontier, toks=None):
|
||||
if self._trace > 2:
|
||||
if toks:
|
||||
print("Backtrack: %r match failed" % toks[0])
|
||||
else:
|
||||
print("Backtrack")
|
||||
|
||||
|
||||
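
# Illustrative sketch (not part of the NLTK source): a minimal use of
# RecursiveDescentParser with a small toy CFG.  Recursive descent is top-down,
# so the grammar must not be left-recursive; the grammar and helper function
# below are invented for this example.
def _recursive_descent_example():
    from nltk.grammar import CFG

    toy_grammar = CFG.fromstring(
        """
        S -> NP VP
        NP -> 'the' N
        VP -> V NP
        N -> 'dog' | 'cat'
        V -> 'chased'
        """
    )
    parser = RecursiveDescentParser(toy_grammar, trace=0)
    for tree in parser.parse("the dog chased the cat".split()):
        print(tree)
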
##//////////////////////////////////////////////////////
|
||||
## Stepping Recursive Descent Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
class SteppingRecursiveDescentParser(RecursiveDescentParser):
|
||||
"""
|
||||
A ``RecursiveDescentParser`` that allows you to step through the
|
||||
parsing process, performing a single operation at a time.
|
||||
|
||||
The ``initialize`` method is used to start parsing a text.
|
||||
``expand`` expands the first element on the frontier using a single
|
||||
CFG production, and ``match`` matches the first element on the
|
||||
frontier against the next text token. ``backtrack`` undoes the most
|
||||
recent expand or match operation. ``step`` performs a single
|
||||
expand, match, or backtrack operation. ``parses`` returns the set
|
||||
of parses that have been found by the parser.
|
||||
|
||||
:ivar _history: A list of ``(rtext, tree, frontier)`` triples,
|
||||
containing the previous states of the parser. This history is
|
||||
used to implement the ``backtrack`` operation.
|
||||
:ivar _tried_e: A record of all productions that have been tried
|
||||
for a given tree. This record is used by ``expand`` to perform
|
||||
the next untried production.
|
||||
:ivar _tried_m: A record of what tokens have been matched for a
|
||||
given tree. This record is used by ``step`` to decide whether
|
||||
or not to match a token.
|
||||
:see: ``nltk.grammar``
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, trace=0):
|
||||
super().__init__(grammar, trace)
|
||||
self._rtext = None
|
||||
self._tree = None
|
||||
self._frontier = [()]
|
||||
self._tried_e = {}
|
||||
self._tried_m = {}
|
||||
self._history = []
|
||||
self._parses = []
|
||||
|
||||
# [XX] TEMPORARY HACK WARNING! This should be replaced with
|
||||
# something nicer when we get the chance.
|
||||
def _freeze(self, tree):
|
||||
c = tree.copy()
|
||||
# for pos in c.treepositions('leaves'):
|
||||
# c[pos] = c[pos].freeze()
|
||||
return ImmutableTree.convert(c)
|
||||
|
||||
def parse(self, tokens):
|
||||
tokens = list(tokens)
|
||||
self.initialize(tokens)
|
||||
while self.step() is not None:
|
||||
pass
|
||||
return self.parses()
|
||||
|
||||
def initialize(self, tokens):
|
||||
"""
|
||||
Start parsing a given text. This sets the parser's tree to
|
||||
the start symbol, its frontier to the root node, and its
|
||||
remaining text to the given list of tokens.
|
||||
"""
|
||||
|
||||
self._rtext = tokens
|
||||
start = self._grammar.start().symbol()
|
||||
self._tree = Tree(start, [])
|
||||
self._frontier = [()]
|
||||
self._tried_e = {}
|
||||
self._tried_m = {}
|
||||
self._history = []
|
||||
self._parses = []
|
||||
if self._trace:
|
||||
self._trace_start(self._tree, self._frontier, self._rtext)
|
||||
|
||||
def remaining_text(self):
|
||||
"""
|
||||
:return: The portion of the text that is not yet covered by the
|
||||
tree.
|
||||
:rtype: list(str)
|
||||
"""
|
||||
return self._rtext
|
||||
|
||||
def frontier(self):
|
||||
"""
|
||||
:return: A list of the tree locations of all subtrees that
|
||||
have not yet been expanded, and all leaves that have not
|
||||
yet been matched.
|
||||
:rtype: list(tuple(int))
|
||||
"""
|
||||
return self._frontier
|
||||
|
||||
def tree(self):
|
||||
"""
|
||||
:return: A partial structure for the text that is
|
||||
currently being parsed. The elements specified by the
|
||||
frontier have not yet been expanded or matched.
|
||||
:rtype: Tree
|
||||
"""
|
||||
return self._tree
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
Perform a single parsing operation. If an untried match is
|
||||
possible, then perform the match, and return the matched
|
||||
token. If an untried expansion is possible, then perform the
|
||||
expansion, and return the production that it is based on. If
|
||||
backtracking is possible, then backtrack, and return True.
|
||||
Otherwise, return None.
|
||||
|
||||
:return: None if no operation was performed; a token if a match
|
||||
was performed; a production if an expansion was performed;
|
||||
and True if a backtrack operation was performed.
|
||||
:rtype: Production or String or bool
|
||||
"""
|
||||
# Try matching (if we haven't already)
|
||||
if self.untried_match():
|
||||
token = self.match()
|
||||
if token is not None:
|
||||
return token
|
||||
|
||||
# Try expanding.
|
||||
production = self.expand()
|
||||
if production is not None:
|
||||
return production
|
||||
|
||||
# Try backtracking
|
||||
if self.backtrack():
|
||||
self._trace_backtrack(self._tree, self._frontier)
|
||||
return True
|
||||
|
||||
# Nothing left to do.
|
||||
return None
|
||||
|
||||
def expand(self, production=None):
|
||||
"""
|
||||
Expand the first element of the frontier. In particular, if
|
||||
the first element of the frontier is a subtree whose node type
|
||||
is equal to ``production``'s left hand side, then add a child
|
||||
to that subtree for each element of ``production``'s right hand
|
||||
side. If ``production`` is not specified, then use the first
|
||||
untried expandable production. If all expandable productions
|
||||
have been tried, do nothing.
|
||||
|
||||
:return: The production used to expand the frontier, if an
|
||||
expansion was performed. If no expansion was performed,
|
||||
return None.
|
||||
:rtype: Production or None
|
||||
"""
|
||||
|
||||
# Make sure we *can* expand.
|
||||
if len(self._frontier) == 0:
|
||||
return None
|
||||
if not isinstance(self._tree[self._frontier[0]], Tree):
|
||||
return None
|
||||
|
||||
# If they didn't specify a production, check all untried ones.
|
||||
if production is None:
|
||||
productions = self.untried_expandable_productions()
|
||||
else:
|
||||
productions = [production]
|
||||
|
||||
parses = []
|
||||
for prod in productions:
|
||||
# Record that we've tried this production now.
|
||||
self._tried_e.setdefault(self._freeze(self._tree), []).append(prod)
|
||||
|
||||
# Try expanding.
|
||||
for _result in self._expand(self._rtext, self._tree, self._frontier, prod):
|
||||
return prod
|
||||
|
||||
# We didn't expand anything.
|
||||
return None
|
||||
|
||||
def match(self):
|
||||
"""
|
||||
Match the first element of the frontier. In particular, if
|
||||
the first element of the frontier has the same type as the
|
||||
next text token, then substitute the text token into the tree.
|
||||
|
||||
:return: The token matched, if a match operation was
|
||||
performed. If no match was performed, return None
|
||||
:rtype: str or None
|
||||
"""
|
||||
|
||||
# Record that we've tried matching this token.
|
||||
tok = self._rtext[0]
|
||||
self._tried_m.setdefault(self._freeze(self._tree), []).append(tok)
|
||||
|
||||
# Make sure we *can* match.
|
||||
if len(self._frontier) == 0:
|
||||
return None
|
||||
if isinstance(self._tree[self._frontier[0]], Tree):
|
||||
return None
|
||||
|
||||
for _result in self._match(self._rtext, self._tree, self._frontier):
|
||||
# Return the token we just matched.
|
||||
return self._history[-1][0][0]
|
||||
return None
|
||||
|
||||
def backtrack(self):
|
||||
"""
|
||||
Return the parser to its state before the most recent
|
||||
        match or expand operation. Calling ``backtrack`` repeatedly returns
|
||||
the parser to successively earlier states. If no match or
|
||||
        expand operations have been performed, ``backtrack`` will make no
|
||||
changes.
|
||||
|
||||
:return: true if an operation was successfully undone.
|
||||
:rtype: bool
|
||||
"""
|
||||
if len(self._history) == 0:
|
||||
return False
|
||||
(self._rtext, self._tree, self._frontier) = self._history.pop()
|
||||
return True
|
||||
|
||||
def expandable_productions(self):
|
||||
"""
|
||||
:return: A list of all the productions for which expansions
|
||||
are available for the current parser state.
|
||||
:rtype: list(Production)
|
||||
"""
|
||||
# Make sure we *can* expand.
|
||||
if len(self._frontier) == 0:
|
||||
return []
|
||||
frontier_child = self._tree[self._frontier[0]]
|
||||
if len(self._frontier) == 0 or not isinstance(frontier_child, Tree):
|
||||
return []
|
||||
|
||||
return [
|
||||
p
|
||||
for p in self._grammar.productions()
|
||||
if p.lhs().symbol() == frontier_child.label()
|
||||
]
|
||||
|
||||
def untried_expandable_productions(self):
|
||||
"""
|
||||
:return: A list of all the untried productions for which
|
||||
expansions are available for the current parser state.
|
||||
:rtype: list(Production)
|
||||
"""
|
||||
|
||||
tried_expansions = self._tried_e.get(self._freeze(self._tree), [])
|
||||
return [p for p in self.expandable_productions() if p not in tried_expansions]
|
||||
|
||||
def untried_match(self):
|
||||
"""
|
||||
:return: Whether the first element of the frontier is a token
|
||||
that has not yet been matched.
|
||||
:rtype: bool
|
||||
"""
|
||||
|
||||
if len(self._rtext) == 0:
|
||||
return False
|
||||
tried_matches = self._tried_m.get(self._freeze(self._tree), [])
|
||||
return self._rtext[0] not in tried_matches
|
||||
|
||||
def currently_complete(self):
|
||||
"""
|
||||
:return: Whether the parser's current state represents a
|
||||
complete parse.
|
||||
:rtype: bool
|
||||
"""
|
||||
return len(self._frontier) == 0 and len(self._rtext) == 0
|
||||
|
||||
def _parse(self, remaining_text, tree, frontier):
|
||||
"""
|
||||
        A stub version of ``_parse`` that sets the parser's current
|
||||
state to the given arguments. In ``RecursiveDescentParser``,
|
||||
the ``_parse`` method is used to recursively continue parsing a
|
||||
text. ``SteppingRecursiveDescentParser`` overrides it to
|
||||
capture these recursive calls. It records the parser's old
|
||||
state in the history (to allow for backtracking), and updates
|
||||
the parser's new state using the given arguments. Finally, it
|
||||
returns ``[1]``, which is used by ``match`` and ``expand`` to
|
||||
detect whether their operations were successful.
|
||||
|
||||
:return: ``[1]``
|
||||
:rtype: list of int
|
||||
"""
|
||||
self._history.append((self._rtext, self._tree, self._frontier))
|
||||
self._rtext = remaining_text
|
||||
self._tree = tree
|
||||
self._frontier = frontier
|
||||
|
||||
# Is it a good parse? If so, record it.
|
||||
if len(frontier) == 0 and len(remaining_text) == 0:
|
||||
self._parses.append(tree)
|
||||
self._trace_succeed(self._tree, self._frontier)
|
||||
|
||||
return [1]
|
||||
|
||||
def parses(self):
|
||||
"""
|
||||
:return: An iterator of the parses that have been found by this
|
||||
parser so far.
|
||||
:rtype: list of Tree
|
||||
"""
|
||||
return iter(self._parses)
|
||||
|
||||
def set_grammar(self, grammar):
|
||||
"""
|
||||
Change the grammar used to parse texts.
|
||||
|
||||
:param grammar: The new grammar.
|
||||
:type grammar: CFG
|
||||
"""
|
||||
self._grammar = grammar
|
||||
|
||||
|
||||
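# --- Illustrative sketch (not part of the NLTK source) ---------------------
# How the stepping parser defined above might be driven one operation at a
# time.  The grammar and sentence are hypothetical; step() returns a matched
# token, the production used for an expansion, True for a backtrack, or None
# once nothing is left to try (see the docstrings above).
def _stepping_rd_sketch():
    from nltk import CFG

    toy_grammar = CFG.fromstring(
        """
        S -> NP VP
        NP -> 'I' | Det N
        VP -> V NP
        Det -> 'a'
        N -> 'dog'
        V -> 'saw'
        """
    )
    parser = SteppingRecursiveDescentParser(toy_grammar)
    parser.initialize("I saw a dog".split())
    # Keep stepping until every match, expansion and backtrack is exhausted.
    while parser.step() is not None:
        pass
    return list(parser.parses())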
##//////////////////////////////////////////////////////
|
||||
## Demonstration Code
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo():
|
||||
"""
|
||||
A demonstration of the recursive descent parser.
|
||||
"""
|
||||
|
||||
from nltk import CFG, parse
|
||||
|
||||
grammar = CFG.fromstring(
|
||||
"""
|
||||
S -> NP VP
|
||||
NP -> Det N | Det N PP
|
||||
VP -> V NP | V NP PP
|
||||
PP -> P NP
|
||||
NP -> 'I'
|
||||
N -> 'man' | 'park' | 'telescope' | 'dog'
|
||||
Det -> 'the' | 'a'
|
||||
P -> 'in' | 'with'
|
||||
V -> 'saw'
|
||||
"""
|
||||
)
|
||||
|
||||
for prod in grammar.productions():
|
||||
print(prod)
|
||||
|
||||
sent = "I saw a man in the park".split()
|
||||
parser = parse.RecursiveDescentParser(grammar, trace=2)
|
||||
for p in parser.parse(sent):
|
||||
print(p)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
478
backend/venv/Lib/site-packages/nltk/parse/shiftreduce.py
Normal file
@@ -0,0 +1,478 @@
|
||||
# Natural Language Toolkit: Shift-Reduce Parser
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.grammar import Nonterminal
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.tree import Tree
|
||||
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Shift/Reduce Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
class ShiftReduceParser(ParserI):
|
||||
"""
|
||||
A simple bottom-up CFG parser that uses two operations, "shift"
|
||||
and "reduce", to find a single parse for a text.
|
||||
|
||||
``ShiftReduceParser`` maintains a stack, which records the
|
||||
structure of a portion of the text. This stack is a list of
|
||||
strings and Trees that collectively cover a portion of
|
||||
the text. For example, while parsing the sentence "the dog saw
|
||||
the man" with a typical grammar, ``ShiftReduceParser`` will produce
|
||||
the following stack, which covers "the dog saw"::
|
||||
|
||||
[(NP: (Det: 'the') (N: 'dog')), (V: 'saw')]
|
||||
|
||||
``ShiftReduceParser`` attempts to extend the stack to cover the
|
||||
entire text, and to combine the stack elements into a single tree,
|
||||
producing a complete parse for the sentence.
|
||||
|
||||
Initially, the stack is empty. It is extended to cover the text,
|
||||
from left to right, by repeatedly applying two operations:
|
||||
|
||||
- "shift" moves a token from the beginning of the text to the
|
||||
end of the stack.
|
||||
- "reduce" uses a CFG production to combine the rightmost stack
|
||||
elements into a single Tree.
|
||||
|
||||
Often, more than one operation can be performed on a given stack.
|
||||
In this case, ``ShiftReduceParser`` uses the following heuristics
|
||||
to decide which operation to perform:
|
||||
|
||||
- Only shift if no reductions are available.
|
||||
- If multiple reductions are available, then apply the reduction
|
||||
whose CFG production is listed earliest in the grammar.
|
||||
|
||||
Note that these heuristics are not guaranteed to choose an
|
||||
operation that leads to a parse of the text. Also, if multiple
|
||||
    parses exist, ``ShiftReduceParser`` will return at most one of
|
||||
them.
|
||||
|
||||
:see: ``nltk.grammar``
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, trace=0):
|
||||
"""
|
||||
Create a new ``ShiftReduceParser``, that uses ``grammar`` to
|
||||
parse texts.
|
||||
|
||||
:type grammar: Grammar
|
||||
:param grammar: The grammar used to parse texts.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing
|
||||
output.
|
||||
"""
|
||||
self._grammar = grammar
|
||||
self._trace = trace
|
||||
self._check_grammar()
|
||||
|
||||
def grammar(self):
|
||||
return self._grammar
|
||||
|
||||
def parse(self, tokens):
|
||||
tokens = list(tokens)
|
||||
self._grammar.check_coverage(tokens)
|
||||
|
||||
# initialize the stack.
|
||||
stack = []
|
||||
remaining_text = tokens
|
||||
|
||||
# Trace output.
|
||||
if self._trace:
|
||||
print("Parsing %r" % " ".join(tokens))
|
||||
self._trace_stack(stack, remaining_text)
|
||||
|
||||
# iterate through the text, pushing the token onto
|
||||
# the stack, then reducing the stack.
|
||||
while len(remaining_text) > 0:
|
||||
self._shift(stack, remaining_text)
|
||||
while self._reduce(stack, remaining_text):
|
||||
pass
|
||||
|
||||
# Did we reduce everything?
|
||||
if len(stack) == 1:
|
||||
# Did we end up with the right category?
|
||||
if stack[0].label() == self._grammar.start().symbol():
|
||||
yield stack[0]
|
||||
|
||||
def _shift(self, stack, remaining_text):
|
||||
"""
|
||||
Move a token from the beginning of ``remaining_text`` to the
|
||||
end of ``stack``.
|
||||
|
||||
:type stack: list(str and Tree)
|
||||
:param stack: A list of strings and Trees, encoding
|
||||
the structure of the text that has been parsed so far.
|
||||
:type remaining_text: list(str)
|
||||
:param remaining_text: The portion of the text that is not yet
|
||||
covered by ``stack``.
|
||||
:rtype: None
|
||||
"""
|
||||
stack.append(remaining_text[0])
|
||||
remaining_text.remove(remaining_text[0])
|
||||
if self._trace:
|
||||
self._trace_shift(stack, remaining_text)
|
||||
|
||||
def _match_rhs(self, rhs, rightmost_stack):
|
||||
"""
|
||||
:rtype: bool
|
||||
:return: true if the right hand side of a CFG production
|
||||
matches the rightmost elements of the stack. ``rhs``
|
||||
matches ``rightmost_stack`` if they are the same length,
|
||||
and each element of ``rhs`` matches the corresponding
|
||||
element of ``rightmost_stack``. A nonterminal element of
|
||||
``rhs`` matches any Tree whose node value is equal
|
||||
to the nonterminal's symbol. A terminal element of ``rhs``
|
||||
matches any string whose type is equal to the terminal.
|
||||
:type rhs: list(terminal and Nonterminal)
|
||||
:param rhs: The right hand side of a CFG production.
|
||||
:type rightmost_stack: list(string and Tree)
|
||||
:param rightmost_stack: The rightmost elements of the parser's
|
||||
stack.
|
||||
"""
|
||||
|
||||
if len(rightmost_stack) != len(rhs):
|
||||
return False
|
||||
for i in range(len(rightmost_stack)):
|
||||
if isinstance(rightmost_stack[i], Tree):
|
||||
if not isinstance(rhs[i], Nonterminal):
|
||||
return False
|
||||
if rightmost_stack[i].label() != rhs[i].symbol():
|
||||
return False
|
||||
else:
|
||||
if isinstance(rhs[i], Nonterminal):
|
||||
return False
|
||||
if rightmost_stack[i] != rhs[i]:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _reduce(self, stack, remaining_text, production=None):
|
||||
"""
|
||||
Find a CFG production whose right hand side matches the
|
||||
rightmost stack elements; and combine those stack elements
|
||||
into a single Tree, with the node specified by the
|
||||
production's left-hand side. If more than one CFG production
|
||||
matches the stack, then use the production that is listed
|
||||
earliest in the grammar. The new Tree replaces the
|
||||
elements in the stack.
|
||||
|
||||
:rtype: Production or None
|
||||
:return: If a reduction is performed, then return the CFG
|
||||
production that the reduction is based on; otherwise,
|
||||
            return None.
|
||||
:type stack: list(string and Tree)
|
||||
:param stack: A list of strings and Trees, encoding
|
||||
the structure of the text that has been parsed so far.
|
||||
:type remaining_text: list(str)
|
||||
:param remaining_text: The portion of the text that is not yet
|
||||
covered by ``stack``.
|
||||
"""
|
||||
if production is None:
|
||||
productions = self._grammar.productions()
|
||||
else:
|
||||
productions = [production]
|
||||
|
||||
# Try each production, in order.
|
||||
for production in productions:
|
||||
rhslen = len(production.rhs())
|
||||
|
||||
# check if the RHS of a production matches the top of the stack
|
||||
if self._match_rhs(production.rhs(), stack[-rhslen:]):
|
||||
# combine the tree to reflect the reduction
|
||||
tree = Tree(production.lhs().symbol(), stack[-rhslen:])
|
||||
stack[-rhslen:] = [tree]
|
||||
|
||||
# We reduced something
|
||||
if self._trace:
|
||||
self._trace_reduce(stack, production, remaining_text)
|
||||
return production
|
||||
|
||||
# We didn't reduce anything
|
||||
return None
|
||||
|
||||
def trace(self, trace=2):
|
||||
"""
|
||||
Set the level of tracing output that should be generated when
|
||||
parsing a text.
|
||||
|
||||
:type trace: int
|
||||
:param trace: The trace level. A trace level of ``0`` will
|
||||
generate no tracing output; and higher trace levels will
|
||||
produce more verbose tracing output.
|
||||
:rtype: None
|
||||
"""
|
||||
# 1: just show shifts.
|
||||
# 2: show shifts & reduces
|
||||
        # 3: display which tokens & productions are shifted/reduced
|
||||
self._trace = trace
|
||||
|
||||
def _trace_stack(self, stack, remaining_text, marker=" "):
|
||||
"""
|
||||
Print trace output displaying the given stack and text.
|
||||
|
||||
:rtype: None
|
||||
:param marker: A character that is printed to the left of the
|
||||
stack. This is used with trace level 2 to print 'S'
|
||||
before shifted stacks and 'R' before reduced stacks.
|
||||
"""
|
||||
s = " " + marker + " [ "
|
||||
for elt in stack:
|
||||
if isinstance(elt, Tree):
|
||||
s += repr(Nonterminal(elt.label())) + " "
|
||||
else:
|
||||
s += repr(elt) + " "
|
||||
s += "* " + " ".join(remaining_text) + "]"
|
||||
print(s)
|
||||
|
||||
def _trace_shift(self, stack, remaining_text):
|
||||
"""
|
||||
Print trace output displaying that a token has been shifted.
|
||||
|
||||
:rtype: None
|
||||
"""
|
||||
if self._trace > 2:
|
||||
print("Shift %r:" % stack[-1])
|
||||
if self._trace == 2:
|
||||
self._trace_stack(stack, remaining_text, "S")
|
||||
elif self._trace > 0:
|
||||
self._trace_stack(stack, remaining_text)
|
||||
|
||||
def _trace_reduce(self, stack, production, remaining_text):
|
||||
"""
|
||||
Print trace output displaying that ``production`` was used to
|
||||
reduce ``stack``.
|
||||
|
||||
:rtype: None
|
||||
"""
|
||||
if self._trace > 2:
|
||||
rhs = " ".join(production.rhs())
|
||||
print(f"Reduce {production.lhs()!r} <- {rhs}")
|
||||
if self._trace == 2:
|
||||
self._trace_stack(stack, remaining_text, "R")
|
||||
elif self._trace > 1:
|
||||
self._trace_stack(stack, remaining_text)
|
||||
|
||||
def _check_grammar(self):
|
||||
"""
|
||||
Check to make sure that all of the CFG productions are
|
||||
potentially useful. If any productions can never be used,
|
||||
then print a warning.
|
||||
|
||||
:rtype: None
|
||||
"""
|
||||
productions = self._grammar.productions()
|
||||
|
||||
# Any production whose RHS is an extension of another production's RHS
|
||||
# will never be used.
|
||||
for i in range(len(productions)):
|
||||
for j in range(i + 1, len(productions)):
|
||||
rhs1 = productions[i].rhs()
|
||||
rhs2 = productions[j].rhs()
|
||||
if rhs1[: len(rhs2)] == rhs2:
|
||||
print("Warning: %r will never be used" % productions[i])
|
||||
|
||||
|
||||
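# --- Illustrative sketch (not part of the NLTK source) ---------------------
# The class docstring above warns that the shift/reduce heuristics are not
# guaranteed to find a parse.  With the hypothetical grammar below the parser
# greedily reduces "saw a man" to a VP (and then to an S) before the PP
# "in the park" has been shifted, so no parse covering the whole sentence is
# found and an empty list is returned.
def _greedy_reduce_sketch():
    from nltk import CFG

    toy_grammar = CFG.fromstring(
        """
        S -> NP VP
        VP -> V NP | V NP PP
        NP -> 'I' | Det N
        PP -> P NP
        Det -> 'a' | 'the'
        N -> 'man' | 'park'
        P -> 'in'
        V -> 'saw'
        """
    )
    parser = ShiftReduceParser(toy_grammar, trace=2)
    return list(parser.parse("I saw a man in the park".split()))  # expected: []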
##//////////////////////////////////////////////////////
|
||||
## Stepping Shift/Reduce Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
class SteppingShiftReduceParser(ShiftReduceParser):
|
||||
"""
|
||||
    A ``ShiftReduceParser`` that allows you to step through the parsing
|
||||
process, performing a single operation at a time. It also allows
|
||||
you to change the parser's grammar midway through parsing a text.
|
||||
|
||||
The ``initialize`` method is used to start parsing a text.
|
||||
``shift`` performs a single shift operation, and ``reduce`` performs
|
||||
a single reduce operation. ``step`` will perform a single reduce
|
||||
operation if possible; otherwise, it will perform a single shift
|
||||
operation. ``parses`` returns the set of parses that have been
|
||||
found by the parser.
|
||||
|
||||
:ivar _history: A list of ``(stack, remaining_text)`` pairs,
|
||||
containing all of the previous states of the parser. This
|
||||
history is used to implement the ``undo`` operation.
|
||||
:see: ``nltk.grammar``
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, trace=0):
|
||||
super().__init__(grammar, trace)
|
||||
self._stack = None
|
||||
self._remaining_text = None
|
||||
self._history = []
|
||||
|
||||
def parse(self, tokens):
|
||||
tokens = list(tokens)
|
||||
self.initialize(tokens)
|
||||
while self.step():
|
||||
pass
|
||||
return self.parses()
|
||||
|
||||
def stack(self):
|
||||
"""
|
||||
:return: The parser's stack.
|
||||
:rtype: list(str and Tree)
|
||||
"""
|
||||
return self._stack
|
||||
|
||||
def remaining_text(self):
|
||||
"""
|
||||
:return: The portion of the text that is not yet covered by the
|
||||
stack.
|
||||
:rtype: list(str)
|
||||
"""
|
||||
return self._remaining_text
|
||||
|
||||
def initialize(self, tokens):
|
||||
"""
|
||||
Start parsing a given text. This sets the parser's stack to
|
||||
``[]`` and sets its remaining text to ``tokens``.
|
||||
"""
|
||||
self._stack = []
|
||||
self._remaining_text = tokens
|
||||
self._history = []
|
||||
|
||||
def step(self):
|
||||
"""
|
||||
Perform a single parsing operation. If a reduction is
|
||||
possible, then perform that reduction, and return the
|
||||
production that it is based on. Otherwise, if a shift is
|
||||
possible, then perform it, and return True. Otherwise,
|
||||
return False.
|
||||
|
||||
:return: False if no operation was performed; True if a shift was
|
||||
performed; and the CFG production used to reduce if a
|
||||
reduction was performed.
|
||||
:rtype: Production or bool
|
||||
"""
|
||||
return self.reduce() or self.shift()
|
||||
|
||||
def shift(self):
|
||||
"""
|
||||
Move a token from the beginning of the remaining text to the
|
||||
end of the stack. If there are no more tokens in the
|
||||
remaining text, then do nothing.
|
||||
|
||||
:return: True if the shift operation was successful.
|
||||
:rtype: bool
|
||||
"""
|
||||
if len(self._remaining_text) == 0:
|
||||
return False
|
||||
self._history.append((self._stack[:], self._remaining_text[:]))
|
||||
self._shift(self._stack, self._remaining_text)
|
||||
return True
|
||||
|
||||
def reduce(self, production=None):
|
||||
"""
|
||||
Use ``production`` to combine the rightmost stack elements into
|
||||
a single Tree. If ``production`` does not match the
|
||||
rightmost stack elements, then do nothing.
|
||||
|
||||
:return: The production used to reduce the stack, if a
|
||||
reduction was performed. If no reduction was performed,
|
||||
return None.
|
||||
|
||||
:rtype: Production or None
|
||||
"""
|
||||
self._history.append((self._stack[:], self._remaining_text[:]))
|
||||
return_val = self._reduce(self._stack, self._remaining_text, production)
|
||||
|
||||
if not return_val:
|
||||
self._history.pop()
|
||||
return return_val
|
||||
|
||||
def undo(self):
|
||||
"""
|
||||
Return the parser to its state before the most recent
|
||||
        shift or reduce operation. Calling ``undo`` repeatedly returns
|
||||
the parser to successively earlier states. If no shift or
|
||||
reduce operations have been performed, ``undo`` will make no
|
||||
changes.
|
||||
|
||||
:return: true if an operation was successfully undone.
|
||||
:rtype: bool
|
||||
"""
|
||||
if len(self._history) == 0:
|
||||
return False
|
||||
(self._stack, self._remaining_text) = self._history.pop()
|
||||
return True
|
||||
|
||||
def reducible_productions(self):
|
||||
"""
|
||||
:return: A list of the productions for which reductions are
|
||||
available for the current parser state.
|
||||
:rtype: list(Production)
|
||||
"""
|
||||
productions = []
|
||||
for production in self._grammar.productions():
|
||||
rhslen = len(production.rhs())
|
||||
if self._match_rhs(production.rhs(), self._stack[-rhslen:]):
|
||||
productions.append(production)
|
||||
return productions
|
||||
|
||||
def parses(self):
|
||||
"""
|
||||
:return: An iterator of the parses that have been found by this
|
||||
parser so far.
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
if (
|
||||
len(self._remaining_text) == 0
|
||||
and len(self._stack) == 1
|
||||
and self._stack[0].label() == self._grammar.start().symbol()
|
||||
):
|
||||
yield self._stack[0]
|
||||
|
||||
# copied from nltk.parser
|
||||
|
||||
def set_grammar(self, grammar):
|
||||
"""
|
||||
Change the grammar used to parse texts.
|
||||
|
||||
:param grammar: The new grammar.
|
||||
:type grammar: CFG
|
||||
"""
|
||||
self._grammar = grammar
|
||||
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Demonstration Code
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo():
|
||||
"""
|
||||
A demonstration of the shift-reduce parser.
|
||||
"""
|
||||
|
||||
from nltk import CFG, parse
|
||||
|
||||
grammar = CFG.fromstring(
|
||||
"""
|
||||
S -> NP VP
|
||||
NP -> Det N | Det N PP
|
||||
VP -> V NP | V NP PP
|
||||
PP -> P NP
|
||||
NP -> 'I'
|
||||
N -> 'man' | 'park' | 'telescope' | 'dog'
|
||||
Det -> 'the' | 'a'
|
||||
P -> 'in' | 'with'
|
||||
V -> 'saw'
|
||||
"""
|
||||
)
|
||||
|
||||
sent = "I saw a man in the park".split()
|
||||
|
||||
parser = parse.ShiftReduceParser(grammar, trace=2)
|
||||
for p in parser.parse(sent):
|
||||
print(p)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
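# --- Illustrative sketch (not part of the NLTK source) ---------------------
# Driving the stepping shift/reduce parser above one operation at a time.
# The grammar and sentence are hypothetical; step() returns the production
# used for a reduction, True for a shift, and False when neither applies.
def _stepping_sr_sketch():
    from nltk import CFG

    toy_grammar = CFG.fromstring(
        """
        S -> NP VP
        NP -> 'I' | Det N
        VP -> V NP
        Det -> 'a'
        N -> 'dog'
        V -> 'saw'
        """
    )
    parser = SteppingShiftReduceParser(toy_grammar)
    parser.initialize("I saw a dog".split())
    while parser.step():
        # Show the partial analysis after every shift or reduce.
        print(parser.stack(), parser.remaining_text())
    return list(parser.parses())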
468
backend/venv/Lib/site-packages/nltk/parse/stanford.py
Normal file
@@ -0,0 +1,468 @@
|
||||
# Natural Language Toolkit: Interface to the Stanford Parser
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Steven Xu <xxu@student.unimelb.edu.au>
|
||||
#
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import warnings
|
||||
from subprocess import PIPE
|
||||
|
||||
from nltk.internals import (
|
||||
_java_options,
|
||||
config_java,
|
||||
find_jar_iter,
|
||||
find_jars_within_path,
|
||||
java,
|
||||
)
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
from nltk.tree import Tree
|
||||
|
||||
_stanford_url = "https://nlp.stanford.edu/software/lex-parser.shtml"
|
||||
|
||||
|
||||
class GenericStanfordParser(ParserI):
|
||||
"""Interface to the Stanford Parser"""
|
||||
|
||||
_MODEL_JAR_PATTERN = r"stanford-parser-(\d+)(\.(\d+))+-models\.jar"
|
||||
_JAR = r"stanford-parser\.jar"
|
||||
_MAIN_CLASS = "edu.stanford.nlp.parser.lexparser.LexicalizedParser"
|
||||
|
||||
_USE_STDIN = False
|
||||
_DOUBLE_SPACED_OUTPUT = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path_to_jar=None,
|
||||
path_to_models_jar=None,
|
||||
model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
|
||||
encoding="utf8",
|
||||
verbose=False,
|
||||
java_options="-mx4g",
|
||||
corenlp_options="",
|
||||
):
|
||||
# find the most recent code and model jar
|
||||
stanford_jar = max(
|
||||
find_jar_iter(
|
||||
self._JAR,
|
||||
path_to_jar,
|
||||
env_vars=("STANFORD_PARSER", "STANFORD_CORENLP"),
|
||||
searchpath=(),
|
||||
url=_stanford_url,
|
||||
verbose=verbose,
|
||||
is_regex=True,
|
||||
),
|
||||
key=lambda model_path: os.path.dirname(model_path),
|
||||
)
|
||||
|
||||
model_jar = max(
|
||||
find_jar_iter(
|
||||
self._MODEL_JAR_PATTERN,
|
||||
path_to_models_jar,
|
||||
env_vars=("STANFORD_MODELS", "STANFORD_CORENLP"),
|
||||
searchpath=(),
|
||||
url=_stanford_url,
|
||||
verbose=verbose,
|
||||
is_regex=True,
|
||||
),
|
||||
key=lambda model_path: os.path.dirname(model_path),
|
||||
)
|
||||
|
||||
# self._classpath = (stanford_jar, model_jar)
|
||||
|
||||
# Adding logging jar files to classpath
|
||||
stanford_dir = os.path.split(stanford_jar)[0]
|
||||
self._classpath = tuple([model_jar] + find_jars_within_path(stanford_dir))
|
||||
|
||||
self.model_path = model_path
|
||||
self._encoding = encoding
|
||||
self.corenlp_options = corenlp_options
|
||||
self.java_options = java_options
|
||||
|
||||
def _parse_trees_output(self, output_):
|
||||
res = []
|
||||
cur_lines = []
|
||||
cur_trees = []
|
||||
blank = False
|
||||
for line in output_.splitlines(False):
|
||||
if line == "":
|
||||
if blank:
|
||||
res.append(iter(cur_trees))
|
||||
cur_trees = []
|
||||
blank = False
|
||||
elif self._DOUBLE_SPACED_OUTPUT:
|
||||
cur_trees.append(self._make_tree("\n".join(cur_lines)))
|
||||
cur_lines = []
|
||||
blank = True
|
||||
else:
|
||||
res.append(iter([self._make_tree("\n".join(cur_lines))]))
|
||||
cur_lines = []
|
||||
else:
|
||||
cur_lines.append(line)
|
||||
blank = False
|
||||
return iter(res)
|
||||
|
||||
def parse_sents(self, sentences, verbose=False):
|
||||
"""
|
||||
Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
|
||||
list where each sentence is a list of words.
|
||||
Each sentence will be automatically tagged with this StanfordParser instance's
|
||||
tagger.
|
||||
        If whitespace exists inside a token, then the token will be treated as
|
||||
separate tokens.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(str))
|
||||
:rtype: iter(iter(Tree))
|
||||
"""
|
||||
cmd = [
|
||||
self._MAIN_CLASS,
|
||||
"-model",
|
||||
self.model_path,
|
||||
"-sentences",
|
||||
"newline",
|
||||
"-outputFormat",
|
||||
self._OUTPUT_FORMAT,
|
||||
"-tokenized",
|
||||
"-escaper",
|
||||
"edu.stanford.nlp.process.PTBEscapingProcessor",
|
||||
]
|
||||
return self._parse_trees_output(
|
||||
self._execute(
|
||||
cmd, "\n".join(" ".join(sentence) for sentence in sentences), verbose
|
||||
)
|
||||
)
|
||||
|
||||
def raw_parse(self, sentence, verbose=False):
|
||||
"""
|
||||
Use StanfordParser to parse a sentence. Takes a sentence as a string;
|
||||
before parsing, it will be automatically tokenized and tagged by
|
||||
the Stanford Parser.
|
||||
|
||||
:param sentence: Input sentence to parse
|
||||
:type sentence: str
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
return next(self.raw_parse_sents([sentence], verbose))
|
||||
|
||||
def raw_parse_sents(self, sentences, verbose=False):
|
||||
"""
|
||||
Use StanfordParser to parse multiple sentences. Takes multiple sentences as a
|
||||
list of strings.
|
||||
Each sentence will be automatically tokenized and tagged by the Stanford Parser.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(str)
|
||||
:rtype: iter(iter(Tree))
|
||||
"""
|
||||
cmd = [
|
||||
self._MAIN_CLASS,
|
||||
"-model",
|
||||
self.model_path,
|
||||
"-sentences",
|
||||
"newline",
|
||||
"-outputFormat",
|
||||
self._OUTPUT_FORMAT,
|
||||
]
|
||||
return self._parse_trees_output(
|
||||
self._execute(cmd, "\n".join(sentences), verbose)
|
||||
)
|
||||
|
||||
def tagged_parse(self, sentence, verbose=False):
|
||||
"""
|
||||
Use StanfordParser to parse a sentence. Takes a sentence as a list of
|
||||
(word, tag) tuples; the sentence must have already been tokenized and
|
||||
tagged.
|
||||
|
||||
:param sentence: Input sentence to parse
|
||||
:type sentence: list(tuple(str, str))
|
||||
:rtype: iter(Tree)
|
||||
"""
|
||||
return next(self.tagged_parse_sents([sentence], verbose))
|
||||
|
||||
def tagged_parse_sents(self, sentences, verbose=False):
|
||||
"""
|
||||
Use StanfordParser to parse multiple sentences. Takes multiple sentences
|
||||
where each sentence is a list of (word, tag) tuples.
|
||||
The sentences must have already been tokenized and tagged.
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(tuple(str, str)))
|
||||
:rtype: iter(iter(Tree))
|
||||
"""
|
||||
tag_separator = "/"
|
||||
cmd = [
|
||||
self._MAIN_CLASS,
|
||||
"-model",
|
||||
self.model_path,
|
||||
"-sentences",
|
||||
"newline",
|
||||
"-outputFormat",
|
||||
self._OUTPUT_FORMAT,
|
||||
"-tokenized",
|
||||
"-tagSeparator",
|
||||
tag_separator,
|
||||
"-tokenizerFactory",
|
||||
"edu.stanford.nlp.process.WhitespaceTokenizer",
|
||||
"-tokenizerMethod",
|
||||
"newCoreLabelTokenizerFactory",
|
||||
]
|
||||
# We don't need to escape slashes as "splitting is done on the last instance of the character in the token"
|
||||
return self._parse_trees_output(
|
||||
self._execute(
|
||||
cmd,
|
||||
"\n".join(
|
||||
" ".join(tag_separator.join(tagged) for tagged in sentence)
|
||||
for sentence in sentences
|
||||
),
|
||||
verbose,
|
||||
)
|
||||
)
|
||||
|
||||
def _execute(self, cmd, input_, verbose=False):
|
||||
encoding = self._encoding
|
||||
cmd.extend(["-encoding", encoding])
|
||||
if self.corenlp_options:
|
||||
cmd.extend(self.corenlp_options.split())
|
||||
|
||||
default_options = " ".join(_java_options)
|
||||
|
||||
# Configure java.
|
||||
config_java(options=self.java_options, verbose=verbose)
|
||||
|
||||
# Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
|
||||
with tempfile.NamedTemporaryFile(mode="wb", delete=False) as input_file:
|
||||
# Write the actual sentences to the temporary input file
|
||||
if isinstance(input_, str) and encoding:
|
||||
input_ = input_.encode(encoding)
|
||||
input_file.write(input_)
|
||||
input_file.flush()
|
||||
|
||||
# Run the tagger and get the output.
|
||||
if self._USE_STDIN:
|
||||
input_file.seek(0)
|
||||
stdout, stderr = java(
|
||||
cmd,
|
||||
classpath=self._classpath,
|
||||
stdin=input_file,
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
)
|
||||
else:
|
||||
cmd.append(input_file.name)
|
||||
stdout, stderr = java(
|
||||
cmd, classpath=self._classpath, stdout=PIPE, stderr=PIPE
|
||||
)
|
||||
|
||||
stdout = stdout.replace(b"\xc2\xa0", b" ")
|
||||
stdout = stdout.replace(b"\x00\xa0", b" ")
|
||||
stdout = stdout.decode(encoding)
|
||||
|
||||
os.unlink(input_file.name)
|
||||
|
||||
# Return java configurations to their default values.
|
||||
config_java(options=default_options, verbose=False)
|
||||
|
||||
return stdout
|
||||
|
||||
|
||||
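# --- Illustrative sketch (not part of the NLTK source) ---------------------
# The classes below all emit DeprecationWarnings pointing at
# nltk.parse.corenlp.  Assuming a CoreNLP server is already running on
# localhost:9000, the suggested replacement looks roughly like this:
def _corenlp_replacement_sketch():
    from nltk.parse.corenlp import CoreNLPParser

    parser = CoreNLPParser(url="http://localhost:9000")
    return next(parser.raw_parse("the quick brown fox jumps over the lazy dog"))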
class StanfordParser(GenericStanfordParser):
|
||||
"""
|
||||
>>> parser=StanfordParser(
|
||||
... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
|
||||
... ) # doctest: +SKIP
|
||||
|
||||
>>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
|
||||
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
|
||||
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
|
||||
|
||||
>>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
|
||||
... "the quick brown fox jumps over the lazy dog",
|
||||
... "the quick grey wolf jumps over the lazy fox"
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
|
||||
Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
|
||||
Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
|
||||
[Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP',
|
||||
[Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
|
||||
Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
|
||||
|
||||
>>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
|
||||
... "I 'm a dog".split(),
|
||||
... "This is my friends ' cat ( the tabby )".split(),
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
|
||||
Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
|
||||
[Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
|
||||
Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
|
||||
Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]
|
||||
|
||||
>>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
|
||||
... (
|
||||
... ("The", "DT"),
|
||||
... ("quick", "JJ"),
|
||||
... ("brown", "JJ"),
|
||||
... ("fox", "NN"),
|
||||
... ("jumped", "VBD"),
|
||||
... ("over", "IN"),
|
||||
... ("the", "DT"),
|
||||
... ("lazy", "JJ"),
|
||||
... ("dog", "NN"),
|
||||
... (".", "."),
|
||||
... ),
|
||||
... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
|
||||
Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
|
||||
[Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
|
||||
"""
|
||||
|
||||
_OUTPUT_FORMAT = "penn"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
warnings.warn(
|
||||
"The StanfordParser will be deprecated\n"
|
||||
"Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _make_tree(self, result):
|
||||
return Tree.fromstring(result)
|
||||
|
||||
|
||||
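# --- Illustrative sketch (not part of the NLTK source) ---------------------
# The tagged_parse* methods above expect sentences that are already tokenized
# and tagged.  Assuming the parser and model jars can be located through the
# STANFORD_PARSER / STANFORD_MODELS environment variables, a call might look
# like this (it will emit the DeprecationWarning defined above):
def _tagged_parse_sketch():
    tagged_sentence = [("The", "DT"), ("dog", "NN"), ("barks", "VBZ")]
    parser = StanfordParser()
    return next(parser.tagged_parse(tagged_sentence))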
class StanfordDependencyParser(GenericStanfordParser):
|
||||
"""
|
||||
>>> dep_parser=StanfordDependencyParser(
|
||||
... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
|
||||
... ) # doctest: +SKIP
|
||||
|
||||
>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])]
|
||||
|
||||
>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
|
||||
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
|
||||
((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
|
||||
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]
|
||||
|
||||
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
|
||||
... "The quick brown fox jumps over the lazy dog.",
|
||||
... "The quick grey wolf jumps over the lazy fox."
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]),
|
||||
Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])]
|
||||
|
||||
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
|
||||
... "I 'm a dog".split(),
|
||||
... "This is my friends ' cat ( the tabby )".split(),
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])]
|
||||
|
||||
>>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents((
|
||||
... (
|
||||
... ("The", "DT"),
|
||||
... ("quick", "JJ"),
|
||||
... ("brown", "JJ"),
|
||||
... ("fox", "NN"),
|
||||
... ("jumped", "VBD"),
|
||||
... ("over", "IN"),
|
||||
... ("the", "DT"),
|
||||
... ("lazy", "JJ"),
|
||||
... ("dog", "NN"),
|
||||
... (".", "."),
|
||||
... ),
|
||||
... ))],[]) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')),
|
||||
((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')),
|
||||
((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')),
|
||||
((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]]
|
||||
|
||||
"""
|
||||
|
||||
_OUTPUT_FORMAT = "conll2007"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
warnings.warn(
|
||||
"The StanfordDependencyParser will be deprecated\n"
|
||||
"Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def _make_tree(self, result):
|
||||
return DependencyGraph(result, top_relation_label="root")
|
||||
|
||||
|
||||
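# --- Illustrative sketch (not part of the NLTK source) ---------------------
# Replacement for the deprecated dependency parsers in this module, assuming
# a CoreNLP server is already running on localhost:9000:
def _corenlp_dependency_sketch():
    from nltk.parse.corenlp import CoreNLPDependencyParser

    dep_parser = CoreNLPDependencyParser(url="http://localhost:9000")
    (parse,) = dep_parser.raw_parse("The quick brown fox jumps over the lazy dog .")
    return list(parse.triples())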
class StanfordNeuralDependencyParser(GenericStanfordParser):
|
||||
"""
|
||||
>>> from nltk.parse.stanford import StanfordNeuralDependencyParser # doctest: +SKIP
|
||||
>>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')# doctest: +SKIP
|
||||
|
||||
>>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]
|
||||
|
||||
>>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det',
|
||||
(u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'),
|
||||
u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')),
|
||||
((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det',
|
||||
(u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ')), ((u'jumps', u'VBZ'),
|
||||
u'punct', (u'.', u'.'))]]
|
||||
|
||||
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents((
|
||||
... "The quick brown fox jumps over the lazy dog.",
|
||||
... "The quick grey wolf jumps over the lazy fox."
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over',
|
||||
'the', 'lazy']), '.']), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']),
|
||||
Tree('fox', ['over', 'the', 'lazy']), '.'])]
|
||||
|
||||
>>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents((
|
||||
... "I 'm a dog".split(),
|
||||
... "This is my friends ' cat ( the tabby )".split(),
|
||||
... ))], []) # doctest: +NORMALIZE_WHITESPACE +SKIP
|
||||
[Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
|
||||
['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
|
||||
"""
|
||||
|
||||
_OUTPUT_FORMAT = "conll"
|
||||
_MAIN_CLASS = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
|
||||
_JAR = r"stanford-corenlp-(\d+)(\.(\d+))+\.jar"
|
||||
_MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)(\.(\d+))+-models\.jar"
|
||||
_USE_STDIN = True
|
||||
_DOUBLE_SPACED_OUTPUT = True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
warnings.warn(
|
||||
"The StanfordNeuralDependencyParser will be deprecated\n"
|
||||
"Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.corenlp_options += "-annotators tokenize,ssplit,pos,depparse"
|
||||
|
||||
def tagged_parse_sents(self, sentences, verbose=False):
|
||||
"""
|
||||
Currently unimplemented because the neural dependency parser (and
|
||||
the StanfordCoreNLP pipeline class) doesn't support passing in pre-
|
||||
tagged tokens.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"tagged_parse[_sents] is not supported by "
|
||||
"StanfordNeuralDependencyParser; use "
|
||||
"parse[_sents] or raw_parse[_sents] instead."
|
||||
)
|
||||
|
||||
def _make_tree(self, result):
|
||||
return DependencyGraph(result, top_relation_label="ROOT")
|
||||
793
backend/venv/Lib/site-packages/nltk/parse/transitionparser.py
Normal file
@@ -0,0 +1,793 @@
|
||||
# Natural Language Toolkit: Arc-Standard and Arc-eager Transition Based Parsers
|
||||
#
|
||||
# Author: Long Duong <longdt219@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import pickle
|
||||
import tempfile
|
||||
from copy import deepcopy
|
||||
from operator import itemgetter
|
||||
from os import remove
|
||||
|
||||
try:
|
||||
from numpy import array
|
||||
from scipy import sparse
|
||||
from sklearn import svm
|
||||
from sklearn.datasets import load_svmlight_file
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
from nltk.parse import DependencyEvaluator, DependencyGraph, ParserI
|
||||
|
||||
|
||||
class Configuration:
|
||||
"""
|
||||
    Class for holding a configuration, which is a partial analysis of the input sentence.
|
||||
    The transition-based parser aims at finding a set of operators that transfers the initial
|
||||
configuration to the terminal configuration.
|
||||
|
||||
The configuration includes:
|
||||
        - Stack: for storing partially processed words
|
||||
- Buffer: for storing remaining input words
|
||||
- Set of arcs: for storing partially built dependency tree
|
||||
|
||||
    This class also provides a method to represent a configuration as a list of features.
|
||||
"""
|
||||
|
||||
def __init__(self, dep_graph):
|
||||
"""
|
||||
:param dep_graph: the representation of an input in the form of dependency graph.
|
||||
:type dep_graph: DependencyGraph where the dependencies are not specified.
|
||||
"""
|
||||
# dep_graph.nodes contain list of token for a sentence
|
||||
self.stack = [0] # The root element
|
||||
self.buffer = list(range(1, len(dep_graph.nodes))) # The rest is in the buffer
|
||||
self.arcs = [] # empty set of arc
|
||||
self._tokens = dep_graph.nodes
|
||||
self._max_address = len(self.buffer)
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
"Stack : "
|
||||
+ str(self.stack)
|
||||
+ " Buffer : "
|
||||
+ str(self.buffer)
|
||||
+ " Arcs : "
|
||||
+ str(self.arcs)
|
||||
)
|
||||
|
||||
def _check_informative(self, feat, flag=False):
|
||||
"""
|
||||
Check whether a feature is informative
|
||||
        The flag controls whether "_" is informative or not
|
||||
"""
|
||||
if feat is None:
|
||||
return False
|
||||
if feat == "":
|
||||
return False
|
||||
if flag is False:
|
||||
if feat == "_":
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_features(self):
|
||||
"""
|
||||
        Extract the set of features for the current configuration. Implement standard features as described in
|
||||
        Table 3.2 (page 31) in Dependency Parsing book by Sandra Kubler, Ryan McDonald, Joakim Nivre.
|
||||
Please note that these features are very basic.
|
||||
:return: list(str)
|
||||
"""
|
||||
result = []
|
||||
# Todo : can come up with more complicated features set for better
|
||||
# performance.
|
||||
if len(self.stack) > 0:
|
||||
# Stack 0
|
||||
stack_idx0 = self.stack[len(self.stack) - 1]
|
||||
token = self._tokens[stack_idx0]
|
||||
if self._check_informative(token["word"], True):
|
||||
result.append("STK_0_FORM_" + token["word"])
|
||||
if "lemma" in token and self._check_informative(token["lemma"]):
|
||||
result.append("STK_0_LEMMA_" + token["lemma"])
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("STK_0_POS_" + token["tag"])
|
||||
if "feats" in token and self._check_informative(token["feats"]):
|
||||
feats = token["feats"].split("|")
|
||||
for feat in feats:
|
||||
result.append("STK_0_FEATS_" + feat)
|
||||
# Stack 1
|
||||
if len(self.stack) > 1:
|
||||
stack_idx1 = self.stack[len(self.stack) - 2]
|
||||
token = self._tokens[stack_idx1]
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("STK_1_POS_" + token["tag"])
|
||||
|
||||
# Left most, right most dependency of stack[0]
|
||||
left_most = 1000000
|
||||
right_most = -1
|
||||
dep_left_most = ""
|
||||
dep_right_most = ""
|
||||
for wi, r, wj in self.arcs:
|
||||
if wi == stack_idx0:
|
||||
if (wj > wi) and (wj > right_most):
|
||||
right_most = wj
|
||||
dep_right_most = r
|
||||
if (wj < wi) and (wj < left_most):
|
||||
left_most = wj
|
||||
dep_left_most = r
|
||||
if self._check_informative(dep_left_most):
|
||||
result.append("STK_0_LDEP_" + dep_left_most)
|
||||
if self._check_informative(dep_right_most):
|
||||
result.append("STK_0_RDEP_" + dep_right_most)
|
||||
|
||||
# Check Buffered 0
|
||||
if len(self.buffer) > 0:
|
||||
# Buffer 0
|
||||
buffer_idx0 = self.buffer[0]
|
||||
token = self._tokens[buffer_idx0]
|
||||
if self._check_informative(token["word"], True):
|
||||
result.append("BUF_0_FORM_" + token["word"])
|
||||
if "lemma" in token and self._check_informative(token["lemma"]):
|
||||
result.append("BUF_0_LEMMA_" + token["lemma"])
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("BUF_0_POS_" + token["tag"])
|
||||
if "feats" in token and self._check_informative(token["feats"]):
|
||||
feats = token["feats"].split("|")
|
||||
for feat in feats:
|
||||
result.append("BUF_0_FEATS_" + feat)
|
||||
# Buffer 1
|
||||
if len(self.buffer) > 1:
|
||||
buffer_idx1 = self.buffer[1]
|
||||
token = self._tokens[buffer_idx1]
|
||||
if self._check_informative(token["word"], True):
|
||||
result.append("BUF_1_FORM_" + token["word"])
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("BUF_1_POS_" + token["tag"])
|
||||
if len(self.buffer) > 2:
|
||||
buffer_idx2 = self.buffer[2]
|
||||
token = self._tokens[buffer_idx2]
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("BUF_2_POS_" + token["tag"])
|
||||
if len(self.buffer) > 3:
|
||||
buffer_idx3 = self.buffer[3]
|
||||
token = self._tokens[buffer_idx3]
|
||||
if self._check_informative(token["tag"]):
|
||||
result.append("BUF_3_POS_" + token["tag"])
|
||||
# Left most, right most dependency of stack[0]
|
||||
left_most = 1000000
|
||||
right_most = -1
|
||||
dep_left_most = ""
|
||||
dep_right_most = ""
|
||||
for wi, r, wj in self.arcs:
|
||||
if wi == buffer_idx0:
|
||||
if (wj > wi) and (wj > right_most):
|
||||
right_most = wj
|
||||
dep_right_most = r
|
||||
if (wj < wi) and (wj < left_most):
|
||||
left_most = wj
|
||||
dep_left_most = r
|
||||
if self._check_informative(dep_left_most):
|
||||
result.append("BUF_0_LDEP_" + dep_left_most)
|
||||
if self._check_informative(dep_right_most):
|
||||
result.append("BUF_0_RDEP_" + dep_right_most)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
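# --- Illustrative sketch (not part of the NLTK source) ---------------------
# Building a Configuration from a small, hypothetical dependency graph and
# inspecting the features described in extract_features() above.  The input
# uses the 4-column (word, tag, head, rel) format accepted by DependencyGraph.
def _configuration_sketch():
    from nltk.parse.dependencygraph import DependencyGraph

    graph = DependencyGraph(
        "the DT 2 det\n"
        "dog NN 3 nsubj\n"
        "barks VBZ 0 ROOT\n"
    )
    conf = Configuration(graph)
    print(conf)                     # Stack : [0] Buffer : [1, 2, 3] Arcs : []
    print(conf.extract_features())  # word/POS features for the stack top and buffer front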
class Transition:
|
||||
"""
|
||||
    This class defines a set of transitions which are applied to a configuration to derive another configuration.
|
||||
    Note that the transitions differ between parsing algorithms.
|
||||
"""
|
||||
|
||||
# Define set of transitions
|
||||
LEFT_ARC = "LEFTARC"
|
||||
RIGHT_ARC = "RIGHTARC"
|
||||
SHIFT = "SHIFT"
|
||||
REDUCE = "REDUCE"
|
||||
|
||||
def __init__(self, alg_option):
|
||||
"""
|
||||
        :param alg_option: the algorithm option of this parser. Currently supports the `arc-standard` and `arc-eager` algorithms
|
||||
:type alg_option: str
|
||||
"""
|
||||
self._algo = alg_option
|
||||
if alg_option not in [
|
||||
TransitionParser.ARC_STANDARD,
|
||||
TransitionParser.ARC_EAGER,
|
||||
]:
|
||||
raise ValueError(
|
||||
" Currently we only support %s and %s "
|
||||
% (TransitionParser.ARC_STANDARD, TransitionParser.ARC_EAGER)
|
||||
)
|
||||
|
||||
def left_arc(self, conf, relation):
|
||||
"""
|
||||
        Note that the left-arc algorithm is quite similar for both arc-standard and arc-eager, except for its precondition
|
||||
|
||||
        :param conf: the current configuration
|
||||
:return: A new configuration or -1 if the pre-condition is not satisfied
|
||||
"""
|
||||
if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0):
|
||||
return -1
|
||||
if conf.buffer[0] == 0:
|
||||
# here is the Root element
|
||||
return -1
|
||||
|
||||
idx_wi = conf.stack[len(conf.stack) - 1]
|
||||
|
||||
flag = True
|
||||
if self._algo == TransitionParser.ARC_EAGER:
|
||||
for idx_parent, r, idx_child in conf.arcs:
|
||||
if idx_child == idx_wi:
|
||||
flag = False
|
||||
|
||||
if flag:
|
||||
conf.stack.pop()
|
||||
idx_wj = conf.buffer[0]
|
||||
conf.arcs.append((idx_wj, relation, idx_wi))
|
||||
else:
|
||||
return -1
|
||||
|
||||
def right_arc(self, conf, relation):
|
||||
"""
|
||||
Note that the algorithm for right-arc is DIFFERENT for arc-standard and arc-eager
|
||||
|
||||
        :param conf: the current configuration
|
||||
:return: A new configuration or -1 if the pre-condition is not satisfied
|
||||
"""
|
||||
if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0):
|
||||
return -1
|
||||
if self._algo == TransitionParser.ARC_STANDARD:
|
||||
idx_wi = conf.stack.pop()
|
||||
idx_wj = conf.buffer[0]
|
||||
conf.buffer[0] = idx_wi
|
||||
conf.arcs.append((idx_wi, relation, idx_wj))
|
||||
else: # arc-eager
|
||||
idx_wi = conf.stack[len(conf.stack) - 1]
|
||||
idx_wj = conf.buffer.pop(0)
|
||||
conf.stack.append(idx_wj)
|
||||
conf.arcs.append((idx_wi, relation, idx_wj))
|
||||
|
||||
def reduce(self, conf):
|
||||
"""
|
||||
Note that the algorithm for reduce is only available for arc-eager
|
||||
|
||||
        :param conf: the current configuration
|
||||
:return: A new configuration or -1 if the pre-condition is not satisfied
|
||||
"""
|
||||
|
||||
if self._algo != TransitionParser.ARC_EAGER:
|
||||
return -1
|
||||
if len(conf.stack) <= 0:
|
||||
return -1
|
||||
|
||||
idx_wi = conf.stack[len(conf.stack) - 1]
|
||||
flag = False
|
||||
for idx_parent, r, idx_child in conf.arcs:
|
||||
if idx_child == idx_wi:
|
||||
flag = True
|
||||
if flag:
|
||||
conf.stack.pop() # reduce it
|
||||
else:
|
||||
return -1
|
||||
|
||||
def shift(self, conf):
|
||||
"""
|
||||
Note that the algorithm for shift is the SAME for arc-standard and arc-eager
|
||||
|
||||
        :param conf: the current configuration
|
||||
:return: A new configuration or -1 if the pre-condition is not satisfied
|
||||
"""
|
||||
if len(conf.buffer) <= 0:
|
||||
return -1
|
||||
idx_wi = conf.buffer.pop(0)
|
||||
conf.stack.append(idx_wi)
|
||||
|
||||
|
||||
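# --- Illustrative sketch (not part of the NLTK source) ---------------------
# Applying individual transitions by hand to a hypothetical configuration.
# The literal "arc-eager" is the same string exposed as
# TransitionParser.ARC_EAGER below.
def _transition_sketch():
    from nltk.parse.dependencygraph import DependencyGraph

    graph = DependencyGraph(
        "the DT 2 det\n"
        "dog NN 3 nsubj\n"
        "barks VBZ 0 ROOT\n"
    )
    conf = Configuration(graph)
    op = Transition("arc-eager")
    op.shift(conf)            # stack [0, 1], buffer [2, 3]
    op.left_arc(conf, "det")  # pops 1 from the stack and records the arc (2, 'det', 1)
    print(conf.arcs)          # [(2, 'det', 1)]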
class TransitionParser(ParserI):
|
||||
"""
|
||||
    Class for a transition-based parser. Implements two algorithms: "arc-standard" and "arc-eager".
|
||||
"""
|
||||
|
||||
ARC_STANDARD = "arc-standard"
|
||||
ARC_EAGER = "arc-eager"
|
||||
|
||||
def __init__(self, algorithm):
|
||||
"""
|
||||
        :param algorithm: the algorithm option of this parser. Currently supports the `arc-standard` and `arc-eager` algorithms
|
||||
:type algorithm: str
|
||||
"""
|
||||
if not (algorithm in [self.ARC_STANDARD, self.ARC_EAGER]):
|
||||
raise ValueError(
|
||||
" Currently we only support %s and %s "
|
||||
% (self.ARC_STANDARD, self.ARC_EAGER)
|
||||
)
|
||||
self._algorithm = algorithm
|
||||
|
||||
self._dictionary = {}
|
||||
self._transition = {}
|
||||
self._match_transition = {}
|
||||
|
||||
def _get_dep_relation(self, idx_parent, idx_child, depgraph):
|
||||
p_node = depgraph.nodes[idx_parent]
|
||||
c_node = depgraph.nodes[idx_child]
|
||||
|
||||
if c_node["word"] is None:
|
||||
return None # Root word
|
||||
|
||||
if c_node["head"] == p_node["address"]:
|
||||
return c_node["rel"]
|
||||
else:
|
||||
return None
|
||||
|
||||
def _convert_to_binary_features(self, features):
|
||||
"""
|
||||
:param features: list of feature string which is needed to convert to binary features
|
||||
:type features: list(str)
|
||||
        :return: string of binary features in libsvm format, i.e. 'featureID:value' pairs
|
||||
"""
|
||||
unsorted_result = []
|
||||
for feature in features:
|
||||
self._dictionary.setdefault(feature, len(self._dictionary))
|
||||
unsorted_result.append(self._dictionary[feature])
|
||||
|
||||
# Default value of each feature is 1.0
|
||||
return " ".join(
|
||||
str(featureID) + ":1.0" for featureID in sorted(unsorted_result)
|
||||
)
|
||||
|
||||
def _is_projective(self, depgraph):
|
||||
arc_list = []
|
||||
for key in depgraph.nodes:
|
||||
node = depgraph.nodes[key]
|
||||
|
||||
if "head" in node:
|
||||
childIdx = node["address"]
|
||||
parentIdx = node["head"]
|
||||
if parentIdx is not None:
|
||||
arc_list.append((parentIdx, childIdx))
|
||||
|
||||
for parentIdx, childIdx in arc_list:
|
||||
# Ensure that childIdx < parentIdx
|
||||
if childIdx > parentIdx:
|
||||
temp = childIdx
|
||||
childIdx = parentIdx
|
||||
parentIdx = temp
|
||||
for k in range(childIdx + 1, parentIdx):
|
||||
for m in range(len(depgraph.nodes)):
|
||||
if (m < childIdx) or (m > parentIdx):
|
||||
if (k, m) in arc_list:
|
||||
return False
|
||||
if (m, k) in arc_list:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _write_to_file(self, key, binary_features, input_file):
|
||||
"""
|
||||
        Write the binary features to the input file and update the transition dictionary.
|
||||
"""
|
||||
self._transition.setdefault(key, len(self._transition) + 1)
|
||||
self._match_transition[self._transition[key]] = key
|
||||
|
||||
input_str = str(self._transition[key]) + " " + binary_features + "\n"
|
||||
input_file.write(input_str.encode("utf-8"))
|
||||
|
||||
def _create_training_examples_arc_std(self, depgraphs, input_file):
|
||||
"""
|
||||
Create the training example in the libsvm format and write it to the input_file.
|
||||
        Reference : Page 32, Chapter 3. Dependency Parsing by Sandra Kubler, Ryan McDonald and Joakim Nivre (2009)
|
||||
"""
|
||||
operation = Transition(self.ARC_STANDARD)
|
||||
count_proj = 0
|
||||
training_seq = []
|
||||
|
||||
for depgraph in depgraphs:
|
||||
if not self._is_projective(depgraph):
|
||||
continue
|
||||
|
||||
count_proj += 1
|
||||
conf = Configuration(depgraph)
|
||||
while len(conf.buffer) > 0:
|
||||
b0 = conf.buffer[0]
|
||||
features = conf.extract_features()
|
||||
binary_features = self._convert_to_binary_features(features)
|
||||
|
||||
if len(conf.stack) > 0:
|
||||
s0 = conf.stack[len(conf.stack) - 1]
|
||||
# Left-arc operation
|
||||
rel = self._get_dep_relation(b0, s0, depgraph)
|
||||
if rel is not None:
|
||||
key = Transition.LEFT_ARC + ":" + rel
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.left_arc(conf, rel)
|
||||
training_seq.append(key)
|
||||
continue
|
||||
|
||||
# Right-arc operation
|
||||
rel = self._get_dep_relation(s0, b0, depgraph)
|
||||
if rel is not None:
|
||||
precondition = True
|
||||
# Get the max-index of buffer
|
||||
maxID = conf._max_address
|
||||
|
||||
for w in range(maxID + 1):
|
||||
if w != b0:
|
||||
relw = self._get_dep_relation(b0, w, depgraph)
|
||||
if relw is not None:
|
||||
if (b0, relw, w) not in conf.arcs:
|
||||
precondition = False
|
||||
|
||||
if precondition:
|
||||
key = Transition.RIGHT_ARC + ":" + rel
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.right_arc(conf, rel)
|
||||
training_seq.append(key)
|
||||
continue
|
||||
|
||||
# Shift operation as the default
|
||||
key = Transition.SHIFT
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.shift(conf)
|
||||
training_seq.append(key)
|
||||
|
||||
print(" Number of training examples : " + str(len(depgraphs)))
|
||||
print(" Number of valid (projective) examples : " + str(count_proj))
|
||||
return training_seq
|
||||
|
||||
def _create_training_examples_arc_eager(self, depgraphs, input_file):
|
||||
"""
|
||||
Create the training example in the libsvm format and write it to the input_file.
|
||||
        Reference : 'A Dynamic Oracle for Arc-Eager Dependency Parsing' by Yoav Goldberg and Joakim Nivre
|
||||
"""
|
||||
operation = Transition(self.ARC_EAGER)
|
||||
countProj = 0
|
||||
training_seq = []
|
||||
|
||||
for depgraph in depgraphs:
|
||||
if not self._is_projective(depgraph):
|
||||
continue
|
||||
|
||||
countProj += 1
|
||||
conf = Configuration(depgraph)
|
||||
while len(conf.buffer) > 0:
|
||||
b0 = conf.buffer[0]
|
||||
features = conf.extract_features()
|
||||
binary_features = self._convert_to_binary_features(features)
|
||||
|
||||
if len(conf.stack) > 0:
|
||||
s0 = conf.stack[len(conf.stack) - 1]
|
||||
# Left-arc operation
|
||||
rel = self._get_dep_relation(b0, s0, depgraph)
|
||||
if rel is not None:
|
||||
key = Transition.LEFT_ARC + ":" + rel
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.left_arc(conf, rel)
|
||||
training_seq.append(key)
|
||||
continue
|
||||
|
||||
# Right-arc operation
|
||||
rel = self._get_dep_relation(s0, b0, depgraph)
|
||||
if rel is not None:
|
||||
key = Transition.RIGHT_ARC + ":" + rel
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.right_arc(conf, rel)
|
||||
training_seq.append(key)
|
||||
continue
|
||||
|
||||
# reduce operation
|
||||
flag = False
|
||||
for k in range(s0):
|
||||
if self._get_dep_relation(k, b0, depgraph) is not None:
|
||||
flag = True
|
||||
if self._get_dep_relation(b0, k, depgraph) is not None:
|
||||
flag = True
|
||||
if flag:
|
||||
key = Transition.REDUCE
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.reduce(conf)
|
||||
training_seq.append(key)
|
||||
continue
|
||||
|
||||
# Shift operation as the default
|
||||
key = Transition.SHIFT
|
||||
self._write_to_file(key, binary_features, input_file)
|
||||
operation.shift(conf)
|
||||
training_seq.append(key)
|
||||
|
||||
print(" Number of training examples : " + str(len(depgraphs)))
|
||||
print(" Number of valid (projective) examples : " + str(countProj))
|
||||
return training_seq
|
||||
|
||||
def train(self, depgraphs, modelfile, verbose=True):
|
||||
"""
|
||||
:param depgraphs : list of DependencyGraph as the training data
|
||||
:type depgraphs : list(DependencyGraph)
|
||||
:param modelfile : file name to save the trained model
|
||||
:type modelfile : str
|
||||
"""
|
||||
|
||||
try:
|
||||
input_file = tempfile.NamedTemporaryFile(
|
||||
prefix="transition_parse.train", dir=tempfile.gettempdir(), delete=False
|
||||
)
|
||||
|
||||
if self._algorithm == self.ARC_STANDARD:
|
||||
self._create_training_examples_arc_std(depgraphs, input_file)
|
||||
else:
|
||||
self._create_training_examples_arc_eager(depgraphs, input_file)
|
||||
|
||||
input_file.close()
|
||||
# Using the temporary file to train the libsvm classifier
|
||||
x_train, y_train = load_svmlight_file(input_file.name)
|
||||
# The parameter is set according to the paper:
|
||||
# Algorithms for Deterministic Incremental Dependency Parsing by Joakim Nivre
|
||||
# TODO: probability=True makes training very slow because it triggers
# internal cross-validation; the speed here needs to be improved
|
||||
model = svm.SVC(
|
||||
kernel="poly",
|
||||
degree=2,
|
||||
coef0=0,
|
||||
gamma=0.2,
|
||||
C=0.5,
|
||||
verbose=verbose,
|
||||
probability=True,
|
||||
)
|
||||
|
||||
model.fit(x_train, y_train)
|
||||
# Save the model to file name (as pickle)
|
||||
pickle.dump(model, open(modelfile, "wb"))
|
||||
finally:
|
||||
remove(input_file.name)
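# Note (added comment): the pickled SVM saved above is only half of the state
# needed at parse time; self._dictionary and self._match_transition, built
# while the training examples were written, are what parse() below uses to map
# features and predicted class labels back to transitions.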
|
||||
|
||||
def parse(self, depgraphs, modelFile):
|
||||
"""
|
||||
:param depgraphs: the list of test sentences, each represented as a dependency graph in which the 'head' information is a dummy value
|
||||
:type depgraphs: list(DependencyGraph)
|
||||
:param modelFile: the model file
|
||||
:type modelFile: str
|
||||
:return: list (DependencyGraph) with the 'head' and 'rel' information
|
||||
"""
|
||||
result = []
|
||||
# First load the model
|
||||
model = pickle.load(open(modelFile, "rb"))
|
||||
operation = Transition(self._algorithm)
|
||||
|
||||
for depgraph in depgraphs:
|
||||
conf = Configuration(depgraph)
|
||||
while len(conf.buffer) > 0:
|
||||
features = conf.extract_features()
|
||||
col = []
|
||||
row = []
|
||||
data = []
|
||||
for feature in features:
|
||||
if feature in self._dictionary:
|
||||
col.append(self._dictionary[feature])
|
||||
row.append(0)
|
||||
data.append(1.0)
|
||||
np_col = array(sorted(col)) # NB : index must be sorted
|
||||
np_row = array(row)
|
||||
np_data = array(data)
|
||||
|
||||
x_test = sparse.csr_matrix(
|
||||
(np_data, (np_row, np_col)), shape=(1, len(self._dictionary))
|
||||
)
|
||||
|
||||
# It would be better to use the decision function as follows, BUT that is not yet supported for sparse SVMs
|
||||
# Using decision function to build the votes array
|
||||
# dec_func = model.decision_function(x_test)[0]
|
||||
# votes = {}
|
||||
# k = 0
|
||||
# for i in range(len(model.classes_)):
|
||||
# for j in range(i+1, len(model.classes_)):
|
||||
# #if dec_func[k] > 0:
|
||||
# votes.setdefault(i,0)
|
||||
# votes[i] +=1
|
||||
# else:
|
||||
# votes.setdefault(j,0)
|
||||
# votes[j] +=1
|
||||
# k +=1
|
||||
# Sort votes according to the values
|
||||
# sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True)
|
||||
|
||||
# We will use predict_proba instead of decision_function
|
||||
prob_dict = {}
|
||||
pred_prob = model.predict_proba(x_test)[0]
|
||||
for i in range(len(pred_prob)):
|
||||
prob_dict[i] = pred_prob[i]
|
||||
sorted_Prob = sorted(prob_dict.items(), key=itemgetter(1), reverse=True)
|
||||
|
||||
# Note that SHIFT is always a valid operation
|
||||
for y_pred_idx, confidence in sorted_Prob:
|
||||
# y_pred = model.predict(x_test)[0]
|
||||
# From the prediction match to the operation
|
||||
y_pred = model.classes_[y_pred_idx]
|
||||
|
||||
if y_pred in self._match_transition:
|
||||
strTransition = self._match_transition[y_pred]
|
||||
baseTransition = strTransition.split(":")[0]
|
||||
|
||||
if baseTransition == Transition.LEFT_ARC:
|
||||
if (
|
||||
operation.left_arc(conf, strTransition.split(":")[1])
|
||||
!= -1
|
||||
):
|
||||
break
|
||||
elif baseTransition == Transition.RIGHT_ARC:
|
||||
if (
|
||||
operation.right_arc(conf, strTransition.split(":")[1])
|
||||
!= -1
|
||||
):
|
||||
break
|
||||
elif baseTransition == Transition.REDUCE:
|
||||
if operation.reduce(conf) != -1:
|
||||
break
|
||||
elif baseTransition == Transition.SHIFT:
|
||||
if operation.shift(conf) != -1:
|
||||
break
|
||||
else:
|
||||
raise ValueError(
|
||||
"The predicted transition is not recognized, expected errors"
|
||||
)
|
||||
|
||||
# Once the transitions are finished, build the dependency graph from conf.arcs
|
||||
|
||||
new_depgraph = deepcopy(depgraph)
|
||||
for key in new_depgraph.nodes:
|
||||
node = new_depgraph.nodes[key]
|
||||
node["rel"] = ""
|
||||
# By default, every token depends on the root
|
||||
node["head"] = 0
|
||||
for head, rel, child in conf.arcs:
|
||||
c_node = new_depgraph.nodes[child]
|
||||
c_node["head"] = head
|
||||
c_node["rel"] = rel
|
||||
result.append(new_depgraph)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def demo():
|
||||
"""
|
||||
>>> from nltk.parse import DependencyGraph, DependencyEvaluator
|
||||
>>> from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
|
||||
>>> gold_sent = DependencyGraph(\"""
|
||||
... Economic JJ 2 ATT
|
||||
... news NN 3 SBJ
|
||||
... has VBD 0 ROOT
|
||||
... little JJ 5 ATT
|
||||
... effect NN 3 OBJ
|
||||
... on IN 5 ATT
|
||||
... financial JJ 8 ATT
|
||||
... markets NNS 6 PC
|
||||
... . . 3 PU
|
||||
... \""")
|
||||
|
||||
>>> conf = Configuration(gold_sent)
|
||||
|
||||
###################### Check the Initial Feature ########################
|
||||
|
||||
>>> print(', '.join(conf.extract_features()))
|
||||
STK_0_POS_TOP, BUF_0_FORM_Economic, BUF_0_LEMMA_Economic, BUF_0_POS_JJ, BUF_1_FORM_news, BUF_1_POS_NN, BUF_2_POS_VBD, BUF_3_POS_JJ
|
||||
|
||||
###################### Check The Transition #######################
|
||||
Check the Initialized Configuration
|
||||
>>> print(conf)
|
||||
Stack : [0] Buffer : [1, 2, 3, 4, 5, 6, 7, 8, 9] Arcs : []
|
||||
|
||||
A. Do some transition checks for ARC-STANDARD
|
||||
|
||||
>>> operation = Transition('arc-standard')
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf, "ATT")
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf,"SBJ")
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf, "ATT")
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf, "ATT")
|
||||
|
||||
Middle Configuration and Features Check
|
||||
>>> print(conf)
|
||||
Stack : [0, 3, 5, 6] Buffer : [8, 9] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7)]
|
||||
|
||||
>>> print(', '.join(conf.extract_features()))
|
||||
STK_0_FORM_on, STK_0_LEMMA_on, STK_0_POS_IN, STK_1_POS_NN, BUF_0_FORM_markets, BUF_0_LEMMA_markets, BUF_0_POS_NNS, BUF_1_FORM_., BUF_1_POS_., BUF_0_LDEP_ATT
|
||||
|
||||
>>> operation.right_arc(conf, "PC")
|
||||
>>> operation.right_arc(conf, "ATT")
|
||||
>>> operation.right_arc(conf, "OBJ")
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.right_arc(conf, "PU")
|
||||
>>> operation.right_arc(conf, "ROOT")
|
||||
>>> operation.shift(conf)
|
||||
|
||||
Terminated Configuration Check
|
||||
>>> print(conf)
|
||||
Stack : [0] Buffer : [] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7), (6, 'PC', 8), (5, 'ATT', 6), (3, 'OBJ', 5), (3, 'PU', 9), (0, 'ROOT', 3)]
|
||||
|
||||
|
||||
B. Do some transition checks for ARC-EAGER
|
||||
|
||||
>>> conf = Configuration(gold_sent)
|
||||
>>> operation = Transition('arc-eager')
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf,'ATT')
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf,'SBJ')
|
||||
>>> operation.right_arc(conf,'ROOT')
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf,'ATT')
|
||||
>>> operation.right_arc(conf,'OBJ')
|
||||
>>> operation.right_arc(conf,'ATT')
|
||||
>>> operation.shift(conf)
|
||||
>>> operation.left_arc(conf,'ATT')
|
||||
>>> operation.right_arc(conf,'PC')
|
||||
>>> operation.reduce(conf)
|
||||
>>> operation.reduce(conf)
|
||||
>>> operation.reduce(conf)
|
||||
>>> operation.right_arc(conf,'PU')
|
||||
>>> print(conf)
|
||||
Stack : [0, 3, 9] Buffer : [] Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (0, 'ROOT', 3), (5, 'ATT', 4), (3, 'OBJ', 5), (5, 'ATT', 6), (8, 'ATT', 7), (6, 'PC', 8), (3, 'PU', 9)]
|
||||
|
||||
###################### Check The Training Function #######################
|
||||
|
||||
A. Check the ARC-STANDARD training
|
||||
>>> import tempfile
|
||||
>>> import os
|
||||
>>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False)
|
||||
|
||||
>>> parser_std = TransitionParser('arc-standard')
|
||||
>>> print(', '.join(parser_std._create_training_examples_arc_std([gold_sent], input_file)))
|
||||
Number of training examples : 1
|
||||
Number of valid (projective) examples : 1
|
||||
SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, SHIFT, SHIFT, LEFTARC:ATT, SHIFT, SHIFT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, RIGHTARC:ATT, RIGHTARC:OBJ, SHIFT, RIGHTARC:PU, RIGHTARC:ROOT, SHIFT
|
||||
|
||||
>>> parser_std.train([gold_sent],'temp.arcstd.model', verbose=False)
|
||||
Number of training examples : 1
|
||||
Number of valid (projective) examples : 1
|
||||
>>> input_file.close()
|
||||
>>> remove(input_file.name)
|
||||
|
||||
B. Check the ARC-EAGER training
|
||||
|
||||
>>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(),delete=False)
|
||||
>>> parser_eager = TransitionParser('arc-eager')
|
||||
>>> print(', '.join(parser_eager._create_training_examples_arc_eager([gold_sent], input_file)))
|
||||
Number of training examples : 1
|
||||
Number of valid (projective) examples : 1
|
||||
SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, RIGHTARC:ROOT, SHIFT, LEFTARC:ATT, RIGHTARC:OBJ, RIGHTARC:ATT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, REDUCE, REDUCE, REDUCE, RIGHTARC:PU
|
||||
|
||||
>>> parser_eager.train([gold_sent],'temp.arceager.model', verbose=False)
|
||||
Number of training examples : 1
|
||||
Number of valid (projective) examples : 1
|
||||
|
||||
>>> input_file.close()
|
||||
>>> remove(input_file.name)
|
||||
|
||||
###################### Check The Parsing Function ########################
|
||||
|
||||
A. Check the ARC-STANDARD parser
|
||||
|
||||
>>> result = parser_std.parse([gold_sent], 'temp.arcstd.model')
|
||||
>>> de = DependencyEvaluator(result, [gold_sent])
|
||||
>>> de.eval() >= (0, 0)
|
||||
True
|
||||
|
||||
B. Check the ARC-EAGER parser
|
||||
>>> result = parser_eager.parse([gold_sent], 'temp.arceager.model')
|
||||
>>> de = DependencyEvaluator(result, [gold_sent])
|
||||
>>> de.eval() >= (0, 0)
|
||||
True
|
||||
|
||||
Remove test temporary files
|
||||
>>> remove('temp.arceager.model')
|
||||
>>> remove('temp.arcstd.model')
|
||||
|
||||
Note that the result is very poor because there is only one training example.
|
||||
"""
|
||||
234
backend/venv/Lib/site-packages/nltk/parse/util.py
Normal file
@@ -0,0 +1,234 @@
|
||||
# Natural Language Toolkit: Parser Utility Functions
|
||||
#
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
# Tom Aarsen <>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
|
||||
"""
|
||||
Utility functions for parsers.
|
||||
"""
|
||||
|
||||
from nltk.data import load
|
||||
from nltk.grammar import CFG, PCFG, FeatureGrammar
|
||||
from nltk.parse.chart import Chart, ChartParser
|
||||
from nltk.parse.featurechart import FeatureChart, FeatureChartParser
|
||||
from nltk.parse.pchart import InsideChartParser
|
||||
|
||||
|
||||
def load_parser(
|
||||
grammar_url, trace=0, parser=None, chart_class=None, beam_size=0, **load_args
|
||||
):
|
||||
"""
|
||||
Load a grammar from a file, and build a parser based on that grammar.
|
||||
The parser depends on the grammar format, and might also depend
|
||||
on properties of the grammar itself.
|
||||
|
||||
The following grammar formats are currently supported:
|
||||
- ``'cfg'`` (CFGs: ``CFG``)
|
||||
- ``'pcfg'`` (probabilistic CFGs: ``PCFG``)
|
||||
- ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``)
|
||||
|
||||
:type grammar_url: str
|
||||
:param grammar_url: A URL specifying where the grammar is located.
|
||||
The default protocol is ``"nltk:"``, which searches for the file
|
||||
in the NLTK data package.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing output.
|
||||
:param parser: The class used for parsing; should be ``ChartParser``
|
||||
or a subclass.
|
||||
If None, the class depends on the grammar format.
|
||||
:param chart_class: The class used for storing the chart;
|
||||
should be ``Chart`` or a subclass.
|
||||
Only used for CFGs and feature CFGs.
|
||||
If None, the chart class depends on the grammar format.
|
||||
:type beam_size: int
|
||||
:param beam_size: The maximum length for the parser's edge queue.
|
||||
Only used for probabilistic CFGs.
|
||||
:param load_args: Keyword parameters used when loading the grammar.
|
||||
See ``data.load`` for more information.
|
||||
"""
|
||||
grammar = load(grammar_url, **load_args)
|
||||
if not isinstance(grammar, CFG):
|
||||
raise ValueError("The grammar must be a CFG, " "or a subclass thereof.")
|
||||
if isinstance(grammar, PCFG):
|
||||
if parser is None:
|
||||
parser = InsideChartParser
|
||||
return parser(grammar, trace=trace, beam_size=beam_size)
|
||||
|
||||
elif isinstance(grammar, FeatureGrammar):
|
||||
if parser is None:
|
||||
parser = FeatureChartParser
|
||||
if chart_class is None:
|
||||
chart_class = FeatureChart
|
||||
return parser(grammar, trace=trace, chart_class=chart_class)
|
||||
|
||||
else: # Plain CFG.
|
||||
if parser is None:
|
||||
parser = ChartParser
|
||||
if chart_class is None:
|
||||
chart_class = Chart
|
||||
return parser(grammar, trace=trace, chart_class=chart_class)
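# Illustrative usage sketch (added; assumes the NLTK data package is installed,
# where "grammars/book_grammars/feat0.fcfg" is the feature grammar used in the
# NLTK book):
# parser = load_parser("grammars/book_grammars/feat0.fcfg", trace=0)
# for tree in parser.parse("Kim likes children".split()):
#     print(tree)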
|
||||
|
||||
|
||||
def taggedsent_to_conll(sentence):
|
||||
"""
|
||||
Convert a single POS-tagged sentence into CoNLL format.
|
||||
|
||||
>>> from nltk import word_tokenize, pos_tag
|
||||
>>> text = "This is a foobar sentence."
|
||||
>>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))): # doctest: +NORMALIZE_WHITESPACE
|
||||
... print(line, end="")
|
||||
1 This _ DT DT _ 0 a _ _
|
||||
2 is _ VBZ VBZ _ 0 a _ _
|
||||
3 a _ DT DT _ 0 a _ _
|
||||
4 foobar _ JJ JJ _ 0 a _ _
|
||||
5 sentence _ NN NN _ 0 a _ _
|
||||
6 . _ . . _ 0 a _ _
|
||||
|
||||
:param sentence: A single input sentence to parse
|
||||
:type sentence: list(tuple(str, str))
|
||||
:rtype: iter(str)
|
||||
:return: a generator yielding a single sentence in CONLL format.
|
||||
"""
|
||||
for i, (word, tag) in enumerate(sentence, start=1):
|
||||
input_str = [str(i), word, "_", tag, tag, "_", "0", "a", "_", "_"]
|
||||
input_str = "\t".join(input_str) + "\n"
|
||||
yield input_str
|
||||
|
||||
|
||||
def taggedsents_to_conll(sentences):
|
||||
"""
|
||||
Convert a POS-tagged document stream (i.e. a list of sentences, each a list
of (word, tag) tuples) and yield lines in CoNLL format. One line is yielded
per word, plus two newlines at the end of each sentence.
|
||||
|
||||
>>> from nltk import word_tokenize, sent_tokenize, pos_tag
|
||||
>>> text = "This is a foobar sentence. Is that right?"
|
||||
>>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
|
||||
>>> for line in taggedsents_to_conll(sentences): # doctest: +NORMALIZE_WHITESPACE
|
||||
... if line:
|
||||
... print(line, end="")
|
||||
1 This _ DT DT _ 0 a _ _
|
||||
2 is _ VBZ VBZ _ 0 a _ _
|
||||
3 a _ DT DT _ 0 a _ _
|
||||
4 foobar _ JJ JJ _ 0 a _ _
|
||||
5 sentence _ NN NN _ 0 a _ _
|
||||
6 . _ . . _ 0 a _ _
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
1 Is _ VBZ VBZ _ 0 a _ _
|
||||
2 that _ IN IN _ 0 a _ _
|
||||
3 right _ NN NN _ 0 a _ _
|
||||
4 ? _ . . _ 0 a _ _
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
|
||||
:param sentences: Input sentences to parse
|
||||
:type sentences: list(list(tuple(str, str)))
|
||||
:rtype: iter(str)
|
||||
:return: a generator yielding sentences in CONLL format.
|
||||
"""
|
||||
for sentence in sentences:
|
||||
yield from taggedsent_to_conll(sentence)
|
||||
yield "\n\n"
|
||||
|
||||
|
||||
######################################################################
|
||||
# { Test Suites
|
||||
######################################################################
|
||||
|
||||
|
||||
class TestGrammar:
|
||||
"""
|
||||
Unit tests for CFG.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, suite, accept=None, reject=None):
|
||||
self.test_grammar = grammar
|
||||
|
||||
self.cp = load_parser(grammar, trace=0)
|
||||
self.suite = suite
|
||||
self._accept = accept
|
||||
self._reject = reject
|
||||
|
||||
def run(self, show_trees=False):
|
||||
"""
|
||||
Sentences in the test suite are divided into two classes:
|
||||
|
||||
- grammatical (``accept``) and
|
||||
- ungrammatical (``reject``).
|
||||
|
||||
If a sentence should parse according to the grammar, the value of
|
||||
``trees`` will be a non-empty list. If a sentence should be rejected
|
||||
according to the grammar, then the value of ``trees`` will be None.
|
||||
"""
|
||||
for test in self.suite:
|
||||
print(test["doc"] + ":", end=" ")
|
||||
for key in ["accept", "reject"]:
|
||||
for sent in test[key]:
|
||||
tokens = sent.split()
|
||||
trees = list(self.cp.parse(tokens))
|
||||
if show_trees and trees:
|
||||
print()
|
||||
print(sent)
|
||||
for tree in trees:
|
||||
print(tree)
|
||||
if key == "accept":
|
||||
if trees == []:
|
||||
raise ValueError("Sentence '%s' failed to parse'" % sent)
|
||||
else:
|
||||
accepted = True
|
||||
else:
|
||||
if trees:
|
||||
raise ValueError("Sentence '%s' received a parse'" % sent)
|
||||
else:
|
||||
rejected = True
|
||||
if accepted and rejected:
|
||||
print("All tests passed!")
|
||||
|
||||
|
||||
def extract_test_sentences(string, comment_chars="#%;", encoding=None):
|
||||
"""
|
||||
Parses a string with one test sentence per line.
|
||||
Lines can optionally begin with:
|
||||
|
||||
- a bool, saying if the sentence is grammatical or not, or
|
||||
- an int, giving the number of parse trees it should have,
|
||||
|
||||
The result information is followed by a colon, and then the sentence.
|
||||
Empty lines and lines beginning with a comment char are ignored.
|
||||
|
||||
:return: a list of tuple of sentences and expected results,
|
||||
where a sentence is a list of str,
|
||||
and a result is None, or bool, or int
|
||||
|
||||
:param comment_chars: ``str`` of possible comment characters.
|
||||
:param encoding: the encoding of the string, if it is binary
|
||||
"""
|
||||
if encoding is not None:
|
||||
string = string.decode(encoding)
|
||||
sentences = []
|
||||
for sentence in string.split("\n"):
|
||||
if sentence == "" or sentence[0] in comment_chars:
|
||||
continue
|
||||
split_info = sentence.split(":", 1)
|
||||
result = None
|
||||
if len(split_info) == 2:
|
||||
if split_info[0] in ["True", "true", "False", "false"]:
|
||||
result = split_info[0] in ["True", "true"]
|
||||
sentence = split_info[1]
|
||||
else:
|
||||
result = int(split_info[0])
|
||||
sentence = split_info[1]
|
||||
tokens = sentence.split()
|
||||
if tokens == []:
|
||||
continue
|
||||
sentences += [(tokens, result)]
|
||||
return sentences
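# Illustrative usage sketch (added):
# _suite = """
# # a comment line
# True: the dog barks
# 2: I saw the man with the telescope
# """
# extract_test_sentences(_suite)
# # -> [(['the', 'dog', 'barks'], True),
# #     (['I', 'saw', 'the', 'man', 'with', 'the', 'telescope'], 2)]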
|
||||
453
backend/venv/Lib/site-packages/nltk/parse/viterbi.py
Normal file
@@ -0,0 +1,453 @@
|
||||
# Natural Language Toolkit: Viterbi Probabilistic Parser
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from functools import reduce
|
||||
|
||||
from nltk.parse.api import ParserI
|
||||
from nltk.tree import ProbabilisticTree, Tree
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Viterbi PCFG Parser
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class ViterbiParser(ParserI):
|
||||
"""
|
||||
A bottom-up ``PCFG`` parser that uses dynamic programming to find
|
||||
the single most likely parse for a text. The ``ViterbiParser`` parser
|
||||
parses texts by filling in a "most likely constituent table".
|
||||
This table records the most probable tree representation for any
|
||||
given span and node value. In particular, it has an entry for
|
||||
every start index, end index, and node value, recording the most
|
||||
likely subtree that spans from the start index to the end index,
|
||||
and has the given node value.
|
||||
|
||||
The ``ViterbiParser`` parser fills in this table incrementally. It starts
|
||||
by filling in all entries for constituents that span one element
|
||||
of text (i.e., entries where the end index is one greater than the
|
||||
start index). After it has filled in all table entries for
|
||||
constituents that span one element of text, it fills in the
|
||||
entries for constituents that span two elements of text. It
|
||||
continues filling in the entries for constituents spanning larger
|
||||
and larger portions of the text, until the entire table has been
|
||||
filled. Finally, it returns the table entry for a constituent
|
||||
spanning the entire text, whose node value is the grammar's start
|
||||
symbol.
|
||||
|
||||
In order to find the most likely constituent with a given span and
|
||||
node value, the ``ViterbiParser`` parser considers all productions that
|
||||
could produce that node value. For each production, it finds all
|
||||
children that collectively cover the span and have the node values
|
||||
specified by the production's right hand side. If the probability
|
||||
of the tree formed by applying the production to the children is
|
||||
greater than the probability of the current entry in the table,
|
||||
then the table is updated with this new tree.
|
||||
|
||||
A pseudo-code description of the algorithm used by
|
||||
``ViterbiParser`` is:
|
||||
|
||||
| Create an empty most likely constituent table, *MLC*.
|
||||
| For width in 1...len(text):
|
||||
| For start in 1...len(text)-width:
|
||||
| For prod in grammar.productions:
|
||||
| For each sequence of subtrees [t[1], t[2], ..., t[n]] in MLC,
|
||||
| where t[i].label()==prod.rhs[i],
|
||||
| and the sequence covers [start:start+width]:
|
||||
| old_p = MLC[start, start+width, prod.lhs]
|
||||
| new_p = P(t[1])P(t[2])...P(t[n])P(prod)
|
||||
| if new_p > old_p:
|
||||
| new_tree = Tree(prod.lhs, t[1], t[2], ..., t[n])
|
||||
| MLC[start, start+width, prod.lhs] = new_tree
|
||||
| Return MLC[0, len(text), start_symbol]
|
||||
|
||||
:type _grammar: PCFG
|
||||
:ivar _grammar: The grammar used to parse sentences.
|
||||
:type _trace: int
|
||||
:ivar _trace: The level of tracing output that should be generated
|
||||
when parsing a text.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, trace=0):
|
||||
"""
|
||||
Create a new ``ViterbiParser`` parser, that uses ``grammar`` to
|
||||
parse texts.
|
||||
|
||||
:type grammar: PCFG
|
||||
:param grammar: The grammar used to parse texts.
|
||||
:type trace: int
|
||||
:param trace: The level of tracing that should be used when
|
||||
parsing a text. ``0`` will generate no tracing output;
|
||||
and higher numbers will produce more verbose tracing
|
||||
output.
|
||||
"""
|
||||
self._grammar = grammar
|
||||
self._trace = trace
|
||||
|
||||
def grammar(self):
|
||||
return self._grammar
|
||||
|
||||
def trace(self, trace=2):
|
||||
"""
|
||||
Set the level of tracing output that should be generated when
|
||||
parsing a text.
|
||||
|
||||
:type trace: int
|
||||
:param trace: The trace level. A trace level of ``0`` will
|
||||
generate no tracing output; and higher trace levels will
|
||||
produce more verbose tracing output.
|
||||
:rtype: None
|
||||
"""
|
||||
self._trace = trace
|
||||
|
||||
def parse(self, tokens):
|
||||
# Inherit docs from ParserI
|
||||
|
||||
tokens = list(tokens)
|
||||
self._grammar.check_coverage(tokens)
|
||||
|
||||
# The most likely constituent table. This table specifies the
|
||||
# most likely constituent for a given span and type.
|
||||
# Constituents can be either Trees or tokens. For Trees,
|
||||
# the "type" is the Nonterminal for the tree's root node
|
||||
# value. For Tokens, the "type" is the token's type.
|
||||
# The table is stored as a dictionary, since it is sparse.
|
||||
constituents = {}
|
||||
|
||||
# Initialize the constituents dictionary with the words from
|
||||
# the text.
|
||||
if self._trace:
|
||||
print("Inserting tokens into the most likely" + " constituents table...")
|
||||
for index in range(len(tokens)):
|
||||
token = tokens[index]
|
||||
constituents[index, index + 1, token] = token
|
||||
if self._trace > 1:
|
||||
self._trace_lexical_insertion(token, index, len(tokens))
|
||||
|
||||
# Consider each span of length 1, 2, ..., n; and add any trees
|
||||
# that might cover that span to the constituents dictionary.
|
||||
for length in range(1, len(tokens) + 1):
|
||||
if self._trace:
|
||||
print(
|
||||
"Finding the most likely constituents"
|
||||
+ " spanning %d text elements..." % length
|
||||
)
|
||||
for start in range(len(tokens) - length + 1):
|
||||
span = (start, start + length)
|
||||
self._add_constituents_spanning(span, constituents, tokens)
|
||||
|
||||
# Return the tree that spans the entire text & have the right cat
|
||||
tree = constituents.get((0, len(tokens), self._grammar.start()))
|
||||
if tree is not None:
|
||||
yield tree
|
||||
|
||||
def _add_constituents_spanning(self, span, constituents, tokens):
|
||||
"""
|
||||
Find any constituents that might cover ``span``, and add them
|
||||
to the most likely constituents table.
|
||||
|
||||
:rtype: None
|
||||
:type span: tuple(int, int)
|
||||
:param span: The section of the text for which we are
|
||||
trying to find possible constituents. The span is
|
||||
specified as a pair of integers, where the first integer
|
||||
is the index of the first token that should be included in
|
||||
the constituent; and the second integer is the index of
|
||||
the first token that should not be included in the
|
||||
constituent. I.e., the constituent should cover
|
||||
``text[span[0]:span[1]]``, where ``text`` is the text
|
||||
that we are parsing.
|
||||
|
||||
:type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
|
||||
:param constituents: The most likely constituents table. This
|
||||
table records the most probable tree representation for
|
||||
any given span and node value. In particular,
|
||||
``constituents(s,e,nv)`` is the most likely
|
||||
``ProbabilisticTree`` that covers ``text[s:e]``
|
||||
and has a node value ``nv.symbol()``, where ``text``
|
||||
is the text that we are parsing. When
|
||||
``_add_constituents_spanning`` is called, ``constituents``
|
||||
should contain all possible constituents that are shorter
|
||||
than ``span``.
|
||||
|
||||
:type tokens: list of tokens
|
||||
:param tokens: The text we are parsing. This is only used for
|
||||
trace output.
|
||||
"""
|
||||
# Since some of the grammar productions may be unary, we need to
|
||||
# repeatedly try all of the productions until none of them add any
|
||||
# new constituents.
|
||||
changed = True
|
||||
while changed:
|
||||
changed = False
|
||||
|
||||
# Find all instantiations of the grammar productions that
|
||||
# cover the span.
|
||||
instantiations = self._find_instantiations(span, constituents)
|
||||
|
||||
# For each production instantiation, add a new
|
||||
# ProbabilisticTree whose probability is the product
|
||||
# of the children's probabilities and the production's
|
||||
# probability.
|
||||
for production, children in instantiations:
|
||||
subtrees = [c for c in children if isinstance(c, Tree)]
|
||||
p = reduce(lambda pr, t: pr * t.prob(), subtrees, production.prob())
|
||||
node = production.lhs().symbol()
|
||||
tree = ProbabilisticTree(node, children, prob=p)
|
||||
|
||||
# If it's a new constituent, then add it to the
|
||||
# constituents dictionary.
|
||||
c = constituents.get((span[0], span[1], production.lhs()))
|
||||
if self._trace > 1:
|
||||
if c is None or c != tree:
|
||||
if c is None or c.prob() < tree.prob():
|
||||
print(" Insert:", end=" ")
|
||||
else:
|
||||
print(" Discard:", end=" ")
|
||||
self._trace_production(production, p, span, len(tokens))
|
||||
if c is None or c.prob() < tree.prob():
|
||||
constituents[span[0], span[1], production.lhs()] = tree
|
||||
changed = True
|
||||
|
||||
def _find_instantiations(self, span, constituents):
|
||||
"""
|
||||
:return: a list of the production instantiations that cover a
given span of the text. A "production instantiation" is
a tuple containing a production and a list of children,
where the production's right hand side matches the list of
children; and the children cover ``span``.
:rtype: list(tuple(Production, list(ProbabilisticTree or token)))
|
||||
|
||||
:type span: tuple(int, int)
|
||||
:param span: The section of the text for which we are
|
||||
trying to find production instantiations. The span is
|
||||
specified as a pair of integers, where the first integer
|
||||
is the index of the first token that should be covered by
|
||||
the production instantiation; and the second integer is
|
||||
the index of the first token that should not be covered by
|
||||
the production instantiation.
|
||||
:type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
|
||||
:param constituents: The most likely constituents table. This
|
||||
table records the most probable tree representation for
|
||||
any given span and node value. See the module
|
||||
documentation for more information.
|
||||
"""
|
||||
rv = []
|
||||
for production in self._grammar.productions():
|
||||
childlists = self._match_rhs(production.rhs(), span, constituents)
|
||||
|
||||
for childlist in childlists:
|
||||
rv.append((production, childlist))
|
||||
return rv
|
||||
|
||||
def _match_rhs(self, rhs, span, constituents):
|
||||
"""
|
||||
:return: a set of all the lists of children that cover ``span``
|
||||
and that match ``rhs``.
|
||||
:rtype: list(list(ProbabilisticTree or token))
|
||||
|
||||
:type rhs: list(Nonterminal or any)
|
||||
:param rhs: The list specifying what kinds of children need to
|
||||
cover ``span``. Each nonterminal in ``rhs`` specifies
|
||||
that the corresponding child should be a tree whose node
|
||||
value is that nonterminal's symbol. Each terminal in ``rhs``
|
||||
specifies that the corresponding child should be a token
|
||||
whose type is that terminal.
|
||||
:type span: tuple(int, int)
|
||||
:param span: The section of the text for which we are
|
||||
trying to find child lists. The span is specified as a
|
||||
pair of integers, where the first integer is the index of
|
||||
the first token that should be covered by the child list;
|
||||
and the second integer is the index of the first token
|
||||
that should not be covered by the child list.
|
||||
:type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
|
||||
:param constituents: The most likely constituents table. This
|
||||
table records the most probable tree representation for
|
||||
any given span and node value. See the module
|
||||
documentation for more information.
|
||||
"""
|
||||
(start, end) = span
|
||||
|
||||
# Base case
|
||||
if start >= end and rhs == ():
|
||||
return [[]]
|
||||
if start >= end or rhs == ():
|
||||
return []
|
||||
|
||||
# Find everything that matches the 1st symbol of the RHS
|
||||
childlists = []
|
||||
for split in range(start, end + 1):
|
||||
l = constituents.get((start, split, rhs[0]))
|
||||
if l is not None:
|
||||
rights = self._match_rhs(rhs[1:], (split, end), constituents)
|
||||
childlists += [[l] + r for r in rights]
|
||||
|
||||
return childlists
|
||||
|
||||
def _trace_production(self, production, p, span, width):
|
||||
"""
|
||||
Print trace output indicating that a given production has been
|
||||
applied at a given location.
|
||||
|
||||
:param production: The production that has been applied
|
||||
:type production: Production
|
||||
:param p: The probability of the tree produced by the production.
|
||||
:type p: float
|
||||
:param span: The span of the production
|
||||
:type span: tuple
|
||||
:rtype: None
|
||||
"""
|
||||
|
||||
str = "|" + "." * span[0]
|
||||
str += "=" * (span[1] - span[0])
|
||||
str += "." * (width - span[1]) + "| "
|
||||
str += "%s" % production
|
||||
if self._trace > 2:
|
||||
str = f"{str:<40} {p:12.10f} "
|
||||
|
||||
print(str)
|
||||
|
||||
def _trace_lexical_insertion(self, token, index, width):
|
||||
str = " Insert: |" + "." * index + "=" + "." * (width - index - 1) + "| "
|
||||
str += f"{token}"
|
||||
print(str)
|
||||
|
||||
def __repr__(self):
|
||||
return "<ViterbiParser for %r>" % self._grammar
|
||||
|
||||
|
||||
##//////////////////////////////////////////////////////
|
||||
## Test Code
|
||||
##//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
def demo():
|
||||
"""
|
||||
A demonstration of the Viterbi probabilistic parser. The user is
prompted to select which demo sentence to use; the parser is then run on
that sentence, and a summary of the results is displayed.
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
|
||||
from nltk import tokenize
|
||||
from nltk.grammar import PCFG
|
||||
from nltk.parse import ViterbiParser
|
||||
|
||||
toy_pcfg1 = PCFG.fromstring(
|
||||
"""
|
||||
S -> NP VP [1.0]
|
||||
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
|
||||
Det -> 'the' [0.8] | 'my' [0.2]
|
||||
N -> 'man' [0.5] | 'telescope' [0.5]
|
||||
VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
|
||||
V -> 'ate' [0.35] | 'saw' [0.65]
|
||||
PP -> P NP [1.0]
|
||||
P -> 'with' [0.61] | 'under' [0.39]
|
||||
"""
|
||||
)
|
||||
|
||||
toy_pcfg2 = PCFG.fromstring(
|
||||
"""
|
||||
S -> NP VP [1.0]
|
||||
VP -> V NP [.59]
|
||||
VP -> V [.40]
|
||||
VP -> VP PP [.01]
|
||||
NP -> Det N [.41]
|
||||
NP -> Name [.28]
|
||||
NP -> NP PP [.31]
|
||||
PP -> P NP [1.0]
|
||||
V -> 'saw' [.21]
|
||||
V -> 'ate' [.51]
|
||||
V -> 'ran' [.28]
|
||||
N -> 'boy' [.11]
|
||||
N -> 'cookie' [.12]
|
||||
N -> 'table' [.13]
|
||||
N -> 'telescope' [.14]
|
||||
N -> 'hill' [.5]
|
||||
Name -> 'Jack' [.52]
|
||||
Name -> 'Bob' [.48]
|
||||
P -> 'with' [.61]
|
||||
P -> 'under' [.39]
|
||||
Det -> 'the' [.41]
|
||||
Det -> 'a' [.31]
|
||||
Det -> 'my' [.28]
|
||||
"""
|
||||
)
|
||||
|
||||
# Define two demos. Each demo has a sentence and a grammar.
|
||||
demos = [
|
||||
("I saw the man with my telescope", toy_pcfg1),
|
||||
("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
|
||||
]
|
||||
|
||||
# Ask the user which demo they want to use.
|
||||
print()
|
||||
for i in range(len(demos)):
|
||||
print(f"{i + 1:>3}: {demos[i][0]}")
|
||||
print(" %r" % demos[i][1])
|
||||
print()
|
||||
print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
|
||||
try:
|
||||
snum = int(sys.stdin.readline().strip()) - 1
|
||||
sent, grammar = demos[snum]
|
||||
except:
|
||||
print("Bad sentence number")
|
||||
return
|
||||
|
||||
# Tokenize the sentence.
|
||||
tokens = sent.split()
|
||||
|
||||
parser = ViterbiParser(grammar)
|
||||
all_parses = {}
|
||||
|
||||
print(f"\nsent: {sent}\nparser: {parser}\ngrammar: {grammar}")
|
||||
parser.trace(3)
|
||||
t = time.time()
|
||||
parses = parser.parse_all(tokens)
|
||||
elapsed = time.time() - t  # avoid shadowing the time module
|
||||
average = (
|
||||
reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0
|
||||
)
|
||||
num_parses = len(parses)
|
||||
for p in parses:
|
||||
all_parses[p.freeze()] = 1
|
||||
|
||||
# Print some summary statistics
|
||||
print()
|
||||
print("Time (secs) # Parses Average P(parse)")
|
||||
print("-----------------------------------------")
|
||||
print("%11.4f%11d%19.14f" % (time, num_parses, average))
|
||||
parses = all_parses.keys()
|
||||
if parses:
|
||||
p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
|
||||
else:
|
||||
p = 0
|
||||
print("------------------------------------------")
|
||||
print("%11s%11d%19.14f" % ("n/a", len(parses), p))
|
||||
|
||||
# Ask the user if we should draw the parses.
|
||||
print()
|
||||
print("Draw parses (y/n)? ", end=" ")
|
||||
if sys.stdin.readline().strip().lower().startswith("y"):
|
||||
from nltk.draw.tree import draw_trees
|
||||
|
||||
print(" please wait...")
|
||||
draw_trees(*parses)
|
||||
|
||||
# Ask the user if we should print the parses.
|
||||
print()
|
||||
print("Print parses (y/n)? ", end=" ")
|
||||
if sys.stdin.readline().strip().lower().startswith("y"):
|
||||
for parse in parses:
|
||||
print(parse)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||