Initial commit
This commit is contained in:
75
backend/venv/Lib/site-packages/nltk/sem/__init__.py
Normal file
75
backend/venv/Lib/site-packages/nltk/sem/__init__.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# Natural Language Toolkit: Semantic Interpretation
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
NLTK Semantic Interpretation Package
|
||||
|
||||
This package contains classes for representing semantic structure in
|
||||
formulas of first-order logic and for evaluating such formulas in
|
||||
set-theoretic models.
|
||||
|
||||
>>> from nltk.sem import logic
|
||||
>>> logic._counter._value = 0
|
||||
|
||||
The package has two main components:
|
||||
|
||||
- ``logic`` provides support for analyzing expressions of First
|
||||
Order Logic (FOL).
|
||||
- ``evaluate`` allows users to recursively determine truth in a
|
||||
model for formulas of FOL.
|
||||
|
||||
A model consists of a domain of discourse and a valuation function,
|
||||
which assigns values to non-logical constants. We assume that entities
|
||||
in the domain are represented as strings such as ``'b1'``, ``'g1'``,
|
||||
etc. A ``Valuation`` is initialized with a list of (symbol, value)
|
||||
pairs, where values are entities, sets of entities or sets of tuples
|
||||
of entities.
|
||||
The domain of discourse can be inferred from the valuation, and model
|
||||
is then created with domain and valuation as parameters.
|
||||
|
||||
>>> from nltk.sem import Valuation, Model
|
||||
>>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
|
||||
... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
|
||||
... ('dog', set(['d1'])),
|
||||
... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
|
||||
>>> val = Valuation(v)
|
||||
>>> dom = val.domain
|
||||
>>> m = Model(dom, val)
|
||||
"""
|
||||
|
||||
from nltk.sem.boxer import Boxer
|
||||
from nltk.sem.drt import DRS, DrtExpression
|
||||
from nltk.sem.evaluate import (
|
||||
Assignment,
|
||||
Model,
|
||||
Undefined,
|
||||
Valuation,
|
||||
arity,
|
||||
is_rel,
|
||||
read_valuation,
|
||||
set2rel,
|
||||
)
|
||||
from nltk.sem.lfg import FStructure
|
||||
from nltk.sem.logic import (
|
||||
ApplicationExpression,
|
||||
Expression,
|
||||
LogicalExpressionException,
|
||||
Variable,
|
||||
binding_ops,
|
||||
boolean_ops,
|
||||
equality_preds,
|
||||
read_logic,
|
||||
)
|
||||
from nltk.sem.relextract import clause, extract_rels, rtuple
|
||||
from nltk.sem.skolemize import skolemize
|
||||
from nltk.sem.util import evaluate_sents, interpret_sents, parse_sents, root_semrep
|
||||
|
||||
# from nltk.sem.glue import Glue
|
||||
# from nltk.sem.hole import HoleSemantics
|
||||
# from nltk.sem.cooper_storage import CooperStore
|
||||
|
||||
# don't import chat80 as its names are too generic
|
||||
1609
backend/venv/Lib/site-packages/nltk/sem/boxer.py
Normal file
1609
backend/venv/Lib/site-packages/nltk/sem/boxer.py
Normal file
File diff suppressed because it is too large
Load Diff
857
backend/venv/Lib/site-packages/nltk/sem/chat80.py
Normal file
857
backend/venv/Lib/site-packages/nltk/sem/chat80.py
Normal file
@@ -0,0 +1,857 @@
|
||||
# Natural Language Toolkit: Chat-80 KB Reader
|
||||
# See https://www.w3.org/TR/swbp-skos-core-guide/
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
|
||||
# URL: <https://www.nltk.org>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
r"""
|
||||
Overview
|
||||
========
|
||||
|
||||
Chat-80 was a natural language system which allowed the user to
|
||||
interrogate a Prolog knowledge base in the domain of world
|
||||
geography. It was developed in the early '80s by Warren and Pereira; see
|
||||
``https://www.aclweb.org/anthology/J82-3002.pdf`` for a description and
|
||||
``http://www.cis.upenn.edu/~pereira/oldies.html`` for the source
|
||||
files.
|
||||
|
||||
This module contains functions to extract data from the Chat-80
|
||||
relation files ('the world database'), and convert them into a format
|
||||
that can be incorporated in the FOL models of
|
||||
``nltk.sem.evaluate``. The code assumes that the Prolog
|
||||
input files are available in the NLTK corpora directory.
|
||||
|
||||
The Chat-80 World Database consists of the following files::
|
||||
|
||||
world0.pl
|
||||
rivers.pl
|
||||
cities.pl
|
||||
countries.pl
|
||||
contain.pl
|
||||
borders.pl
|
||||
|
||||
This module uses a slightly modified version of ``world0.pl``, in which
|
||||
a set of Prolog rules have been omitted. The modified file is named
|
||||
``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
|
||||
it uses a list rather than a string in the second field.
|
||||
|
||||
Reading Chat-80 Files
|
||||
=====================
|
||||
|
||||
Chat-80 relations are like tables in a relational database. The
|
||||
relation acts as the name of the table; the first argument acts as the
|
||||
'primary key'; and subsequent arguments are further fields in the
|
||||
table. In general, the name of the table provides a label for a unary
|
||||
predicate whose extension is all the primary keys. For example,
|
||||
relations in ``cities.pl`` are of the following form::
|
||||
|
||||
'city(athens,greece,1368).'
|
||||
|
||||
Here, ``'athens'`` is the key, and will be mapped to a member of the
|
||||
unary predicate *city*.
|
||||
|
||||
The fields in the table are mapped to binary predicates. The first
|
||||
argument of the predicate is the primary key, while the second
|
||||
argument is the data in the relevant field. Thus, in the above
|
||||
example, the third field is mapped to the binary predicate
|
||||
*population_of*, whose extension is a set of pairs such as
|
||||
``'(athens, 1368)'``.
|
||||
|
||||
An exception to this general framework is required by the relations in
|
||||
the files ``borders.pl`` and ``contains.pl``. These contain facts of the
|
||||
following form::
|
||||
|
||||
'borders(albania,greece).'
|
||||
|
||||
'contains0(africa,central_africa).'
|
||||
|
||||
We do not want to form a unary concept out of the element in
|
||||
the first field of these records, and we want the label of the binary
|
||||
relation just to be ``'border'``/``'contain'`` respectively.
|
||||
|
||||
In order to drive the extraction process, we use 'relation metadata bundles'
|
||||
which are Python dictionaries such as the following::
|
||||
|
||||
city = {'label': 'city',
|
||||
'closures': [],
|
||||
'schema': ['city', 'country', 'population'],
|
||||
'filename': 'cities.pl'}
|
||||
|
||||
According to this, the file ``city['filename']`` contains a list of
|
||||
relational tuples (or more accurately, the corresponding strings in
|
||||
Prolog form) whose predicate symbol is ``city['label']`` and whose
|
||||
relational schema is ``city['schema']``. The notion of a ``closure`` is
|
||||
discussed in the next section.
|
||||
|
||||
Concepts
|
||||
========
|
||||
In order to encapsulate the results of the extraction, a class of
|
||||
``Concept`` objects is introduced. A ``Concept`` object has a number of
|
||||
attributes, in particular a ``prefLabel`` and ``extension``, which make
|
||||
it easier to inspect the output of the extraction. In addition, the
|
||||
``extension`` can be further processed: in the case of the ``'border'``
|
||||
relation, we check that the relation is symmetric, and in the case
|
||||
of the ``'contain'`` relation, we carry out the transitive
|
||||
closure. The closure properties associated with a concept is
|
||||
indicated in the relation metadata, as indicated earlier.
|
||||
|
||||
The ``extension`` of a ``Concept`` object is then incorporated into a
|
||||
``Valuation`` object.
|
||||
|
||||
Persistence
|
||||
===========
|
||||
The functions ``val_dump`` and ``val_load`` are provided to allow a
|
||||
valuation to be stored in a persistent database and re-loaded, rather
|
||||
than having to be re-computed each time.
|
||||
|
||||
Individuals and Lexical Items
|
||||
=============================
|
||||
As well as deriving relations from the Chat-80 data, we also create a
|
||||
set of individual constants, one for each entity in the domain. The
|
||||
individual constants are string-identical to the entities. For
|
||||
example, given a data item such as ``'zloty'``, we add to the valuation
|
||||
a pair ``('zloty', 'zloty')``. In order to parse English sentences that
|
||||
refer to these entities, we also create a lexical item such as the
|
||||
following for each individual constant::
|
||||
|
||||
PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
|
||||
|
||||
The set of rules is written to the file ``chat_pnames.cfg`` in the
|
||||
current directory.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shelve
|
||||
import sys
|
||||
|
||||
import nltk.data
|
||||
|
||||
###########################################################################
|
||||
# Chat-80 relation metadata bundles needed to build the valuation
|
||||
###########################################################################
|
||||
|
||||
borders = {
|
||||
"rel_name": "borders",
|
||||
"closures": ["symmetric"],
|
||||
"schema": ["region", "border"],
|
||||
"filename": "borders.pl",
|
||||
}
|
||||
|
||||
contains = {
|
||||
"rel_name": "contains0",
|
||||
"closures": ["transitive"],
|
||||
"schema": ["region", "contain"],
|
||||
"filename": "contain.pl",
|
||||
}
|
||||
|
||||
city = {
|
||||
"rel_name": "city",
|
||||
"closures": [],
|
||||
"schema": ["city", "country", "population"],
|
||||
"filename": "cities.pl",
|
||||
}
|
||||
|
||||
country = {
|
||||
"rel_name": "country",
|
||||
"closures": [],
|
||||
"schema": [
|
||||
"country",
|
||||
"region",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"area",
|
||||
"population",
|
||||
"capital",
|
||||
"currency",
|
||||
],
|
||||
"filename": "countries.pl",
|
||||
}
|
||||
|
||||
circle_of_lat = {
|
||||
"rel_name": "circle_of_latitude",
|
||||
"closures": [],
|
||||
"schema": ["circle_of_latitude", "degrees"],
|
||||
"filename": "world1.pl",
|
||||
}
|
||||
|
||||
circle_of_long = {
|
||||
"rel_name": "circle_of_longitude",
|
||||
"closures": [],
|
||||
"schema": ["circle_of_longitude", "degrees"],
|
||||
"filename": "world1.pl",
|
||||
}
|
||||
|
||||
continent = {
|
||||
"rel_name": "continent",
|
||||
"closures": [],
|
||||
"schema": ["continent"],
|
||||
"filename": "world1.pl",
|
||||
}
|
||||
|
||||
region = {
|
||||
"rel_name": "in_continent",
|
||||
"closures": [],
|
||||
"schema": ["region", "continent"],
|
||||
"filename": "world1.pl",
|
||||
}
|
||||
|
||||
ocean = {
|
||||
"rel_name": "ocean",
|
||||
"closures": [],
|
||||
"schema": ["ocean"],
|
||||
"filename": "world1.pl",
|
||||
}
|
||||
|
||||
sea = {"rel_name": "sea", "closures": [], "schema": ["sea"], "filename": "world1.pl"}
|
||||
|
||||
|
||||
items = [
|
||||
"borders",
|
||||
"contains",
|
||||
"city",
|
||||
"country",
|
||||
"circle_of_lat",
|
||||
"circle_of_long",
|
||||
"continent",
|
||||
"region",
|
||||
"ocean",
|
||||
"sea",
|
||||
]
|
||||
items = tuple(sorted(items))
|
||||
|
||||
item_metadata = {
|
||||
"borders": borders,
|
||||
"contains": contains,
|
||||
"city": city,
|
||||
"country": country,
|
||||
"circle_of_lat": circle_of_lat,
|
||||
"circle_of_long": circle_of_long,
|
||||
"continent": continent,
|
||||
"region": region,
|
||||
"ocean": ocean,
|
||||
"sea": sea,
|
||||
}
|
||||
|
||||
rels = item_metadata.values()
|
||||
|
||||
not_unary = ["borders.pl", "contain.pl"]
|
||||
|
||||
###########################################################################
|
||||
|
||||
|
||||
class Concept:
|
||||
"""
|
||||
A Concept class, loosely based on SKOS
|
||||
(https://www.w3.org/TR/swbp-skos-core-guide/).
|
||||
"""
|
||||
|
||||
def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()):
|
||||
"""
|
||||
:param prefLabel: the preferred label for the concept
|
||||
:type prefLabel: str
|
||||
:param arity: the arity of the concept
|
||||
:type arity: int
|
||||
:param altLabels: other (related) labels
|
||||
:type altLabels: list
|
||||
:param closures: closure properties of the extension
|
||||
(list items can be ``symmetric``, ``reflexive``, ``transitive``)
|
||||
:type closures: list
|
||||
:param extension: the extensional value of the concept
|
||||
:type extension: set
|
||||
"""
|
||||
self.prefLabel = prefLabel
|
||||
self.arity = arity
|
||||
self.altLabels = altLabels
|
||||
self.closures = closures
|
||||
# keep _extension internally as a set
|
||||
self._extension = extension
|
||||
# public access is via a list (for slicing)
|
||||
self.extension = sorted(list(extension))
|
||||
|
||||
def __str__(self):
|
||||
# _extension = ''
|
||||
# for element in sorted(self.extension):
|
||||
# if isinstance(element, tuple):
|
||||
# element = '(%s, %s)' % (element)
|
||||
# _extension += element + ', '
|
||||
# _extension = _extension[:-1]
|
||||
|
||||
return "Label = '{}'\nArity = {}\nExtension = {}".format(
|
||||
self.prefLabel,
|
||||
self.arity,
|
||||
self.extension,
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return "Concept('%s')" % self.prefLabel
|
||||
|
||||
def augment(self, data):
|
||||
"""
|
||||
Add more data to the ``Concept``'s extension set.
|
||||
|
||||
:param data: a new semantic value
|
||||
:type data: string or pair of strings
|
||||
:rtype: set
|
||||
|
||||
"""
|
||||
self._extension.add(data)
|
||||
self.extension = sorted(list(self._extension))
|
||||
return self._extension
|
||||
|
||||
def _make_graph(self, s):
|
||||
"""
|
||||
Convert a set of pairs into an adjacency linked list encoding of a graph.
|
||||
"""
|
||||
g = {}
|
||||
for x, y in s:
|
||||
if x in g:
|
||||
g[x].append(y)
|
||||
else:
|
||||
g[x] = [y]
|
||||
return g
|
||||
|
||||
def _transclose(self, g):
|
||||
"""
|
||||
Compute the transitive closure of a graph represented as a linked list.
|
||||
"""
|
||||
for x in g:
|
||||
for adjacent in g[x]:
|
||||
# check that adjacent is a key
|
||||
if adjacent in g:
|
||||
for y in g[adjacent]:
|
||||
if y not in g[x]:
|
||||
g[x].append(y)
|
||||
return g
|
||||
|
||||
def _make_pairs(self, g):
|
||||
"""
|
||||
Convert an adjacency linked list back into a set of pairs.
|
||||
"""
|
||||
pairs = []
|
||||
for node in g:
|
||||
for adjacent in g[node]:
|
||||
pairs.append((node, adjacent))
|
||||
return set(pairs)
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close a binary relation in the ``Concept``'s extension set.
|
||||
|
||||
:return: a new extension for the ``Concept`` in which the
|
||||
relation is closed under a given property
|
||||
"""
|
||||
from nltk.sem import is_rel
|
||||
|
||||
assert is_rel(self._extension)
|
||||
if "symmetric" in self.closures:
|
||||
pairs = []
|
||||
for x, y in self._extension:
|
||||
pairs.append((y, x))
|
||||
sym = set(pairs)
|
||||
self._extension = self._extension.union(sym)
|
||||
if "transitive" in self.closures:
|
||||
all = self._make_graph(self._extension)
|
||||
closed = self._transclose(all)
|
||||
trans = self._make_pairs(closed)
|
||||
self._extension = self._extension.union(trans)
|
||||
self.extension = sorted(list(self._extension))
|
||||
|
||||
|
||||
def clause2concepts(filename, rel_name, schema, closures=[]):
|
||||
"""
|
||||
Convert a file of Prolog clauses into a list of ``Concept`` objects.
|
||||
|
||||
:param filename: filename containing the relations
|
||||
:type filename: str
|
||||
:param rel_name: name of the relation
|
||||
:type rel_name: str
|
||||
:param schema: the schema used in a set of relational tuples
|
||||
:type schema: list
|
||||
:param closures: closure properties for the extension of the concept
|
||||
:type closures: list
|
||||
:return: a list of ``Concept`` objects
|
||||
:rtype: list
|
||||
"""
|
||||
concepts = []
|
||||
# position of the subject of a binary relation
|
||||
subj = 0
|
||||
# label of the 'primary key'
|
||||
pkey = schema[0]
|
||||
# fields other than the primary key
|
||||
fields = schema[1:]
|
||||
|
||||
# convert a file into a list of lists
|
||||
records = _str2records(filename, rel_name)
|
||||
|
||||
# add a unary concept corresponding to the set of entities
|
||||
# in the primary key position
|
||||
# relations in 'not_unary' are more like ordinary binary relations
|
||||
if not filename in not_unary:
|
||||
concepts.append(unary_concept(pkey, subj, records))
|
||||
|
||||
# add a binary concept for each non-key field
|
||||
for field in fields:
|
||||
obj = schema.index(field)
|
||||
concepts.append(binary_concept(field, closures, subj, obj, records))
|
||||
|
||||
return concepts
|
||||
|
||||
|
||||
def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
|
||||
"""
|
||||
Convert a file of Prolog clauses into a database table.
|
||||
|
||||
This is not generic, since it doesn't allow arbitrary
|
||||
schemas to be set as a parameter.
|
||||
|
||||
Intended usage::
|
||||
|
||||
cities2table('cities.pl', 'city', 'city.db', verbose=True, setup=True)
|
||||
|
||||
:param filename: filename containing the relations
|
||||
:type filename: str
|
||||
:param rel_name: name of the relation
|
||||
:type rel_name: str
|
||||
:param dbname: filename of persistent store
|
||||
:type schema: str
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
records = _str2records(filename, rel_name)
|
||||
connection = sqlite3.connect(dbname)
|
||||
cur = connection.cursor()
|
||||
if setup:
|
||||
cur.execute(
|
||||
"""CREATE TABLE city_table
|
||||
(City text, Country text, Population int)"""
|
||||
)
|
||||
|
||||
table_name = "city_table"
|
||||
for t in records:
|
||||
cur.execute("insert into %s values (?,?,?)" % table_name, t)
|
||||
if verbose:
|
||||
print("inserting values into %s: " % table_name, t)
|
||||
connection.commit()
|
||||
if verbose:
|
||||
print("Committing update to %s" % dbname)
|
||||
cur.close()
|
||||
|
||||
|
||||
def sql_query(dbname, query):
|
||||
"""
|
||||
Execute an SQL query over a database.
|
||||
:param dbname: filename of persistent store
|
||||
:type schema: str
|
||||
:param query: SQL query
|
||||
:type rel_name: str
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
try:
|
||||
path = nltk.data.find(dbname)
|
||||
connection = sqlite3.connect(str(path))
|
||||
cur = connection.cursor()
|
||||
return cur.execute(query)
|
||||
except (ValueError, sqlite3.OperationalError):
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"Make sure the database file %s is installed and uncompressed." % dbname
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def _str2records(filename, rel):
|
||||
"""
|
||||
Read a file into memory and convert each relation clause into a list.
|
||||
"""
|
||||
recs = []
|
||||
contents = nltk.data.load("corpora/chat80/%s" % filename, format="text")
|
||||
for line in contents.splitlines():
|
||||
if line.startswith(rel):
|
||||
line = re.sub(rel + r"\(", "", line)
|
||||
line = re.sub(r"\)\.$", "", line)
|
||||
record = line.split(",")
|
||||
recs.append(record)
|
||||
return recs
|
||||
|
||||
|
||||
def unary_concept(label, subj, records):
|
||||
"""
|
||||
Make a unary concept out of the primary key in a record.
|
||||
|
||||
A record is a list of entities in some relation, such as
|
||||
``['france', 'paris']``, where ``'france'`` is acting as the primary
|
||||
key.
|
||||
|
||||
:param label: the preferred label for the concept
|
||||
:type label: string
|
||||
:param subj: position in the record of the subject of the predicate
|
||||
:type subj: int
|
||||
:param records: a list of records
|
||||
:type records: list of lists
|
||||
:return: ``Concept`` of arity 1
|
||||
:rtype: Concept
|
||||
"""
|
||||
c = Concept(label, arity=1, extension=set())
|
||||
for record in records:
|
||||
c.augment(record[subj])
|
||||
return c
|
||||
|
||||
|
||||
def binary_concept(label, closures, subj, obj, records):
|
||||
"""
|
||||
Make a binary concept out of the primary key and another field in a record.
|
||||
|
||||
A record is a list of entities in some relation, such as
|
||||
``['france', 'paris']``, where ``'france'`` is acting as the primary
|
||||
key, and ``'paris'`` stands in the ``'capital_of'`` relation to
|
||||
``'france'``.
|
||||
|
||||
More generally, given a record such as ``['a', 'b', 'c']``, where
|
||||
label is bound to ``'B'``, and ``obj`` bound to 1, the derived
|
||||
binary concept will have label ``'B_of'``, and its extension will
|
||||
be a set of pairs such as ``('a', 'b')``.
|
||||
|
||||
|
||||
:param label: the base part of the preferred label for the concept
|
||||
:type label: str
|
||||
:param closures: closure properties for the extension of the concept
|
||||
:type closures: list
|
||||
:param subj: position in the record of the subject of the predicate
|
||||
:type subj: int
|
||||
:param obj: position in the record of the object of the predicate
|
||||
:type obj: int
|
||||
:param records: a list of records
|
||||
:type records: list of lists
|
||||
:return: ``Concept`` of arity 2
|
||||
:rtype: Concept
|
||||
"""
|
||||
if not label == "border" and not label == "contain":
|
||||
label = label + "_of"
|
||||
c = Concept(label, arity=2, closures=closures, extension=set())
|
||||
for record in records:
|
||||
c.augment((record[subj], record[obj]))
|
||||
# close the concept's extension according to the properties in closures
|
||||
c.close()
|
||||
return c
|
||||
|
||||
|
||||
def process_bundle(rels):
|
||||
"""
|
||||
Given a list of relation metadata bundles, make a corresponding
|
||||
dictionary of concepts, indexed by the relation name.
|
||||
|
||||
:param rels: bundle of metadata needed for constructing a concept
|
||||
:type rels: list(dict)
|
||||
:return: a dictionary of concepts, indexed by the relation name.
|
||||
:rtype: dict(str): Concept
|
||||
"""
|
||||
concepts = {}
|
||||
for rel in rels:
|
||||
rel_name = rel["rel_name"]
|
||||
closures = rel["closures"]
|
||||
schema = rel["schema"]
|
||||
filename = rel["filename"]
|
||||
|
||||
concept_list = clause2concepts(filename, rel_name, schema, closures)
|
||||
for c in concept_list:
|
||||
label = c.prefLabel
|
||||
if label in concepts:
|
||||
for data in c.extension:
|
||||
concepts[label].augment(data)
|
||||
concepts[label].close()
|
||||
else:
|
||||
concepts[label] = c
|
||||
return concepts
|
||||
|
||||
|
||||
def make_valuation(concepts, read=False, lexicon=False):
|
||||
"""
|
||||
Convert a list of ``Concept`` objects into a list of (label, extension) pairs;
|
||||
optionally create a ``Valuation`` object.
|
||||
|
||||
:param concepts: concepts
|
||||
:type concepts: list(Concept)
|
||||
:param read: if ``True``, ``(symbol, set)`` pairs are read into a ``Valuation``
|
||||
:type read: bool
|
||||
:rtype: list or Valuation
|
||||
"""
|
||||
vals = []
|
||||
|
||||
for c in concepts:
|
||||
vals.append((c.prefLabel, c.extension))
|
||||
if lexicon:
|
||||
read = True
|
||||
if read:
|
||||
from nltk.sem import Valuation
|
||||
|
||||
val = Valuation({})
|
||||
val.update(vals)
|
||||
# add labels for individuals
|
||||
val = label_indivs(val, lexicon=lexicon)
|
||||
return val
|
||||
else:
|
||||
return vals
|
||||
|
||||
|
||||
def val_dump(rels, db):
|
||||
"""
|
||||
Make a ``Valuation`` from a list of relation metadata bundles and dump to
|
||||
persistent database.
|
||||
|
||||
:param rels: bundle of metadata needed for constructing a concept
|
||||
:type rels: list of dict
|
||||
:param db: name of file to which data is written.
|
||||
The suffix '.db' will be automatically appended.
|
||||
:type db: str
|
||||
"""
|
||||
concepts = process_bundle(rels).values()
|
||||
valuation = make_valuation(concepts, read=True)
|
||||
db_out = shelve.open(db, "n")
|
||||
|
||||
db_out.update(valuation)
|
||||
|
||||
db_out.close()
|
||||
|
||||
|
||||
def val_load(db):
|
||||
"""
|
||||
Load a ``Valuation`` from a persistent database.
|
||||
|
||||
:param db: name of file from which data is read.
|
||||
The suffix '.db' should be omitted from the name.
|
||||
:type db: str
|
||||
"""
|
||||
dbname = db + ".db"
|
||||
|
||||
if not os.access(dbname, os.R_OK):
|
||||
sys.exit("Cannot read file: %s" % dbname)
|
||||
else:
|
||||
db_in = shelve.open(db)
|
||||
from nltk.sem import Valuation
|
||||
|
||||
val = Valuation(db_in)
|
||||
# val.read(db_in.items())
|
||||
return val
|
||||
|
||||
|
||||
# def alpha(str):
|
||||
# """
|
||||
# Utility to filter out non-alphabetic constants.
|
||||
|
||||
#:param str: candidate constant
|
||||
#:type str: string
|
||||
#:rtype: bool
|
||||
# """
|
||||
# try:
|
||||
# int(str)
|
||||
# return False
|
||||
# except ValueError:
|
||||
## some unknown values in records are labeled '?'
|
||||
# if not str == '?':
|
||||
# return True
|
||||
|
||||
|
||||
def label_indivs(valuation, lexicon=False):
|
||||
"""
|
||||
Assign individual constants to the individuals in the domain of a ``Valuation``.
|
||||
|
||||
Given a valuation with an entry of the form ``{'rel': {'a': True}}``,
|
||||
add a new entry ``{'a': 'a'}``.
|
||||
|
||||
:type valuation: Valuation
|
||||
:rtype: Valuation
|
||||
"""
|
||||
# collect all the individuals into a domain
|
||||
domain = valuation.domain
|
||||
# convert the domain into a sorted list of alphabetic terms
|
||||
# use the same string as a label
|
||||
pairs = [(e, e) for e in domain]
|
||||
if lexicon:
|
||||
lex = make_lex(domain)
|
||||
with open("chat_pnames.cfg", "w") as outfile:
|
||||
outfile.writelines(lex)
|
||||
# read the pairs into the valuation
|
||||
valuation.update(pairs)
|
||||
return valuation
|
||||
|
||||
|
||||
def make_lex(symbols):
|
||||
"""
|
||||
Create lexical CFG rules for each individual symbol.
|
||||
|
||||
Given a valuation with an entry of the form ``{'zloty': 'zloty'}``,
|
||||
create a lexical rule for the proper name 'Zloty'.
|
||||
|
||||
:param symbols: a list of individual constants in the semantic representation
|
||||
:type symbols: sequence -- set(str)
|
||||
:rtype: list(str)
|
||||
"""
|
||||
lex = []
|
||||
header = """
|
||||
##################################################################
|
||||
# Lexical rules automatically generated by running 'chat80.py -x'.
|
||||
##################################################################
|
||||
|
||||
"""
|
||||
lex.append(header)
|
||||
template = r"PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n"
|
||||
|
||||
for s in symbols:
|
||||
parts = s.split("_")
|
||||
caps = [p.capitalize() for p in parts]
|
||||
pname = "_".join(caps)
|
||||
rule = template % (s, pname)
|
||||
lex.append(rule)
|
||||
return lex
|
||||
|
||||
|
||||
###########################################################################
|
||||
# Interface function to emulate other corpus readers
|
||||
###########################################################################
|
||||
|
||||
|
||||
def concepts(items=items):
|
||||
"""
|
||||
Build a list of concepts corresponding to the relation names in ``items``.
|
||||
|
||||
:param items: names of the Chat-80 relations to extract
|
||||
:type items: list(str)
|
||||
:return: the ``Concept`` objects which are extracted from the relations
|
||||
:rtype: list(Concept)
|
||||
"""
|
||||
if isinstance(items, str):
|
||||
items = (items,)
|
||||
|
||||
rels = [item_metadata[r] for r in items]
|
||||
|
||||
concept_map = process_bundle(rels)
|
||||
return concept_map.values()
|
||||
|
||||
|
||||
###########################################################################
|
||||
|
||||
|
||||
def main():
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
description = """
|
||||
Extract data from the Chat-80 Prolog files and convert them into a
|
||||
Valuation object for use in the NLTK semantics package.
|
||||
"""
|
||||
|
||||
opts = OptionParser(description=description)
|
||||
opts.set_defaults(verbose=True, lex=False, vocab=False)
|
||||
opts.add_option(
|
||||
"-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB"
|
||||
)
|
||||
opts.add_option(
|
||||
"-l",
|
||||
"--load",
|
||||
dest="indb",
|
||||
help="load a stored valuation from DB",
|
||||
metavar="DB",
|
||||
)
|
||||
opts.add_option(
|
||||
"-c",
|
||||
"--concepts",
|
||||
action="store_true",
|
||||
help="print concepts instead of a valuation",
|
||||
)
|
||||
opts.add_option(
|
||||
"-r",
|
||||
"--relation",
|
||||
dest="label",
|
||||
help="print concept with label REL (check possible labels with '-v' option)",
|
||||
metavar="REL",
|
||||
)
|
||||
opts.add_option(
|
||||
"-q",
|
||||
"--quiet",
|
||||
action="store_false",
|
||||
dest="verbose",
|
||||
help="don't print out progress info",
|
||||
)
|
||||
opts.add_option(
|
||||
"-x",
|
||||
"--lex",
|
||||
action="store_true",
|
||||
dest="lex",
|
||||
help="write a file of lexical entries for country names, then exit",
|
||||
)
|
||||
opts.add_option(
|
||||
"-v",
|
||||
"--vocab",
|
||||
action="store_true",
|
||||
dest="vocab",
|
||||
help="print out the vocabulary of concept labels and their arity, then exit",
|
||||
)
|
||||
|
||||
(options, args) = opts.parse_args()
|
||||
if options.outdb and options.indb:
|
||||
opts.error("Options --store and --load are mutually exclusive")
|
||||
|
||||
if options.outdb:
|
||||
# write the valuation to a persistent database
|
||||
if options.verbose:
|
||||
outdb = options.outdb + ".db"
|
||||
print("Dumping a valuation to %s" % outdb)
|
||||
val_dump(rels, options.outdb)
|
||||
sys.exit(0)
|
||||
else:
|
||||
# try to read in a valuation from a database
|
||||
if options.indb is not None:
|
||||
dbname = options.indb + ".db"
|
||||
if not os.access(dbname, os.R_OK):
|
||||
sys.exit("Cannot read file: %s" % dbname)
|
||||
else:
|
||||
valuation = val_load(options.indb)
|
||||
# we need to create the valuation from scratch
|
||||
else:
|
||||
# build some concepts
|
||||
concept_map = process_bundle(rels)
|
||||
concepts = concept_map.values()
|
||||
# just print out the vocabulary
|
||||
if options.vocab:
|
||||
items = sorted((c.arity, c.prefLabel) for c in concepts)
|
||||
for arity, label in items:
|
||||
print(label, arity)
|
||||
sys.exit(0)
|
||||
# show all the concepts
|
||||
if options.concepts:
|
||||
for c in concepts:
|
||||
print(c)
|
||||
print()
|
||||
if options.label:
|
||||
print(concept_map[options.label])
|
||||
sys.exit(0)
|
||||
else:
|
||||
# turn the concepts into a Valuation
|
||||
if options.lex:
|
||||
if options.verbose:
|
||||
print("Writing out lexical rules")
|
||||
make_valuation(concepts, lexicon=True)
|
||||
else:
|
||||
valuation = make_valuation(concepts, read=True)
|
||||
print(valuation)
|
||||
|
||||
|
||||
def sql_demo():
|
||||
"""
|
||||
Print out every row from the 'city.db' database.
|
||||
"""
|
||||
print()
|
||||
print("Using SQL to extract rows from 'city.db' RDB.")
|
||||
for row in sql_query("corpora/city_database/city.db", "SELECT * FROM city_table"):
|
||||
print(row)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
sql_demo()
|
||||
124
backend/venv/Lib/site-packages/nltk/sem/cooper_storage.py
Normal file
124
backend/venv/Lib/site-packages/nltk/sem/cooper_storage.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.parse import load_parser
|
||||
from nltk.parse.featurechart import InstantiateVarsChart
|
||||
from nltk.sem.logic import ApplicationExpression, LambdaExpression, Variable
|
||||
|
||||
|
||||
class CooperStore:
    """
    A container for handling quantifier ambiguity via Cooper storage.

    Wraps a feature structure carrying a ``CORE`` semantic representation
    plus a ``STORE`` of binding operators, and computes the readings that
    result from retrieving the store in every possible order.
    """

    def __init__(self, featstruct):
        """
        :param featstruct: The value of the ``sem`` node in a tree from
            ``parse_with_bindops()``
        :type featstruct: FeatStruct (with features ``core`` and ``store``)
        """
        self.featstruct = featstruct
        self.readings = []
        try:
            self.core = featstruct["CORE"]
            self.store = featstruct["STORE"]
        except KeyError:
            # Best-effort diagnostic; the instance is left without core/store.
            print("%s is not a Cooper storage structure" % featstruct)

    def _permute(self, lst):
        """
        :return: An iterator over the permutations of the input list
        :type lst: list
        :rtype: iter
        """
        if not lst:
            yield ()
            return
        for pos in range(len(lst)):
            # All permutations beginning with lst[pos], recursing on the rest.
            rest = lst[:pos] + lst[pos + 1 :]
            for tail in self._permute(rest):
                yield (lst[pos],) + tail

    def s_retrieve(self, trace=False):
        r"""
        Carry out S-Retrieval of the binding operators in the store.

        Each permutation of the store (i.e. list of binding operators) is
        taken to be a possible scoping of quantifiers. For every permutation
        we apply the operators successively to the core representation,
        working from the inside out, and record the simplified result in
        ``self.readings``.

        Binding operators are of the form::

            bo(\P.all x.(man(x) -> P(x)),z1)
        """
        for count, ordering in enumerate(self._permute(self.store), start=1):
            if trace:
                print("Permutation %s" % count)
            term = self.core
            for bindop in ordering:
                # Pull the quantifier and bound-variable args out of the 'bo' wrapper.
                quant, varex = tuple(bindop.args)
                # Abstract over the current term with the variable, then apply
                # the quantifier to the abstraction.
                term = ApplicationExpression(
                    quant, LambdaExpression(varex.variable, term)
                )
                if trace:
                    print(" ", term)
                term = term.simplify()
            self.readings.append(term)
|
||||
|
||||
|
||||
def parse_with_bindops(sentence, grammar=None, trace=0):
    """
    Use a grammar with Binding Operators to parse a sentence.

    :param sentence: whitespace-tokenizable input string
    :param grammar: grammar resource path; defaults to the book's storage grammar
    :param trace: verbosity level passed through to the chart parser
    :return: list of parse trees for the sentence
    """
    grammar_file = grammar if grammar else "grammars/book_grammars/storage.fcfg"
    chart_parser = load_parser(
        grammar_file, trace=trace, chart_class=InstantiateVarsChart
    )
    return list(chart_parser.parse(sentence.split()))
|
||||
|
||||
|
||||
def demo():
    """Parse one example sentence and print its Cooper-storage analysis."""
    from nltk.sem import cooper_storage as cs

    sentence = "every girl chases a dog"
    # sentence = "a man gives a bone to every dog"
    print()
    print("Analysis of sentence '%s'" % sentence)
    print("=" * 50)
    parse_trees = cs.parse_with_bindops(sentence, trace=0)
    for parse_tree in parse_trees:
        semrep = cs.CooperStore(parse_tree.label()["SEM"])
        print()
        print("Binding operators:")
        print("-" * 15)
        for bindop in semrep.store:
            print(bindop)
        print()
        print("Core:")
        print("-" * 15)
        print(semrep.core)
        print()
        print("S-Retrieval:")
        print("-" * 15)
        semrep.s_retrieve(trace=True)
        print("Readings:")
        print("-" * 15)

        for idx, reading in enumerate(semrep.readings):
            print(f"{idx + 1}: {reading}")
|
||||
|
||||
|
||||
# Run the Cooper-storage demonstration when executed as a script.
if __name__ == "__main__":
    demo()
|
||||
1456
backend/venv/Lib/site-packages/nltk/sem/drt.py
Normal file
1456
backend/venv/Lib/site-packages/nltk/sem/drt.py
Normal file
File diff suppressed because it is too large
Load Diff
553
backend/venv/Lib/site-packages/nltk/sem/drt_glue_demo.py
Normal file
553
backend/venv/Lib/site-packages/nltk/sem/drt_glue_demo.py
Normal file
@@ -0,0 +1,553 @@
|
||||
# Natural Language Toolkit: GUI Demo for Glue Semantics with Discourse
|
||||
# Representation Theory (DRT) as meaning language
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
try:
|
||||
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.draw.util import CanvasFrame, ShowText
|
||||
|
||||
except ImportError:
|
||||
"""Ignore ImportError because tkinter might not be available."""
|
||||
|
||||
from nltk.parse import MaltParser
|
||||
from nltk.sem.drt import DrsDrawer, DrtVariableExpression
|
||||
from nltk.sem.glue import DrtGlue
|
||||
from nltk.sem.logic import Variable
|
||||
from nltk.tag import RegexpTagger
|
||||
from nltk.util import in_idle
|
||||
|
||||
|
||||
class DrtGlueDemo:
    """
    Tkinter GUI demo for Glue Semantics with DRT as the meaning language.

    Shows a list of example sentences; selecting one parses it with a
    DRT Glue analysis (computed lazily and cached), lists the available
    readings, and draws the selected reading's DRS on a canvas.
    """

    def __init__(self, examples):
        # Set up the main window.
        self._top = Tk()
        self._top.title("DRT Glue Demo")

        # Set up key bindings.
        self._init_bindings()

        # Initialize the fonts.
        self._init_fonts(self._top)

        self._examples = examples
        # One cache slot per example: holds either a list of readings or
        # an error expression once the example has been parsed.
        self._readingCache = [None for example in examples]

        # The user can hide the grammar.
        self._show_grammar = IntVar(self._top)
        self._show_grammar.set(1)

        # Set the data to None
        self._curExample = -1
        self._readings = []
        self._drs = None
        self._drsWidget = None
        self._error = None

        self._init_glue()

        # Create the basic frames.
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_exampleListbox(self._top)
        self._init_readingListbox(self._top)
        self._init_canvas(self._top)

        # Resize callback
        self._canvas.bind("<Configure>", self._configure)

    #########################################
    ## Initialization Helpers
    #########################################

    def _init_glue(self):
        # Toy regexp POS tagger covering just the demo vocabulary.
        tagger = RegexpTagger(
            [
                ("^(David|Mary|John)$", "NNP"),
                (
                    "^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
                    "VB",
                ),
                ("^(go|order|vanish|find|approach)$", "VB"),
                ("^(a)$", "ex_quant"),
                ("^(every)$", "univ_quant"),
                ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
                ("^(big|gray|former)$", "JJ"),
                ("^(him|himself)$", "PRP"),
            ]
        )

        # NOTE(review): MaltParser requires an external parser install —
        # the demo fails at parse time without it.
        depparser = MaltParser(tagger=tagger)
        self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)

    def _init_fonts(self, root):
        # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
        self._sysfont = Font(font=Button()["font"])
        root.option_add("*Font", self._sysfont)

        # What's our font size (default=same as sysfont)
        self._size = IntVar(root)
        self._size.set(self._sysfont.cget("size"))

        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
        self._font = Font(family="helvetica", size=self._size.get())
        if self._size.get() < 0:
            big = self._size.get() - 2
        else:
            big = self._size.get() + 2
        self._bigfont = Font(family="helvetica", weight="bold", size=big)

    def _init_exampleListbox(self, parent):
        self._exampleFrame = listframe = Frame(parent)
        self._exampleFrame.pack(fill="both", side="left", padx=2)
        self._exampleList_label = Label(
            self._exampleFrame, font=self._boldfont, text="Examples"
        )
        self._exampleList_label.pack()
        self._exampleList = Listbox(
            self._exampleFrame,
            selectmode="single",
            relief="groove",
            background="white",
            foreground="#909090",
            font=self._font,
            selectforeground="#004040",
            selectbackground="#c0f0c0",
        )

        self._exampleList.pack(side="right", fill="both", expand=1)

        for example in self._examples:
            self._exampleList.insert("end", (" %s" % example))
        self._exampleList.config(height=min(len(self._examples), 25), width=40)

        # Add a scrollbar if there are more than 25 examples.
        if len(self._examples) > 25:
            listscroll = Scrollbar(self._exampleFrame, orient="vertical")
            self._exampleList.config(yscrollcommand=listscroll.set)
            listscroll.config(command=self._exampleList.yview)
            listscroll.pack(side="left", fill="y")

        # If they select an example, apply it.
        self._exampleList.bind("<<ListboxSelect>>", self._exampleList_select)

    def _init_readingListbox(self, parent):
        self._readingFrame = listframe = Frame(parent)
        self._readingFrame.pack(fill="both", side="left", padx=2)
        self._readingList_label = Label(
            self._readingFrame, font=self._boldfont, text="Readings"
        )
        self._readingList_label.pack()
        self._readingList = Listbox(
            self._readingFrame,
            selectmode="single",
            relief="groove",
            background="white",
            foreground="#909090",
            font=self._font,
            selectforeground="#004040",
            selectbackground="#c0f0c0",
        )

        self._readingList.pack(side="right", fill="both", expand=1)

        # The reading list always gets a scrollbar.
        listscroll = Scrollbar(self._readingFrame, orient="vertical")
        self._readingList.config(yscrollcommand=listscroll.set)
        listscroll.config(command=self._readingList.yview)
        listscroll.pack(side="right", fill="y")

        self._populate_readingListbox()

    def _populate_readingListbox(self):
        # Populate the listbox with integers
        self._readingList.delete(0, "end")
        for i in range(len(self._readings)):
            self._readingList.insert("end", (" %s" % (i + 1)))
        self._readingList.config(height=min(len(self._readings), 25), width=5)

        # If they select a reading, apply it.
        self._readingList.bind("<<ListboxSelect>>", self._readingList_select)

    def _init_bindings(self):
        # Key bindings are a good thing.
        self._top.bind("<Control-q>", self.destroy)
        self._top.bind("<Control-x>", self.destroy)
        self._top.bind("<Escape>", self.destroy)
        self._top.bind("n", self.next)
        self._top.bind("<space>", self.next)
        self._top.bind("p", self.prev)
        self._top.bind("<BackSpace>", self.prev)

    def _init_buttons(self, parent):
        # Set up the frames.
        self._buttonframe = buttonframe = Frame(parent)
        buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
        Button(
            buttonframe,
            text="Prev",
            background="#90c0d0",
            foreground="black",
            command=self.prev,
        ).pack(side="left")
        Button(
            buttonframe,
            text="Next",
            background="#90c0d0",
            foreground="black",
            command=self.next,
        ).pack(side="left")

    def _configure(self, event):
        # Window-resize callback: re-fit the scroll region and redraw.
        self._autostep = 0
        (x1, y1, x2, y2) = self._cframe.scrollregion()
        y2 = event.height - 6
        self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
        self._redraw()

    def _init_canvas(self, parent):
        self._cframe = CanvasFrame(
            parent,
            background="white",
            # width=525, height=250,
            closeenough=10,
            border=2,
            relief="sunken",
        )
        self._cframe.pack(expand=1, fill="both", side="top", pady=2)
        canvas = self._canvas = self._cframe.canvas()

        # Initially, there's no tree or text
        self._tree = None
        self._textwidgets = []
        self._textline = None

    def _init_menubar(self, parent):
        menubar = Menu(parent)

        filemenu = Menu(menubar, tearoff=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        actionmenu = Menu(menubar, tearoff=0)
        actionmenu.add_command(
            label="Next", underline=0, command=self.next, accelerator="n, Space"
        )
        actionmenu.add_command(
            label="Previous", underline=0, command=self.prev, accelerator="p, Backspace"
        )
        menubar.add_cascade(label="Action", underline=0, menu=actionmenu)

        optionmenu = Menu(menubar, tearoff=0)
        optionmenu.add_checkbutton(
            label="Remove Duplicates",
            underline=0,
            variable=self._glue.remove_duplicates,
            command=self._toggle_remove_duplicates,
            accelerator="r",
        )
        menubar.add_cascade(label="Options", underline=0, menu=optionmenu)

        viewmenu = Menu(menubar, tearoff=0)
        viewmenu.add_radiobutton(
            label="Tiny",
            variable=self._size,
            underline=0,
            value=10,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Small",
            variable=self._size,
            underline=0,
            value=12,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Medium",
            variable=self._size,
            underline=0,
            value=14,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Large",
            variable=self._size,
            underline=0,
            value=18,
            command=self.resize,
        )
        viewmenu.add_radiobutton(
            label="Huge",
            variable=self._size,
            underline=0,
            value=24,
            command=self.resize,
        )
        menubar.add_cascade(label="View", underline=0, menu=viewmenu)

        helpmenu = Menu(menubar, tearoff=0)
        helpmenu.add_command(label="About", underline=0, command=self.about)
        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

        parent.config(menu=menubar)

    #########################################
    ## Main draw procedure
    #########################################

    def _redraw(self):
        canvas = self._canvas

        # Delete the old DRS, widgets, etc.
        if self._drsWidget is not None:
            self._drsWidget.clear()

        # Draw either the current DRS or the current error expression.
        if self._drs:
            self._drsWidget = DrsWidget(self._canvas, self._drs)
            self._drsWidget.draw()

        if self._error:
            self._drsWidget = DrsWidget(self._canvas, self._error)
            self._drsWidget.draw()

    #########################################
    ## Button Callbacks
    #########################################

    def destroy(self, *e):
        """Close the window (idempotent)."""
        self._autostep = 0
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def prev(self, *e):
        """Step to the previous reading, wrapping to the previous example."""
        selection = self._readingList.curselection()
        readingListSize = self._readingList.size()

        # there are readings
        if readingListSize > 0:
            # if one reading is currently selected
            if len(selection) == 1:
                index = int(selection[0])

                # if it's on (or before) the first item
                if index <= 0:
                    self._select_previous_example()
                else:
                    self._readingList_store_selection(index - 1)

            else:
                # select its last reading
                self._readingList_store_selection(readingListSize - 1)

        else:
            self._select_previous_example()

    def _select_previous_example(self):
        # if the current example is not the first example
        if self._curExample > 0:
            self._exampleList_store_selection(self._curExample - 1)
        else:
            # go to the last example
            self._exampleList_store_selection(len(self._examples) - 1)

    def next(self, *e):
        """Step to the next reading, wrapping to the next example."""
        selection = self._readingList.curselection()
        readingListSize = self._readingList.size()

        # if there are readings
        if readingListSize > 0:
            # if one reading is currently selected
            if len(selection) == 1:
                index = int(selection[0])

                # if it's on (or past) the last item
                if index >= (readingListSize - 1):
                    self._select_next_example()
                else:
                    self._readingList_store_selection(index + 1)

            else:
                # select its first reading
                self._readingList_store_selection(0)

        else:
            self._select_next_example()

    def _select_next_example(self):
        # if the current example is not the last example
        if self._curExample < len(self._examples) - 1:
            self._exampleList_store_selection(self._curExample + 1)
        else:
            # go to the first example
            self._exampleList_store_selection(0)

    def about(self, *e):
        """Show an About dialog (falls back to a plain text window)."""
        ABOUT = (
            "NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n"
            + "Written by Daniel H. Garrette"
        )
        TITLE = "About: NLTK DRT Glue Demo"
        try:
            from tkinter.messagebox import Message

            Message(message=ABOUT, title=TITLE).show()
        except:
            ShowText(self._top, TITLE, ABOUT)

    def postscript(self, *e):
        """Export the canvas to a PostScript file."""
        self._autostep = 0
        self._cframe.print_to_file()

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tkinter mainloop. This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a script); otherwise, the demo will close as soon as
        the script completes.
        """
        if in_idle():
            return
        self._top.mainloop(*args, **kwargs)

    def resize(self, size=None):
        """Change all fonts to *size* (or the current menu selection) and redraw."""
        if size is not None:
            self._size.set(size)
        size = self._size.get()
        # Negative sizes are pixel sizes in Tk.
        self._font.configure(size=-(abs(size)))
        self._boldfont.configure(size=-(abs(size)))
        self._sysfont.configure(size=-(abs(size)))
        self._bigfont.configure(size=-(abs(size + 2)))
        self._redraw()

    def _toggle_remove_duplicates(self):
        self._glue.remove_duplicates = not self._glue.remove_duplicates

        # Changing the option invalidates every cached analysis.
        self._exampleList.selection_clear(0, "end")
        self._readings = []
        self._populate_readingListbox()
        self._readingCache = [None for ex in self._examples]
        self._curExample = -1
        self._error = None

        self._drs = None
        self._redraw()

    def _exampleList_select(self, event):
        selection = self._exampleList.curselection()
        if len(selection) != 1:
            return
        self._exampleList_store_selection(int(selection[0]))

    def _exampleList_store_selection(self, index):
        # Select example *index*, parsing it on first use and caching the
        # resulting readings (or the error expression if parsing failed).
        self._curExample = index
        example = self._examples[index]

        self._exampleList.selection_clear(0, "end")
        if example:
            cache = self._readingCache[index]
            if cache:
                if isinstance(cache, list):
                    self._readings = cache
                    self._error = None
                else:
                    self._readings = []
                    self._error = cache
            else:
                try:
                    self._readings = self._glue.parse_to_meaning(example)
                    self._error = None
                    self._readingCache[index] = self._readings
                except Exception as e:
                    self._readings = []
                    self._error = DrtVariableExpression(Variable("Error: " + str(e)))
                    self._readingCache[index] = self._error

                    # add a star to the end of the example
                    self._exampleList.delete(index)
                    self._exampleList.insert(index, (" %s *" % example))
                    self._exampleList.config(
                        height=min(len(self._examples), 25), width=40
                    )

            self._populate_readingListbox()

            self._exampleList.selection_set(index)

            self._drs = None
            self._redraw()

    def _readingList_select(self, event):
        selection = self._readingList.curselection()
        if len(selection) != 1:
            return
        self._readingList_store_selection(int(selection[0]))

    def _readingList_store_selection(self, index):
        # Select reading *index* and draw its resolved, normalized DRS.
        reading = self._readings[index]

        self._readingList.selection_clear(0, "end")
        if reading:
            self._readingList.selection_set(index)

            self._drs = reading.simplify().normalize().resolve_anaphora()

        self._redraw()
|
||||
|
||||
|
||||
class DrsWidget:
    """Render a single DRS onto a Tkinter canvas and erase it on demand."""

    def __init__(self, canvas, drs, **attribs):
        self._canvas = canvas
        self._drs = drs
        # Capture the canvas's default text font so the drawer can size boxes.
        default_font = canvas.itemcget(canvas.create_text(0, 0, text=""), "font")
        canvas.font = Font(font=default_font)
        canvas._BUFFER = 3
        self.bbox = (0, 0, 0, 0)

    def draw(self):
        """Draw the DRS and remember the area it covered."""
        right, bottom = DrsDrawer(self._drs, canvas=self._canvas).draw()
        self.bbox = (0, 0, right + 1, bottom + 1)

    def clear(self):
        """Blank out the previously drawn area with a white rectangle."""
        self._canvas.create_rectangle(self.bbox, fill="white", width="0")
|
||||
|
||||
|
||||
def demo():
    """Launch the DRT Glue GUI with a small set of example sentences."""
    sentences = [
        "John walks",
        "David sees Mary",
        "David eats a sandwich",
        "every man chases a dog",
        # 'every man believes a dog yawns',
        # 'John gives David a sandwich',
        "John chases himself",
        # 'John persuades David to order a pizza',
        # 'John tries to go',
        # 'John tries to find a unicorn',
        # 'John seems to vanish',
        # 'a unicorn seems to approach',
        # 'every big cat leaves',
        # 'every gray cat leaves',
        # 'every big gray cat leaves',
        # 'a former senator leaves',
        # 'John likes a cat',
        # 'John likes every cat',
        # 'he walks',
        # 'John walks and he leaves'
    ]
    DrtGlueDemo(sentences).mainloop()
|
||||
|
||||
|
||||
# Launch the GUI demonstration when executed as a script.
if __name__ == "__main__":
    demo()
|
||||
830
backend/venv/Lib/site-packages/nltk/sem/evaluate.py
Normal file
830
backend/venv/Lib/site-packages/nltk/sem/evaluate.py
Normal file
@@ -0,0 +1,830 @@
|
||||
# Natural Language Toolkit: Models for first-order languages with lambda
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>,
|
||||
# URL: <https://www.nltk.org>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
# TODO:
|
||||
# - fix tracing
|
||||
# - fix iterator-based approach to existentials
|
||||
|
||||
"""
|
||||
This module provides data structures for representing first-order
|
||||
models.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
from pprint import pformat
|
||||
|
||||
from nltk.decorators import decorator # this used in code that is commented out
|
||||
from nltk.sem.logic import (
|
||||
AbstractVariableExpression,
|
||||
AllExpression,
|
||||
AndExpression,
|
||||
ApplicationExpression,
|
||||
EqualityExpression,
|
||||
ExistsExpression,
|
||||
Expression,
|
||||
IffExpression,
|
||||
ImpExpression,
|
||||
IndividualVariableExpression,
|
||||
IotaExpression,
|
||||
LambdaExpression,
|
||||
NegatedExpression,
|
||||
OrExpression,
|
||||
Variable,
|
||||
is_indvar,
|
||||
)
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for errors raised by the model-evaluation module."""

    pass
|
||||
|
||||
|
||||
class Undefined(Error):
    """Raised when an expression has no value under the valuation/assignment."""

    pass
|
||||
|
||||
|
||||
def trace(f, *args, **kw):
    """
    Call *f* with the given arguments, first printing the positional
    arguments (as ``name => value`` lines) when a truthy ``trace``
    parameter is among them.
    """
    names = inspect.getfullargspec(f)[0]
    bound = dict(zip(names, args))
    # Pop 'trace' so it is not echoed along with the real arguments.
    if bound.pop("trace", None):
        print()
        for name, value in bound.items():
            print("%s => %s" % (name, value))
    return f(*args, **kw)
|
||||
|
||||
|
||||
def is_rel(s):
    """
    Check whether a set represents a relation (of any arity).

    :param s: a set containing tuples of str elements
    :type s: set
    :rtype: bool
    :raises ValueError: if the elements are not tuples of one uniform length
    """
    # we have the empty relation, i.e. set()
    if len(s) == 0:
        return True
    # all the elements are tuples of the same length.
    # (The previous check compared only len(max(s)) with len(min(s)); since
    # max/min order tuples lexicographically, mixed-arity sets such as
    # {('a', 'a'), ('b',), ('c', 'c')} slipped through undetected.)
    elif all(isinstance(el, tuple) for el in s) and len({len(el) for el in s}) == 1:
        return True
    else:
        raise ValueError("Set %r contains sequences of different lengths" % s)
|
||||
|
||||
|
||||
def set2rel(s):
    """
    Convert a set containing individuals (strings or numbers) into a set of
    unary tuples. Any tuples of strings already in the set are passed through
    unchanged.

    For example:
        - set(['a', 'b']) => set([('a',), ('b',)])
        - set([3, 27]) => set([('3',), ('27',)])

    :type s: set
    :rtype: set of tuple of str
    """
    new = set()
    for elem in s:
        if isinstance(elem, str):
            new.add((elem,))
        elif isinstance(elem, int):
            # Wrap in a 1-tuple; previously the bare string was added,
            # contradicting the documented set([3, 27]) => set([('3',), ('27',)])
            # and producing a value that fails is_rel().
            new.add((str(elem),))
        else:
            new.add(elem)
    return new
|
||||
|
||||
|
||||
def arity(rel):
    """
    Check the arity of a relation.

    :type rel: set of tuples
    :rtype: int
    """
    # The empty relation has arity 0 by convention.
    if not rel:
        return 0
    # All tuples share one length (see is_rel), so inspect any element.
    return len(next(iter(rel)))
|
||||
|
||||
|
||||
class Valuation(dict):
    """
    A dictionary which represents a model-theoretic Valuation of non-logical
    constants. Keys are strings naming the constants to be interpreted;
    values are individuals (strings), truth values (bools), or n-ary
    relations (sets of tuples of strings).

    Indexing a ``Valuation`` with a symbol it does not recognize raises
    ``Undefined`` (a dict would raise ``KeyError``).
    """

    def __init__(self, xs):
        """
        :param xs: a list of (symbol, value) pairs.
        """
        super().__init__()
        for sym, val in xs:
            if isinstance(val, (str, bool)):
                self[sym] = val
            elif isinstance(val, set):
                # Normalize bare individuals into unary tuples.
                self[sym] = set2rel(val)
            else:
                msg = textwrap.fill(
                    "Error in initializing Valuation. "
                    "Unrecognized value for symbol '%s':\n%s" % (sym, val),
                    width=66,
                )

                raise ValueError(msg)

    def __getitem__(self, key):
        if key not in self:
            raise Undefined("Unknown expression: '%s'" % key)
        return dict.__getitem__(self, key)

    def __str__(self):
        return pformat(self)

    @property
    def domain(self):
        """Set-theoretic domain of the value-space of a Valuation."""
        entities = []
        for val in self.values():
            if isinstance(val, str):
                entities.append(val)
            elif not isinstance(val, bool):
                # val is a relation: collect every (non-None) tuple element.
                for tuple_ in val:
                    entities.extend(elem for elem in tuple_ if elem is not None)
        return set(entities)

    @property
    def symbols(self):
        """The non-logical constants which the Valuation recognizes."""
        return sorted(self.keys())

    @classmethod
    def fromstring(cls, s):
        """Build a Valuation from its textual representation."""
        return read_valuation(s)
|
||||
|
||||
|
||||
##########################################
# REs used by the _read_valuation function
##########################################
# Splits a "symbol => value" line around the arrow.
_VAL_SPLIT_RE = re.compile(r"\s*=+>\s*")
# Splits comma-separated elements inside a set or tuple literal.
_ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*")
# Matches one parenthesized tuple expression, e.g. "(b1, g1)".
_TUPLES_RE = re.compile(
    r"""\s*
    (\([^)]+\)) # tuple-expression
    \s*""",
    re.VERBOSE,
)
|
||||
|
||||
|
||||
def _read_valuation_line(s):
    """
    Read a line in a valuation file.

    Lines are expected to be of the form::

        noosa => n
        girl => {g1, g2}
        chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}

    :param s: input line
    :type s: str
    :return: a pair (symbol, value)
    :rtype: tuple
    """
    pieces = _VAL_SPLIT_RE.split(s)
    symbol, value = pieces[0], pieces[1]
    # A value wrapped in braces denotes a set (of individuals or tuples).
    if value.startswith("{"):
        body = value[1:-1]
        tuple_strings = _TUPLES_RE.findall(body)
        if tuple_strings:
            # Set of tuples: strip each pair of parentheses and split on commas.
            elements = [
                tuple(_ELEMENT_SPLIT_RE.split(ts[1:-1])) for ts in tuple_strings
            ]
        else:
            # Plain set of individuals.
            elements = _ELEMENT_SPLIT_RE.split(body)
        value = set(elements)
    return symbol, value
|
||||
|
||||
|
||||
def read_valuation(s, encoding=None):
    """
    Convert a valuation string into a valuation.

    :param s: a valuation string
    :type s: str
    :param encoding: the encoding of the input string, if it is binary
    :type encoding: str
    :return: a ``nltk.sem`` valuation
    :rtype: Valuation
    """
    if encoding is not None:
        s = s.decode(encoding)
    statements = []
    for linenum, raw in enumerate(s.splitlines()):
        stripped = raw.strip()
        # Skip blank lines and comments.
        if not stripped or stripped.startswith("#"):
            continue
        try:
            statements.append(_read_valuation_line(stripped))
        except ValueError as e:
            raise ValueError(f"Unable to parse line {linenum}: {stripped}") from e
    return Valuation(statements)
|
||||
|
||||
|
||||
class Assignment(dict):
|
||||
r"""
|
||||
A dictionary which represents an assignment of values to variables.
|
||||
|
||||
An assignment can only assign values from its domain.
|
||||
|
||||
If an unknown expression *a* is passed to a model *M*\ 's
|
||||
interpretation function *i*, *i* will first check whether *M*\ 's
|
||||
valuation assigns an interpretation to *a* as a constant, and if
|
||||
this fails, *i* will delegate the interpretation of *a* to
|
||||
*g*. *g* only assigns values to individual variables (i.e.,
|
||||
members of the class ``IndividualVariableExpression`` in the ``logic``
|
||||
module. If a variable is not assigned a value by *g*, it will raise
|
||||
an ``Undefined`` exception.
|
||||
|
||||
A variable *Assignment* is a mapping from individual variables to
|
||||
entities in the domain. Individual variables are usually indicated
|
||||
with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally
|
||||
followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are
|
||||
created using the ``Assignment`` constructor, which also takes the
|
||||
domain as a parameter.
|
||||
|
||||
>>> from nltk.sem.evaluate import Assignment
|
||||
>>> dom = set(['u1', 'u2', 'u3', 'u4'])
|
||||
>>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')])
|
||||
>>> g3 == {'x': 'u1', 'y': 'u2'}
|
||||
True
|
||||
|
||||
There is also a ``print`` format for assignments which uses a notation
|
||||
closer to that in logic textbooks:
|
||||
|
||||
>>> print(g3)
|
||||
g[u1/x][u2/y]
|
||||
|
||||
It is also possible to update an assignment using the ``add`` method:
|
||||
|
||||
>>> dom = set(['u1', 'u2', 'u3', 'u4'])
|
||||
>>> g4 = Assignment(dom)
|
||||
>>> g4.add('x', 'u1')
|
||||
{'x': 'u1'}
|
||||
|
||||
With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary:
|
||||
|
||||
>>> g4.purge()
|
||||
>>> g4
|
||||
{}
|
||||
|
||||
:param domain: the domain of discourse
|
||||
:type domain: set
|
||||
:param assign: a list of (varname, value) associations
|
||||
:type assign: list
|
||||
"""
|
||||
|
||||
def __init__(self, domain, assign=None):
|
||||
super().__init__()
|
||||
self.domain = domain
|
||||
if assign:
|
||||
for var, val in assign:
|
||||
assert val in self.domain, "'{}' is not in the domain: {}".format(
|
||||
val,
|
||||
self.domain,
|
||||
)
|
||||
assert is_indvar(var), (
|
||||
"Wrong format for an Individual Variable: '%s'" % var
|
||||
)
|
||||
self[var] = val
|
||||
self.variant = None
|
||||
self._addvariant()
|
||||
|
||||
def __getitem__(self, key):
|
||||
if key in self:
|
||||
return dict.__getitem__(self, key)
|
||||
else:
|
||||
raise Undefined("Not recognized as a variable: '%s'" % key)
|
||||
|
||||
def copy(self):
|
||||
new = Assignment(self.domain)
|
||||
new.update(self)
|
||||
return new
|
||||
|
||||
def purge(self, var=None):
|
||||
"""
|
||||
Remove one or all keys (i.e. logic variables) from an
|
||||
assignment, and update ``self.variant``.
|
||||
|
||||
:param var: a Variable acting as a key for the assignment.
|
||||
"""
|
||||
if var:
|
||||
del self[var]
|
||||
else:
|
||||
self.clear()
|
||||
self._addvariant()
|
||||
return None
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
Pretty printing for assignments. {'x', 'u'} appears as 'g[u/x]'
|
||||
"""
|
||||
gstring = "g"
|
||||
# Deterministic output for unit testing.
|
||||
variant = sorted(self.variant)
|
||||
for val, var in variant:
|
||||
gstring += f"[{val}/{var}]"
|
||||
return gstring
|
||||
|
||||
def _addvariant(self):
|
||||
"""
|
||||
Create a more pretty-printable version of the assignment.
|
||||
"""
|
||||
list_ = []
|
||||
for item in self.items():
|
||||
pair = (item[1], item[0])
|
||||
list_.append(pair)
|
||||
self.variant = list_
|
||||
return None
|
||||
|
||||
def add(self, var, val):
    """
    Add a new variable-value pair to the assignment, and update
    ``self.variant``.

    :param var: a variable name such as ``'x'`` (must satisfy ``is_indvar``)
    :param val: an entity drawn from ``self.domain``
    :return: ``self``, so that calls can be chained (used by ``satisfy``)
    """
    assert val in self.domain, f"{val} is not in the domain {self.domain}"
    assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var
    self[var] = val
    self._addvariant()
    return self
|
||||
|
||||
|
||||
class Model:
    """
    A first order model is a domain *D* of discourse and a valuation *V*.

    A domain *D* is a set, and a valuation *V* is a map that associates
    expressions with values in the model.
    The domain of *V* should be a subset of *D*.

    Construct a new ``Model``.

    :type domain: set
    :param domain: A set of entities representing the domain of discourse of the model.
    :type valuation: Valuation
    :param valuation: the valuation of the model.
    :param prop: If this is set, then we are building a propositional\
    model and don't require the domain of *V* to be subset of *D*.
    """

    def __init__(self, domain, valuation):
        assert isinstance(domain, set)
        self.domain = domain
        self.valuation = valuation
        # Every symbol the valuation interprets must denote within the domain.
        if not domain.issuperset(valuation.domain):
            raise Error(
                "The valuation domain, %s, must be a subset of the model's domain, %s"
                % (valuation.domain, domain)
            )

    def __repr__(self):
        return f"({self.domain!r}, {self.valuation!r})"

    def __str__(self):
        return f"Domain = {self.domain},\nValuation = \n{self.valuation}"

    def evaluate(self, expr, g, trace=None):
        """
        Read input expressions, and provide a handler for ``satisfy``
        that blocks further propagation of the ``Undefined`` error.

        :param expr: An ``Expression`` of ``logic`` (as a string; it is
            parsed here with ``Expression.fromstring``).
        :type g: Assignment
        :param g: an assignment to individual variables.
        :rtype: bool or 'Undefined'
        """
        try:
            parsed = Expression.fromstring(expr)
            value = self.satisfy(parsed, g, trace=trace)
            if trace:
                print()
                print(f"'{expr}' evaluates to {value} under M, {g}")
            return value
        except Undefined:
            # Swallow the error and return the string 'Undefined' instead of
            # letting it propagate to the caller.
            if trace:
                print()
                print(f"'{expr}' is undefined under M, {g}")
            return "Undefined"

    def satisfy(self, parsed, g, trace=None):
        """
        Recursive interpretation function for a formula of first-order logic.

        Raises an ``Undefined`` error when ``parsed`` is an atomic string
        but is not a symbol or an individual variable.

        :return: Returns a truth value or ``Undefined`` if ``parsed`` is\
        complex, and calls the interpretation function ``i`` if ``parsed``\
        is atomic.

        :param parsed: An expression of ``logic``.
        :type g: Assignment
        :param g: an assignment to individual variables.
        """

        # Dispatch on the syntactic class of the expression; the final
        # ``else`` hands atomic expressions to the interpretation function.
        if isinstance(parsed, ApplicationExpression):
            function, arguments = parsed.uncurry()
            if isinstance(function, AbstractVariableExpression):
                # It's a predicate expression ("P(x,y)"), so used uncurried arguments
                funval = self.satisfy(function, g)
                argvals = tuple(self.satisfy(arg, g) for arg in arguments)
                # Predicates denote sets of tuples; membership is truth.
                return argvals in funval
            else:
                # It must be a lambda expression, so use curried form
                funval = self.satisfy(parsed.function, g)
                argval = self.satisfy(parsed.argument, g)
                # Lambda expressions denote dicts (see the LambdaExpression
                # branch below), so application is dict lookup.
                return funval[argval]
        elif isinstance(parsed, NegatedExpression):
            return not self.satisfy(parsed.term, g)
        elif isinstance(parsed, AndExpression):
            return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g)
        elif isinstance(parsed, OrExpression):
            return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g)
        elif isinstance(parsed, ImpExpression):
            # Material implication: (p -> q) == (-p | q).
            return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g)
        elif isinstance(parsed, IffExpression):
            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
        elif isinstance(parsed, EqualityExpression):
            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
        elif isinstance(parsed, AllExpression):
            # Universal: the body must hold under every g[u/var].
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if not self.satisfy(parsed.term, new_g):
                    return False
            return True
        elif isinstance(parsed, ExistsExpression):
            # Existential: the body must hold under some g[u/var].
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if self.satisfy(parsed.term, new_g):
                    return True
            return False
        elif isinstance(parsed, IotaExpression):
            # NOTE(review): this branch mirrors ExistsExpression and returns a
            # bool rather than the unique witness — confirm this is the
            # intended iota semantics.
            new_g = g.copy()
            for u in self.domain:
                new_g.add(parsed.variable.name, u)
                if self.satisfy(parsed.term, new_g):
                    return True
            return False
        elif isinstance(parsed, LambdaExpression):
            # A lambda denotes its characteristic function, materialized as a
            # dict from each domain entity to the value of the body.
            cf = {}
            var = parsed.variable.name
            for u in self.domain:
                val = self.satisfy(parsed.term, g.add(var, u))
                # NB the dict would be a lot smaller if we do this:
                # if val: cf[u] = val
                # But then need to deal with cases where f(a) should yield
                # a function rather than just False.
                cf[u] = val
            return cf
        else:
            return self.i(parsed, g, trace)

    # @decorator(trace_eval)
    def i(self, parsed, g, trace=False):
        """
        An interpretation function.

        Assuming that ``parsed`` is atomic:

        - if ``parsed`` is a non-logical constant, calls the valuation *V*
        - else if ``parsed`` is an individual variable, calls assignment *g*
        - else returns ``Undefined``.

        :param parsed: an ``Expression`` of ``logic``.
        :type g: Assignment
        :param g: an assignment to individual variables.
        :return: a semantic value
        """
        # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols
        # and also be an IndividualVariableExpression. We want to catch this first case.
        # So there is a procedural consequence to the ordering of clauses here:
        if parsed.variable.name in self.valuation.symbols:
            return self.valuation[parsed.variable.name]
        elif isinstance(parsed, IndividualVariableExpression):
            return g[parsed.variable.name]

        else:
            raise Undefined("Can't find a value for %s" % parsed)

    def satisfiers(self, parsed, varex, g, trace=None, nesting=0):
        """
        Generate the entities from the model's domain that satisfy an open formula.

        :param parsed: an open formula
        :type parsed: Expression
        :param varex: the relevant free individual variable in ``parsed``.
        :type varex: VariableExpression or str
        :param g: a variable assignment
        :type g: Assignment
        :return: a set of the entities that satisfy ``parsed``.
        :raises Undefined: if ``varex`` is not free in ``parsed``.
        """

        spacer = "   "
        indent = spacer + (spacer * nesting)
        candidates = []

        # Accept either a plain string or a VariableExpression-like object.
        if isinstance(varex, str):
            var = Variable(varex)
        else:
            var = varex

        if var in parsed.free():
            if trace:
                print()
                print(
                    (spacer * nesting)
                    + f"Open formula is '{parsed}' with assignment {g}"
                )
            for u in self.domain:
                new_g = g.copy()
                new_g.add(var.name, u)
                if trace and trace > 1:
                    lowtrace = trace - 1
                else:
                    lowtrace = 0
                value = self.satisfy(parsed, new_g, lowtrace)

                if trace:
                    print(indent + "(trying assignment %s)" % new_g)

                # parsed == False under g[u/var]?
                # NB: '== False' (not 'not value') — lambda values are dicts,
                # and an empty dict must not count as falsity here.
                if value == False:
                    if trace:
                        print(indent + f"value of '{parsed}' under {new_g} is False")

                # so g[u/var] is a satisfying assignment
                else:
                    candidates.append(u)
                    if trace:
                        print(indent + f"value of '{parsed}' under {new_g} is {value}")

            result = {c for c in candidates}
        # var isn't free in parsed
        else:
            raise Undefined(f"{var.name} is not free in {parsed}")

        return result
|
||||
|
||||
|
||||
# //////////////////////////////////////////////////////////////////////
# Demo..
# //////////////////////////////////////////////////////////////////////
# Width (number of characters) of the '*' banner lines printed by the demos.
mult = 30
|
||||
|
||||
|
||||
# Demo 1: Propositional Logic
|
||||
#################
|
||||
def propdemo(trace=None):
    """Example of a propositional model."""

    global val1, dom1, m1, g1
    val1 = Valuation([("P", True), ("Q", True), ("R", False)])
    dom1 = set()
    m1 = Model(dom1, val1)
    g1 = Assignment(dom1)

    banner = "*" * mult
    print()
    print(banner)
    print("Propositional Formulas Demo")
    print(banner)
    print("(Propositional constants treated as nullary predicates)")
    print()
    print("Model m1:\n", m1)
    print(banner)

    # Each string is parsed and evaluated against m1 under the (empty)
    # assignment g1.
    sentences = [
        "(P & Q)",
        "(P & R)",
        "- P",
        "- R",
        "- - P",
        "- (P & R)",
        "(P | R)",
        "(R | P)",
        "(R | R)",
        "(- P | R)",
        "(P | - P)",
        "(P -> Q)",
        "(P -> R)",
        "(R -> P)",
        "(P <-> P)",
        "(R <-> R)",
        "(P <-> R)",
    ]

    for sent in sentences:
        if not trace:
            print(f"The value of '{sent}' is: {m1.evaluate(sent, g1)}")
        else:
            print()
            m1.evaluate(sent, g1, trace)
|
||||
|
||||
|
||||
# Demo 2: FOL Model
|
||||
#############
|
||||
|
||||
|
||||
def folmodel(quiet=False, trace=None):
    """
    Example of a first-order model.

    Builds the module-level model ``m2`` (domain ``dom2``, valuation
    ``val2``) and assignment ``g2``, then — unless ``quiet`` — prints the
    interpretation of some expressions and predicate applications.

    :param quiet: if True, only build the globals; print nothing.
    :param trace: unused here; accepted for a uniform demo signature.
    """

    global val2, v2, dom2, m2, g2

    v2 = [
        ("adam", "b1"),
        ("betty", "g1"),
        ("fido", "d1"),
        ("girl", {"g1", "g2"}),
        ("boy", {"b1", "b2"}),
        ("dog", {"d1"}),
        ("love", {("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")}),
    ]
    val2 = Valuation(v2)
    dom2 = val2.domain
    m2 = Model(dom2, val2)
    g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")])

    if not quiet:
        print()
        print("*" * mult)
        print("Models Demo")
        print("*" * mult)
        print("Model m2:\n", "-" * 14, "\n", m2)
        print("Variable assignment = ", g2)

        exprs = ["adam", "boy", "love", "walks", "x", "y", "z"]
        parsed_exprs = [Expression.fromstring(e) for e in exprs]

        print()
        for parsed in parsed_exprs:
            try:
                print(
                    "The interpretation of '%s' in m2 is %s"
                    % (parsed, m2.i(parsed, g2))
                )
            except Undefined:
                print("The interpretation of '%s' in m2 is Undefined" % parsed)

        applications = [
            # BUGFIX: '("adam")' was a plain string (missing comma), so the
            # loop below iterated over its characters; every entry is now a
            # 1- or 2-tuple of argument expressions, matching "walks".
            ("boy", ("adam",)),
            ("walks", ("adam",)),
            ("love", ("adam", "y")),
            ("love", ("y", "adam")),
        ]

        for fun, args in applications:
            try:
                funval = m2.i(Expression.fromstring(fun), g2)
                argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args)
                print(f"{fun}({args}) evaluates to {argsval in funval}")
            except Undefined:
                print(f"{fun}({args}) evaluates to Undefined")
|
||||
|
||||
|
||||
# Demo 3: FOL
|
||||
#########
|
||||
|
||||
|
||||
def foldemo(trace=None):
    """
    Interpretation of closed expressions in a first-order model.
    """
    # Build m2/g2 without printing the model itself.
    folmodel(quiet=True)

    print()
    print("*" * mult)
    print("FOL Formulas Demo")
    print("*" * mult)

    formulas = [
        "love (adam, betty)",
        "(adam = mia)",
        "\\x. (boy(x) | girl(x))",
        "\\x. boy(x)(adam)",
        "\\x y. love(x, y)",
        "\\x y. love(x, y)(adam)(betty)",
        "\\x y. love(x, y)(adam, betty)",
        "\\x y. (boy(x) & love(x, y))",
        "\\x. exists y. (boy(x) & love(x, y))",
        "exists z1. boy(z1)",
        "exists x. (boy(x) & -(x = adam))",
        "exists x. (boy(x) & all y. love(y, x))",
        "all x. (boy(x) | girl(x))",
        "all x. (girl(x) -> exists y. boy(y) & love(x, y))",  # Every girl loves a boy.
        "exists x. (boy(x) & all y. (girl(y) -> love(y, x)))",  # There is a boy that every girl loves.
        "exists x. (boy(x) & all y. (girl(y) -> love(x, y)))",  # Some boy loves every girl.
        "all x. (dog(x) -> - girl(x))",
        "exists x. exists y. (love(x, y) & love(x, y))",
    ]

    for fmla in formulas:
        # Quantifier clauses in ``satisfy`` mutate the assignment, so reset
        # it before every formula.
        g2.purge()
        if not trace:
            print(f"The value of '{fmla}' is: {m2.evaluate(fmla, g2)}")
        else:
            m2.evaluate(fmla, g2, trace)
|
||||
|
||||
|
||||
# Demo 3: Satisfaction
|
||||
#############
|
||||
|
||||
|
||||
def satdemo(trace=None):
    """Satisfiers of an open formula in a first order model."""

    banner = "*" * mult
    print()
    print(banner)
    print("Satisfiers Demo")
    print(banner)

    folmodel(quiet=True)

    formulas = [
        "boy(x)",
        "(x = x)",
        "(boy(x) | girl(x))",
        "(boy(x) & girl(x))",
        "love(adam, x)",
        "love(x, adam)",
        "-(x = adam)",
        "exists z22. love(x, z22)",
        "exists y. love(y, x)",
        "all y. (girl(y) -> love(x, y))",
        "all y. (girl(y) -> love(y, x))",
        "all y. (girl(y) -> (boy(x) & love(y, x)))",
        "(boy(x) & all y. (girl(y) -> love(x, y)))",
        "(boy(x) & all y. (girl(y) -> love(y, x)))",
        "(boy(x) & exists y. (girl(y) & love(y, x)))",
        "(girl(x) -> dog(x))",
        "all y. (dog(y) -> (x = y))",
        "exists y. love(y, x)",
        "exists y. (love(adam, y) & love(y, x))",
    ]

    if trace:
        print(m2)

    # Echo each formula (and check it parses) before the main loop.
    for fmla in formulas:
        print(fmla)
        Expression.fromstring(fmla)

    parsed = [Expression.fromstring(fmla) for fmla in formulas]

    for p in parsed:
        g2.purge()  # reset the shared assignment between formulas
        print(
            "The satisfiers of '{}' are: {}".format(p, m2.satisfiers(p, "x", g2, trace))
        )
|
||||
|
||||
|
||||
def demo(num=0, trace=None):
    """
    Run some demos.

    - num = 1: propositional logic demo
    - num = 2: first order model demo (only if trace is set)
    - num = 3: first order sentences demo
    - num = 4: satisfaction of open formulas demo
    - any other value: run all the demos

    :param trace: trace = 1, or trace = 2 for more verbose tracing
    """
    demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo}

    selected = demos.get(num)
    if selected is not None:
        selected(trace=trace)
    else:
        # Unknown number: run every demo in order.
        for runner in demos.values():
            runner(trace=trace)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run only the first-order model demo (demo 2), with tracing disabled.
    demo(2, trace=0)
|
||||
835
backend/venv/Lib/site-packages/nltk/sem/glue.py
Normal file
835
backend/venv/Lib/site-packages/nltk/sem/glue.py
Normal file
@@ -0,0 +1,835 @@
|
||||
# Natural Language Toolkit: Glue Semantics
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import os
|
||||
from itertools import chain
|
||||
|
||||
import nltk
|
||||
from nltk.internals import Counter
|
||||
from nltk.sem import drt, linearlogic
|
||||
from nltk.sem.logic import (
|
||||
AbstractVariableExpression,
|
||||
Expression,
|
||||
LambdaExpression,
|
||||
Variable,
|
||||
VariableExpression,
|
||||
)
|
||||
from nltk.tag import BigramTagger, RegexpTagger, TrigramTagger, UnigramTagger
|
||||
|
||||
# Map specifier (determiner) words to the semtype names used in the glue
# dictionary; "default" is used for any determiner not listed.
SPEC_SEMTYPES = {
    "a": "ex_quant",
    "an": "ex_quant",
    "every": "univ_quant",
    "the": "def_art",
    "no": "no_quant",
    "default": "ex_quant",
}

# Dependency relations that are ignored when matching a node's relationship
# set against a glue-dictionary entry.
OPTIONAL_RELATIONSHIPS = ["nmod", "vmod", "punct"]
|
||||
|
||||
|
||||
class GlueFormula:
    """
    A pairing of a meaning term (a ``logic`` ``Expression``) with a glue
    term (a ``linearlogic`` expression), plus a set of integer indices
    tracking which compiled premises this formula has consumed.
    """

    def __init__(self, meaning, glue, indices=None):
        # A fresh set per instance — avoids the shared-mutable-default trap.
        if not indices:
            indices = set()

        # Accept either a string (parsed here) or an already-built Expression.
        if isinstance(meaning, str):
            self.meaning = Expression.fromstring(meaning)
        elif isinstance(meaning, Expression):
            self.meaning = meaning
        else:
            raise RuntimeError(
                "Meaning term neither string or expression: %s, %s"
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, str):
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        elif isinstance(glue, linearlogic.Expression):
            self.glue = glue
        else:
            raise RuntimeError(
                "Glue term neither string or expression: %s, %s"
                % (glue, glue.__class__)
            )

        self.indices = indices

    def applyto(self, arg):
        """self = (\\x.(walk x), (subj -o f))
        arg = (john , subj)
        returns ((walk john), f)
        """
        if self.indices & arg.indices:  # if the sets are NOT disjoint
            raise linearlogic.LinearLogicApplicationException(
                f"'{self}' applied to '{arg}'. Indices are not disjoint."
            )
        else:  # if the sets ARE disjoint
            return_indices = self.indices | arg.indices

        try:
            return_glue = linearlogic.ApplicationExpression(
                self.glue, arg.glue, arg.indices
            )
        except linearlogic.LinearLogicApplicationException as e:
            raise linearlogic.LinearLogicApplicationException(
                f"'{self.simplify()}' applied to '{arg.simplify()}'"
            ) from e

        # Abstract the argument's meaning over any dependency variables
        # recorded on the glue antecedent (Lev-style compilation).
        arg_meaning_abstracted = arg.meaning
        if return_indices:
            for dep in self.glue.simplify().antecedent.dependencies[
                ::-1
            ]:  # if self.glue is (A -o B), dep is in A.dependencies
                arg_meaning_abstracted = self.make_LambdaExpression(
                    Variable("v%s" % dep), arg_meaning_abstracted
                )
        return_meaning = self.meaning.applyto(arg_meaning_abstracted)

        return self.__class__(return_meaning, return_glue, return_indices)

    def make_VariableExpression(self, name):
        # Factory hook: subclasses (e.g. DRT glue) override this.
        return VariableExpression(name)

    def make_LambdaExpression(self, variable, term):
        # Factory hook: subclasses (e.g. DRT glue) override this.
        return LambdaExpression(variable, term)

    def lambda_abstract(self, other):
        """Abstract this formula over ``other`` (whose meaning must be a variable)."""
        assert isinstance(other, GlueFormula)
        assert isinstance(other.meaning, AbstractVariableExpression)
        return self.__class__(
            self.make_LambdaExpression(other.meaning.variable, self.meaning),
            linearlogic.ImpExpression(other.glue, self.glue),
        )

    def compile(self, counter=None):
        """From Iddo Lev's PhD Dissertation p108-109"""
        if not counter:
            counter = Counter()
        (compiled_glue, new_forms) = self.glue.simplify().compile_pos(
            counter, self.__class__
        )
        return new_forms + [
            self.__class__(self.meaning, compiled_glue, {counter.get()})
        ]

    def simplify(self):
        """Return a new formula with both the meaning and glue terms simplified."""
        return self.__class__(
            self.meaning.simplify(), self.glue.simplify(), self.indices
        )

    def __eq__(self, other):
        # NB: indices are deliberately NOT part of equality.
        return (
            self.__class__ == other.__class__
            and self.meaning == other.meaning
            and self.glue == other.glue
        )

    def __ne__(self, other):
        return not self == other

    # sorting for use in doctests which must be deterministic
    def __lt__(self, other):
        return str(self) < str(other)

    def __str__(self):
        assert isinstance(self.indices, set)
        accum = f"{self.meaning} : {self.glue}"
        if self.indices:
            accum += (
                " : {" + ", ".join(str(index) for index in sorted(self.indices)) + "}"
            )
        return accum

    def __repr__(self):
        return "%s" % self
|
||||
|
||||
|
||||
class GlueDict(dict):
|
||||
def __init__(self, filename, encoding=None):
|
||||
self.filename = filename
|
||||
self.file_encoding = encoding
|
||||
self.read_file()
|
||||
|
||||
def read_file(self, empty_first=True):
|
||||
if empty_first:
|
||||
self.clear()
|
||||
|
||||
try:
|
||||
contents = nltk.data.load(
|
||||
self.filename, format="text", encoding=self.file_encoding
|
||||
)
|
||||
# TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load()
|
||||
except LookupError as e:
|
||||
try:
|
||||
contents = nltk.data.load(
|
||||
"file:" + self.filename, format="text", encoding=self.file_encoding
|
||||
)
|
||||
except LookupError:
|
||||
raise e
|
||||
lines = contents.splitlines()
|
||||
|
||||
for line in lines: # example: 'n : (\\x.(<word> x), (v-or))'
|
||||
# lambdacalc -^ linear logic -^
|
||||
line = line.strip() # remove trailing newline
|
||||
if not len(line):
|
||||
continue # skip empty lines
|
||||
if line[0] == "#":
|
||||
continue # skip commented out lines
|
||||
|
||||
parts = line.split(
|
||||
" : ", 2
|
||||
) # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']
|
||||
|
||||
glue_formulas = []
|
||||
paren_count = 0
|
||||
tuple_start = 0
|
||||
tuple_comma = 0
|
||||
|
||||
relationships = None
|
||||
|
||||
if len(parts) > 1:
|
||||
for i, c in enumerate(parts[1]):
|
||||
if c == "(":
|
||||
if paren_count == 0: # if it's the first '(' of a tuple
|
||||
tuple_start = i + 1 # then save the index
|
||||
paren_count += 1
|
||||
elif c == ")":
|
||||
paren_count -= 1
|
||||
if paren_count == 0: # if it's the last ')' of a tuple
|
||||
meaning_term = parts[1][
|
||||
tuple_start:tuple_comma
|
||||
] # '\\x.(<word> x)'
|
||||
glue_term = parts[1][tuple_comma + 1 : i] # '(v-r)'
|
||||
glue_formulas.append(
|
||||
[meaning_term, glue_term]
|
||||
) # add the GlueFormula to the list
|
||||
elif c == ",":
|
||||
if (
|
||||
paren_count == 1
|
||||
): # if it's a comma separating the parts of the tuple
|
||||
tuple_comma = i # then save the index
|
||||
elif c == "#": # skip comments at the ends of lines
|
||||
if (
|
||||
paren_count != 0
|
||||
): # if the line hasn't parsed correctly so far
|
||||
raise RuntimeError(
|
||||
"Formula syntax is incorrect for entry " + line
|
||||
)
|
||||
break # break to the next line
|
||||
|
||||
if len(parts) > 2: # if there is a relationship entry at the end
|
||||
rel_start = parts[2].index("[") + 1
|
||||
rel_end = parts[2].index("]")
|
||||
if rel_start == rel_end:
|
||||
relationships = frozenset()
|
||||
else:
|
||||
relationships = frozenset(
|
||||
r.strip() for r in parts[2][rel_start:rel_end].split(",")
|
||||
)
|
||||
|
||||
try:
|
||||
start_inheritance = parts[0].index("(")
|
||||
end_inheritance = parts[0].index(")")
|
||||
sem = parts[0][:start_inheritance].strip()
|
||||
supertype = parts[0][start_inheritance + 1 : end_inheritance]
|
||||
except:
|
||||
sem = parts[0].strip()
|
||||
supertype = None
|
||||
|
||||
if sem not in self:
|
||||
self[sem] = {}
|
||||
|
||||
if (
|
||||
relationships is None
|
||||
): # if not specified for a specific relationship set
|
||||
# add all relationship entries for parents
|
||||
if supertype:
|
||||
for rels in self[supertype]:
|
||||
if rels not in self[sem]:
|
||||
self[sem][rels] = []
|
||||
glue = self[supertype][rels]
|
||||
self[sem][rels].extend(glue)
|
||||
self[sem][rels].extend(
|
||||
glue_formulas
|
||||
) # add the glue formulas to every rel entry
|
||||
else:
|
||||
if None not in self[sem]:
|
||||
self[sem][None] = []
|
||||
self[sem][None].extend(
|
||||
glue_formulas
|
||||
) # add the glue formulas to every rel entry
|
||||
else:
|
||||
if relationships not in self[sem]:
|
||||
self[sem][relationships] = []
|
||||
if supertype:
|
||||
self[sem][relationships].extend(self[supertype][relationships])
|
||||
self[sem][relationships].extend(
|
||||
glue_formulas
|
||||
) # add the glue entry to the dictionary
|
||||
|
||||
def __str__(self):
|
||||
accum = ""
|
||||
for pos in self:
|
||||
str_pos = "%s" % pos
|
||||
for relset in self[pos]:
|
||||
i = 1
|
||||
for gf in self[pos][relset]:
|
||||
if i == 1:
|
||||
accum += str_pos + ": "
|
||||
else:
|
||||
accum += " " * (len(str_pos) + 2)
|
||||
accum += "%s" % gf
|
||||
if relset and i == len(self[pos][relset]):
|
||||
accum += " : %s" % relset
|
||||
accum += "\n"
|
||||
i += 1
|
||||
return accum
|
||||
|
||||
def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False):
|
||||
if node is None:
|
||||
# TODO: should it be depgraph.root? Is this code tested?
|
||||
top = depgraph.nodes[0]
|
||||
depList = list(chain.from_iterable(top["deps"].values()))
|
||||
root = depgraph.nodes[depList[0]]
|
||||
|
||||
return self.to_glueformula_list(depgraph, root, Counter(), verbose)
|
||||
|
||||
glueformulas = self.lookup(node, depgraph, counter)
|
||||
for dep_idx in chain.from_iterable(node["deps"].values()):
|
||||
dep = depgraph.nodes[dep_idx]
|
||||
glueformulas.extend(
|
||||
self.to_glueformula_list(depgraph, dep, counter, verbose)
|
||||
)
|
||||
return glueformulas
|
||||
|
||||
def lookup(self, node, depgraph, counter):
|
||||
semtype_names = self.get_semtypes(node)
|
||||
|
||||
semtype = None
|
||||
for name in semtype_names:
|
||||
if name in self:
|
||||
semtype = self[name]
|
||||
break
|
||||
if semtype is None:
|
||||
# raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
|
||||
return []
|
||||
|
||||
self.add_missing_dependencies(node, depgraph)
|
||||
|
||||
lookup = self._lookup_semtype_option(semtype, node, depgraph)
|
||||
|
||||
if not len(lookup):
|
||||
raise KeyError(
|
||||
"There is no GlueDict entry for sem type of '%s' "
|
||||
"with tag '%s', and rel '%s'" % (node["word"], node["tag"], node["rel"])
|
||||
)
|
||||
|
||||
return self.get_glueformulas_from_semtype_entry(
|
||||
lookup, node["word"], node, depgraph, counter
|
||||
)
|
||||
|
||||
def add_missing_dependencies(self, node, depgraph):
|
||||
rel = node["rel"].lower()
|
||||
|
||||
if rel == "main":
|
||||
headnode = depgraph.nodes[node["head"]]
|
||||
subj = self.lookup_unique("subj", headnode, depgraph)
|
||||
relation = subj["rel"]
|
||||
node["deps"].setdefault(relation, [])
|
||||
node["deps"][relation].append(subj["address"])
|
||||
# node['deps'].append(subj['address'])
|
||||
|
||||
def _lookup_semtype_option(self, semtype, node, depgraph):
|
||||
relationships = frozenset(
|
||||
depgraph.nodes[dep]["rel"].lower()
|
||||
for dep in chain.from_iterable(node["deps"].values())
|
||||
if depgraph.nodes[dep]["rel"].lower() not in OPTIONAL_RELATIONSHIPS
|
||||
)
|
||||
|
||||
try:
|
||||
lookup = semtype[relationships]
|
||||
except KeyError:
|
||||
# An exact match is not found, so find the best match where
|
||||
# 'best' is defined as the glue entry whose relationship set has the
|
||||
# most relations of any possible relationship set that is a subset
|
||||
# of the actual depgraph
|
||||
best_match = frozenset()
|
||||
for relset_option in set(semtype) - {None}:
|
||||
if (
|
||||
len(relset_option) > len(best_match)
|
||||
and relset_option < relationships
|
||||
):
|
||||
best_match = relset_option
|
||||
if not best_match:
|
||||
if None in semtype:
|
||||
best_match = None
|
||||
else:
|
||||
return None
|
||||
lookup = semtype[best_match]
|
||||
|
||||
return lookup
|
||||
|
||||
def get_semtypes(self, node):
|
||||
"""
|
||||
Based on the node, return a list of plausible semtypes in order of
|
||||
plausibility.
|
||||
"""
|
||||
rel = node["rel"].lower()
|
||||
word = node["word"].lower()
|
||||
|
||||
if rel == "spec":
|
||||
if word in SPEC_SEMTYPES:
|
||||
return [SPEC_SEMTYPES[word]]
|
||||
else:
|
||||
return [SPEC_SEMTYPES["default"]]
|
||||
elif rel in ["nmod", "vmod"]:
|
||||
return [node["tag"], rel]
|
||||
else:
|
||||
return [node["tag"]]
|
||||
|
||||
def get_glueformulas_from_semtype_entry(
|
||||
self, lookup, word, node, depgraph, counter
|
||||
):
|
||||
glueformulas = []
|
||||
|
||||
glueFormulaFactory = self.get_GlueFormula_factory()
|
||||
for meaning, glue in lookup:
|
||||
gf = glueFormulaFactory(self.get_meaning_formula(meaning, word), glue)
|
||||
if not len(glueformulas):
|
||||
gf.word = word
|
||||
else:
|
||||
gf.word = f"{word}{len(glueformulas) + 1}"
|
||||
|
||||
gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get())
|
||||
|
||||
glueformulas.append(gf)
|
||||
return glueformulas
|
||||
|
||||
def get_meaning_formula(self, generic, word):
|
||||
"""
|
||||
:param generic: A meaning formula string containing the
|
||||
parameter "<word>"
|
||||
:param word: The actual word to be replace "<word>"
|
||||
"""
|
||||
word = word.replace(".", "")
|
||||
return generic.replace("<word>", word)
|
||||
|
||||
def initialize_labels(self, expr, node, depgraph, unique_index):
|
||||
if isinstance(expr, linearlogic.AtomicExpression):
|
||||
name = self.find_label_name(expr.name.lower(), node, depgraph, unique_index)
|
||||
if name[0].isupper():
|
||||
return linearlogic.VariableExpression(name)
|
||||
else:
|
||||
return linearlogic.ConstantExpression(name)
|
||||
else:
|
||||
return linearlogic.ImpExpression(
|
||||
self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
|
||||
self.initialize_labels(expr.consequent, node, depgraph, unique_index),
|
||||
)
|
||||
|
||||
def find_label_name(self, name, node, depgraph, unique_index):
|
||||
try:
|
||||
dot = name.index(".")
|
||||
|
||||
before_dot = name[:dot]
|
||||
after_dot = name[dot + 1 :]
|
||||
if before_dot == "super":
|
||||
return self.find_label_name(
|
||||
after_dot, depgraph.nodes[node["head"]], depgraph, unique_index
|
||||
)
|
||||
else:
|
||||
return self.find_label_name(
|
||||
after_dot,
|
||||
self.lookup_unique(before_dot, node, depgraph),
|
||||
depgraph,
|
||||
unique_index,
|
||||
)
|
||||
except ValueError:
|
||||
lbl = self.get_label(node)
|
||||
if name == "f":
|
||||
return lbl
|
||||
elif name == "v":
|
||||
return "%sv" % lbl
|
||||
elif name == "r":
|
||||
return "%sr" % lbl
|
||||
elif name == "super":
|
||||
return self.get_label(depgraph.nodes[node["head"]])
|
||||
elif name == "var":
|
||||
return f"{lbl.upper()}{unique_index}"
|
||||
elif name == "a":
|
||||
return self.get_label(self.lookup_unique("conja", node, depgraph))
|
||||
elif name == "b":
|
||||
return self.get_label(self.lookup_unique("conjb", node, depgraph))
|
||||
else:
|
||||
return self.get_label(self.lookup_unique(name, node, depgraph))
|
||||
|
||||
def get_label(self, node):
|
||||
"""
|
||||
Pick an alphabetic character as identifier for an entity in the model.
|
||||
|
||||
:param value: where to index into the list of characters
|
||||
:type value: int
|
||||
"""
|
||||
value = node["address"]
|
||||
|
||||
letter = [
|
||||
"f",
|
||||
"g",
|
||||
"h",
|
||||
"i",
|
||||
"j",
|
||||
"k",
|
||||
"l",
|
||||
"m",
|
||||
"n",
|
||||
"o",
|
||||
"p",
|
||||
"q",
|
||||
"r",
|
||||
"s",
|
||||
"t",
|
||||
"u",
|
||||
"v",
|
||||
"w",
|
||||
"x",
|
||||
"y",
|
||||
"z",
|
||||
"a",
|
||||
"b",
|
||||
"c",
|
||||
"d",
|
||||
"e",
|
||||
][value - 1]
|
||||
num = int(value) // 26
|
||||
if num > 0:
|
||||
return letter + str(num)
|
||||
else:
|
||||
return letter
|
||||
|
||||
def lookup_unique(self, rel, node, depgraph):
|
||||
"""
|
||||
Lookup 'key'. There should be exactly one item in the associated relation.
|
||||
"""
|
||||
deps = [
|
||||
depgraph.nodes[dep]
|
||||
for dep in chain.from_iterable(node["deps"].values())
|
||||
if depgraph.nodes[dep]["rel"].lower() == rel.lower()
|
||||
]
|
||||
|
||||
if len(deps) == 0:
|
||||
raise KeyError(
|
||||
"'{}' doesn't contain a feature '{}'".format(node["word"], rel)
|
||||
)
|
||||
elif len(deps) > 1:
|
||||
raise KeyError(
|
||||
"'{}' should only have one feature '{}'".format(node["word"], rel)
|
||||
)
|
||||
else:
|
||||
return deps[0]
|
||||
|
||||
    def get_GlueFormula_factory(self):
        """Return the class used to build glue formulas (overridden by ``DrtGlueDict``)."""
        return GlueFormula
|
||||
|
||||
|
||||
class Glue:
    """
    Glue-semantics pipeline: dependency-parse a sentence, translate the
    dependency graph into glue formulas, compile them, and combine the
    compiled premises into meaning readings.
    """

    def __init__(
        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
    ):
        """
        :param semtype_file: path to the semtype grammar file; defaults to the
            ``glue.semtype`` grammar bundled with NLTK's sample grammars
        :param remove_duplicates: if True, drop readings that are logically
            equivalent (checked with Prover9) to one already found
        :param depparser: the dependency parser to use; lazily created in
            ``dep_parse`` if None
        :param verbose: if True, print intermediate results
        """
        self.verbose = verbose
        self.remove_duplicates = remove_duplicates
        self.depparser = depparser

        # Imported lazily so merely importing this module does not require
        # the Prover9 machinery.
        from nltk import Prover9

        self.prover = Prover9()

        if semtype_file:
            self.semtype_file = semtype_file
        else:
            self.semtype_file = os.path.join(
                "grammars", "sample_grammars", "glue.semtype"
            )

    def train_depparser(self, depgraphs=None):
        """Train the dependency parser on ``depgraphs``, or on the bundled CoNLL file."""
        if depgraphs:
            self.depparser.train(depgraphs)
        else:
            self.depparser.train_from_file(
                nltk.data.find(
                    os.path.join("grammars", "sample_grammars", "glue_train.conll")
                )
            )

    def parse_to_meaning(self, sentence):
        """
        Return all meaning readings for ``sentence``.

        :param sentence: the tokenized sentence
        :type sentence: list(str)
        :rtype: list of meaning expressions
        """
        readings = []
        for agenda in self.parse_to_compiled(sentence):
            readings.extend(self.get_readings(agenda))
        return readings

    def get_readings(self, agenda):
        """
        Combine the compiled glue premises on ``agenda`` by linear-logic
        application until no combinations remain, and return the readings
        that consumed every premise.

        Premises are bucketed by whether their glue term is atomic or an
        implication; each new premise is tried against every bucketed
        premise of the opposite kind.  A combination is only attempted when
        the premises' index sets are disjoint (each original premise may be
        used at most once per derivation).
        """
        readings = []
        agenda_length = len(agenda)
        atomics = dict()
        nonatomics = dict()
        while agenda:  # is not empty
            cur = agenda.pop()
            glue_simp = cur.glue.simplify()
            if isinstance(
                glue_simp, linearlogic.ImpExpression
            ):  # if cur.glue is non-atomic
                # Try applying `cur` to every atomic premise seen so far.
                for key in atomics:
                    try:
                        if isinstance(cur.glue, linearlogic.ApplicationExpression):
                            bindings = cur.glue.bindings
                        else:
                            bindings = linearlogic.BindingDict()
                        glue_simp.antecedent.unify(key, bindings)
                        for atomic in atomics[key]:
                            if not (
                                cur.indices & atomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(cur.applyto(atomic))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                    except linearlogic.UnificationException:
                        # Antecedent does not unify with this key; skip it.
                        pass
                try:
                    nonatomics[glue_simp.antecedent].append(cur)
                except KeyError:
                    nonatomics[glue_simp.antecedent] = [cur]

            else:  # else cur.glue is atomic
                # Try plugging `cur` into every implication seen so far.
                for key in nonatomics:
                    for nonatomic in nonatomics[key]:
                        try:
                            if isinstance(
                                nonatomic.glue, linearlogic.ApplicationExpression
                            ):
                                bindings = nonatomic.glue.bindings
                            else:
                                bindings = linearlogic.BindingDict()
                            glue_simp.unify(key, bindings)
                            if not (
                                cur.indices & nonatomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(nonatomic.applyto(cur))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                        except linearlogic.UnificationException:
                            pass
                try:
                    atomics[glue_simp].append(cur)
                except KeyError:
                    atomics[glue_simp] = [cur]

        # A complete reading is one whose index set covers every original
        # premise on the agenda.
        for entry in atomics:
            for gf in atomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        for entry in nonatomics:
            for gf in nonatomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        return readings

    def _add_to_reading_list(self, glueformula, reading_list):
        """
        Append ``glueformula``'s meaning to ``reading_list``, unless
        duplicate removal is enabled and an equivalent reading is present.
        """
        add_reading = True
        if self.remove_duplicates:
            for reading in reading_list:
                try:
                    if reading.equiv(glueformula.meaning, self.prover):
                        add_reading = False
                        break
                except Exception as e:
                    # if there is an exception, the syntax of the formula
                    # may not be understandable by the prover, so don't
                    # throw out the reading.
                    print("Error when checking logical equality of statements", e)

        if add_reading:
            reading_list.append(glueformula.meaning)

    def parse_to_compiled(self, sentence):
        """Return one compiled glue-formula agenda per dependency parse of ``sentence``."""
        gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
        return [self.gfl_to_compiled(gfl) for gfl in gfls]

    def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        # Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser

            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose)

    def depgraph_to_glue(self, depgraph):
        """Translate a dependency graph into a list of glue formulas."""
        return self.get_glue_dict().to_glueformula_list(depgraph)

    def get_glue_dict(self):
        """Return the glue dictionary (overridden by ``DrtGlue``)."""
        return GlueDict(self.semtype_file)

    def gfl_to_compiled(self, gfl):
        """Compile each glue formula in ``gfl`` and return the flattened result."""
        index_counter = Counter()
        return_list = []
        for gf in gfl:
            return_list.extend(gf.compile(index_counter))

        if self.verbose:
            print("Compiled Glue Premises:")
            for cgf in return_list:
                print(cgf)

        return return_list

    def get_pos_tagger(self):
        """
        Build a backoff POS-tagger chain: regexp defaults, then unigram,
        bigram and trigram taggers trained on the Brown 'news' category,
        with a final regexp layer that re-tags quantifier words.
        """
        from nltk.corpus import brown

        regexp_tagger = RegexpTagger(
            [
                (r"^-?[0-9]+(\.[0-9]+)?$", "CD"),  # cardinal numbers
                (r"(The|the|A|a|An|an)$", "AT"),  # articles
                (r".*able$", "JJ"),  # adjectives
                (r".*ness$", "NN"),  # nouns formed from adjectives
                (r".*ly$", "RB"),  # adverbs
                (r".*s$", "NNS"),  # plural nouns
                (r".*ing$", "VBG"),  # gerunds
                (r".*ed$", "VBD"),  # past tense verbs
                (r".*", "NN"),  # nouns (default)
            ]
        )
        brown_train = brown.tagged_sents(categories="news")
        unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
        bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
        trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)

        # Override particular words
        main_tagger = RegexpTagger(
            [(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")],
            backoff=trigram_tagger,
        )

        return main_tagger
|
||||
|
||||
|
||||
class DrtGlueFormula(GlueFormula):
    """A glue formula whose meaning side is a DRT expression rather than FOL."""

    def __init__(self, meaning, glue, indices=None):
        """
        :param meaning: the meaning term, as a string or a ``drt.DrtExpression``
        :param glue: the glue term, as a string or a ``linearlogic.Expression``
        :param indices: the set of premise indices this formula depends on
        :raises RuntimeError: if either term is neither a string nor an expression
        """
        if isinstance(meaning, drt.DrtExpression):
            self.meaning = meaning
        elif isinstance(meaning, str):
            # Strings are parsed into DRT expressions.
            self.meaning = drt.DrtExpression.fromstring(meaning)
        else:
            raise RuntimeError(
                "Meaning term neither string or expression: %s, %s"
                % (meaning, meaning.__class__)
            )

        if isinstance(glue, linearlogic.Expression):
            self.glue = glue
        elif isinstance(glue, str):
            # Strings are parsed into linear-logic expressions.
            self.glue = linearlogic.LinearLogicParser().parse(glue)
        else:
            raise RuntimeError(
                "Glue term neither string or expression: %s, %s"
                % (glue, glue.__class__)
            )

        self.indices = indices if indices else set()

    def make_VariableExpression(self, name):
        """Build a DRT variable expression (hook used by the parent class)."""
        return drt.DrtVariableExpression(name)

    def make_LambdaExpression(self, variable, term):
        """Build a DRT lambda expression (hook used by the parent class)."""
        return drt.DrtLambdaExpression(variable, term)
|
||||
|
||||
|
||||
class DrtGlueDict(GlueDict):
    """A glue dictionary that produces ``DrtGlueFormula`` instances."""

    def get_GlueFormula_factory(self):
        """Return the class used to build glue formulas."""
        return DrtGlueFormula
|
||||
|
||||
|
||||
class DrtGlue(Glue):
    """A ``Glue`` pipeline that produces DRT meanings instead of FOL ones."""

    def __init__(
        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
    ):
        """Initialize like ``Glue``, defaulting to the DRT semtype grammar."""
        if not semtype_file:
            # Fall back to the DRT grammar bundled with NLTK's sample grammars.
            semtype_file = os.path.join(
                "grammars", "sample_grammars", "drt_glue.semtype"
            )
        super().__init__(semtype_file, remove_duplicates, depparser, verbose)

    def get_glue_dict(self):
        """Return the DRT-specific glue dictionary."""
        return DrtGlueDict(self.semtype_file)
|
||||
|
||||
|
||||
def demo(show_example=-1):
    """
    Demonstrate glue-semantics parsing on a list of example sentences.

    :param show_example: index of the single example to run, or -1 to run all
    :type show_example: int
    """
    from nltk.parse import MaltParser

    examples = [
        "David sees Mary",
        "David eats a sandwich",
        "every man chases a dog",
        "every man believes a dog sleeps",
        "John gives David a sandwich",
        "John chases himself",
    ]
    # 'John persuades David to order a pizza',
    # 'John tries to go',
    # 'John tries to find a unicorn',
    # 'John seems to vanish',
    # 'a unicorn seems to approach',
    # 'every big cat leaves',
    # 'every gray cat leaves',
    # 'every big gray cat leaves',
    # 'a former senator leaves',

    print("============== DEMO ==============")

    # Small closed-vocabulary tagger, sufficient for the example sentences.
    tagger = RegexpTagger(
        [
            ("^(David|Mary|John)$", "NNP"),
            (
                "^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
                "VB",
            ),
            ("^(go|order|vanish|find|approach)$", "VB"),
            ("^(a)$", "ex_quant"),
            ("^(every)$", "univ_quant"),
            ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
            ("^(big|gray|former)$", "JJ"),
            ("^(him|himself)$", "PRP"),
        ]
    )

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for i, sentence in enumerate(examples):
        if i == show_example or show_example == -1:
            print(f"[[[Example {i}]]] {sentence}")
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print("")
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the glue-semantics demo when executed as a script.
    demo()
|
||||
395
backend/venv/Lib/site-packages/nltk/sem/hole.py
Normal file
395
backend/venv/Lib/site-packages/nltk/sem/hole.py
Normal file
@@ -0,0 +1,395 @@
|
||||
# Natural Language Toolkit: Logic
|
||||
#
|
||||
# Author: Peter Wang
|
||||
# Updated by: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
An implementation of the Hole Semantics model, following Blackburn and Bos,
|
||||
Representation and Inference for Natural Language (CSLI, 2005).
|
||||
|
||||
The semantic representations are built by the grammar hole.fcfg.
|
||||
This module contains driver code to read in sentences and parse them
|
||||
according to a hole semantics grammar.
|
||||
|
||||
After parsing, the semantic representation is in the form of an underspecified
|
||||
representation that is not easy to read. We use a "plugging" algorithm to
|
||||
convert that representation into first-order logic formulas.
|
||||
"""
|
||||
|
||||
from functools import reduce
|
||||
|
||||
from nltk.parse import load_parser
|
||||
from nltk.sem.logic import (
|
||||
AllExpression,
|
||||
AndExpression,
|
||||
ApplicationExpression,
|
||||
ExistsExpression,
|
||||
IffExpression,
|
||||
ImpExpression,
|
||||
LambdaExpression,
|
||||
NegatedExpression,
|
||||
OrExpression,
|
||||
)
|
||||
from nltk.sem.skolemize import skolemize
|
||||
|
||||
# Note that in this code there may be multiple types of trees being referred to:
|
||||
#
|
||||
# 1. parse trees
|
||||
# 2. the underspecified representation
|
||||
# 3. first-order logic formula trees
|
||||
# 4. the search space when plugging (search tree)
|
||||
#
|
||||
|
||||
|
||||
class Constants:
    """Names of the special predicates used in an underspecified representation."""

    ALL = "ALL"
    EXISTS = "EXISTS"
    NOT = "NOT"
    AND = "AND"
    OR = "OR"
    IMP = "IMP"
    IFF = "IFF"
    PRED = "PRED"
    LEQ = "LEQ"
    HOLE = "HOLE"
    LABEL = "LABEL"

    # Maps each connective name to a constructor for the corresponding
    # first-order-logic node; used when rebuilding formula trees from a
    # plugging.  Quantifiers take (variable, body); the rest are binary or
    # unary expression constructors applied via reduce.
    MAP = {
        ALL: lambda v, e: AllExpression(v.variable, e),
        EXISTS: lambda v, e: ExistsExpression(v.variable, e),
        NOT: NegatedExpression,
        AND: AndExpression,
        OR: OrExpression,
        IMP: ImpExpression,
        IFF: IffExpression,
        PRED: ApplicationExpression,
    }
|
||||
|
||||
|
||||
class HoleSemantics:
    """
    This class holds the broken-down components of a hole semantics, i.e. it
    extracts the holes, labels, logic formula fragments and constraints out of
    a big conjunction of such as produced by the hole semantics grammar. It
    then provides some operations on the semantics dealing with holes, labels
    and finding legal ways to plug holes with labels.
    """

    def __init__(self, usr):
        """
        Constructor. `usr' is a ``sem.Expression`` representing an
        Underspecified Representation Structure (USR). A USR has the following
        special predicates:
        ALL(l,v,n),
        EXISTS(l,v,n),
        AND(l,n,n),
        OR(l,n,n),
        IMP(l,n,n),
        IFF(l,n,n),
        PRED(l,v,n,v[,v]*) where the brackets and star indicate zero or more repetitions,
        LEQ(n,n),
        HOLE(n),
        LABEL(n)
        where l is the label of the node described by the predicate, n is either
        a label or a hole, and v is a variable.
        """
        self.holes = set()  # all holes found in the USR
        self.labels = set()  # all labels found in the USR
        self.fragments = {}  # mapping of label -> formula fragment
        self.constraints = set()  # set of Constraints
        self._break_down(usr)
        self.top_most_labels = self._find_top_most_labels()
        self.top_hole = self._find_top_hole()

    def is_node(self, x):
        """
        Return true if x is a node (label or hole) in this semantic
        representation.
        """
        return x in (self.labels | self.holes)

    def _break_down(self, usr):
        """
        Extract holes, labels, formula fragments and constraints from the hole
        semantics underspecified representation (USR).
        """
        if isinstance(usr, AndExpression):
            # The USR is a conjunction of predicates; recurse into both sides.
            self._break_down(usr.first)
            self._break_down(usr.second)
        elif isinstance(usr, ApplicationExpression):
            func, args = usr.uncurry()
            if func.variable.name == Constants.LEQ:
                self.constraints.add(Constraint(args[0], args[1]))
            elif func.variable.name == Constants.HOLE:
                self.holes.add(args[0])
            elif func.variable.name == Constants.LABEL:
                self.labels.add(args[0])
            else:
                # Any other predicate is a formula fragment named by its
                # first argument (the label).
                label = args[0]
                assert label not in self.fragments
                self.fragments[label] = (func, args[1:])
        else:
            # NOTE(review): `usr.label()` looks suspicious for a logic
            # Expression (no such method is visible here) — confirm.
            raise ValueError(usr.label())

    def _find_top_nodes(self, node_list):
        """Return the members of ``node_list`` not referenced by any fragment."""
        top_nodes = node_list.copy()
        for f in self.fragments.values():
            # the label is the first argument of the predicate
            args = f[1]
            for arg in args:
                if arg in node_list:
                    top_nodes.discard(arg)
        return top_nodes

    def _find_top_most_labels(self):
        """
        Return the set of labels which are not referenced directly as part of
        another formula fragment. These will be the top-most labels for the
        subtree that they are part of.
        """
        return self._find_top_nodes(self.labels)

    def _find_top_hole(self):
        """
        Return the hole that will be the top of the formula tree.
        """
        top_holes = self._find_top_nodes(self.holes)
        assert len(top_holes) == 1  # it must be unique
        return top_holes.pop()

    def pluggings(self):
        """
        Calculate and return all the legal pluggings (mappings of labels to
        holes) of this semantics given the constraints.
        """
        record = []
        self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, record)
        return record

    def _plug_nodes(self, queue, potential_labels, plug_acc, record):
        """
        Plug the nodes in `queue' with the labels in `potential_labels'.

        Each element of `queue' is a tuple of the node to plug and the list of
        ancestor holes from the root of the graph to that node.

        `potential_labels' is a set of the labels which are still available for
        plugging.

        `plug_acc' is the incomplete mapping of holes to labels made on the
        current branch of the search tree so far.

        `record' is a list of all the complete pluggings that we have found in
        total so far. It is the only parameter that is destructively updated.
        """
        if queue != []:
            (node, ancestors) = queue[0]
            if node in self.holes:
                # The node is a hole, try to plug it.
                self._plug_hole(
                    node, ancestors, queue[1:], potential_labels, plug_acc, record
                )
            else:
                assert node in self.labels
                # The node is a label. Replace it in the queue by the holes and
                # labels in the formula fragment named by that label.
                args = self.fragments[node][1]
                head = [(a, ancestors) for a in args if self.is_node(a)]
                self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record)
        else:
            # Only reachable if called with an empty queue; the search always
            # starts with the top hole on the queue.
            raise Exception("queue empty")

    def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record):
        """
        Try all possible ways of plugging a single hole.
        See _plug_nodes for the meanings of the parameters.
        """
        # Add the current hole we're trying to plug into the list of ancestors.
        assert hole not in ancestors0
        ancestors = [hole] + ancestors0

        # Try each potential label in this hole in turn.
        for l in potential_labels0:
            # Is the label valid in this hole?
            if self._violates_constraints(l, ancestors):
                continue

            plug_acc = plug_acc0.copy()
            plug_acc[hole] = l
            potential_labels = potential_labels0.copy()
            potential_labels.remove(l)

            if len(potential_labels) == 0:
                # No more potential labels. That must mean all the holes have
                # been filled so we have found a legal plugging so remember it.
                #
                # Note that the queue might not be empty because there might
                # be labels on there that point to formula fragments with
                # no holes in them. _sanity_check_plugging will make sure
                # all holes are filled.
                self._sanity_check_plugging(plug_acc, self.top_hole, [])
                record.append(plug_acc)
            else:
                # Recursively try to fill in the rest of the holes in the
                # queue. The label we just plugged into the hole could have
                # holes of its own so at the end of the queue. Putting it on
                # the end of the queue gives us a breadth-first search, so that
                # all the holes at level i of the formula tree are filled
                # before filling level i+1.
                # A depth-first search would work as well since the trees must
                # be finite but the bookkeeping would be harder.
                self._plug_nodes(
                    queue + [(l, ancestors)], potential_labels, plug_acc, record
                )

    def _violates_constraints(self, label, ancestors):
        """
        Return True if the `label' cannot be placed underneath the holes given
        by the set `ancestors' because it would violate the constraints imposed
        on it.
        """
        for c in self.constraints:
            if c.lhs == label:
                if c.rhs not in ancestors:
                    return True
        return False

    def _sanity_check_plugging(self, plugging, node, ancestors):
        """
        Make sure that a given plugging is legal. We recursively go through
        each node and make sure that no constraints are violated.
        We also check that all holes have been filled.
        """
        if node in self.holes:
            ancestors = [node] + ancestors
            # KeyError here would mean an unfilled hole.
            label = plugging[node]
        else:
            label = node
        assert label in self.labels
        for c in self.constraints:
            if c.lhs == label:
                assert c.rhs in ancestors
        args = self.fragments[label][1]
        for arg in args:
            if self.is_node(arg):
                self._sanity_check_plugging(plugging, arg, [label] + ancestors)

    def formula_tree(self, plugging):
        """
        Return the first-order logic formula tree for this underspecified
        representation using the plugging given.
        """
        return self._formula_tree(plugging, self.top_hole)

    def _formula_tree(self, plugging, node):
        """Recursive helper for ``formula_tree``."""
        if node in plugging:
            # A plugged hole: continue from the label that fills it.
            return self._formula_tree(plugging, plugging[node])
        elif node in self.fragments:
            pred, args = self.fragments[node]
            children = [self._formula_tree(plugging, arg) for arg in args]
            # Fold the children with the constructor for this connective.
            return reduce(Constants.MAP[pred.variable.name], children)
        else:
            # A leaf (e.g. a variable): return it unchanged.
            return node
|
||||
|
||||
|
||||
class Constraint:
    """
    A dominance constraint of the form (L =< N): label ``lhs`` must end up
    underneath node ``rhs`` (a label or a hole) in any legal plugging.
    """

    def __init__(self, lhs, rhs):
        self.lhs = lhs
        self.rhs = rhs

    def __eq__(self, other):
        # Constraints of different (sub)classes are never equal.
        return (
            self.__class__ == other.__class__
            and self.lhs == other.lhs
            and self.rhs == other.rhs
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Hash follows repr so equal constraints hash identically.
        return hash(repr(self))

    def __repr__(self):
        return "({} < {})".format(self.lhs, self.rhs)
|
||||
|
||||
|
||||
def hole_readings(sentence, grammar_filename=None, verbose=False):
    """
    Parse ``sentence`` with a hole-semantics grammar and return every
    fully-plugged first-order-logic reading, across all parses.

    :param sentence: the input sentence as a single whitespace-separated string
    :type sentence: str
    :param grammar_filename: optional path to a hole-semantics grammar;
        defaults to the sample ``hole.fcfg`` grammar
    :param verbose: if True, print intermediate representations
    :rtype: list of logic expressions
    """
    if not grammar_filename:
        grammar_filename = "grammars/sample_grammars/hole.fcfg"

    if verbose:
        print("Reading grammar file", grammar_filename)

    parser = load_parser(grammar_filename)

    # Parse the sentence.
    tokens = sentence.split()
    trees = list(parser.parse(tokens))
    if verbose:
        print("Got %d different parses" % len(trees))

    all_readings = []
    for tree in trees:
        # Get the semantic feature from the top of the parse tree.
        sem = tree.label()["SEM"].simplify()

        # Print the raw semantic representation.
        if verbose:
            print("Raw:       ", sem)

        # Skolemize away all quantifiers. All variables become unique.
        # Leading lambdas are stripped first so skolemize sees the body.
        while isinstance(sem, LambdaExpression):
            sem = sem.term
        skolemized = skolemize(sem)

        if verbose:
            print("Skolemized:", skolemized)

        # Break the hole semantics representation down into its components
        # i.e. holes, labels, formula fragments and constraints.
        hole_sem = HoleSemantics(skolemized)

        # Maybe show the details of the semantic representation.
        if verbose:
            print("Holes:       ", hole_sem.holes)
            print("Labels:      ", hole_sem.labels)
            print("Constraints: ", hole_sem.constraints)
            print("Top hole:    ", hole_sem.top_hole)
            print("Top labels:  ", hole_sem.top_most_labels)
            print("Fragments:")
            for l, f in hole_sem.fragments.items():
                print(f"\t{l}: {f}")

        # Find all the possible ways to plug the formulas together.
        pluggings = hole_sem.pluggings()

        # Build FOL formula trees using the pluggings.
        readings = list(map(hole_sem.formula_tree, pluggings))

        # Print out the formulas in a textual format.
        if verbose:
            for i, r in enumerate(readings):
                print()
                print("%d. %s" % (i, r))
            print()

        all_readings.extend(readings)

    return all_readings
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Quick smoke test: print all scope readings for two sample sentences.
    for r in hole_readings("a dog barks"):
        print(r)
    print()
    for r in hole_readings("every girl chases a dog"):
        print(r)
|
||||
261
backend/venv/Lib/site-packages/nltk/sem/lfg.py
Normal file
261
backend/venv/Lib/site-packages/nltk/sem/lfg.py
Normal file
@@ -0,0 +1,261 @@
|
||||
# Natural Language Toolkit: Lexical Functional Grammar
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from itertools import chain
|
||||
|
||||
from nltk.internals import Counter
|
||||
|
||||
|
||||
class FStructure(dict):
    """
    A Lexical-Functional Grammar f-structure: a mapping from feature names
    (stored lower-cased) to lists of values, where each value is a
    sub-``FStructure``, a ``(word, tag)`` tuple, or a list of f-structures.
    """

    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'. If no list exists for 'key', then
        construct one.
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        # Feature names are case-insensitive: always store them lower-cased.
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        """Convert to a dependency graph and look up its glue formulas."""
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        """Convert this f-structure to a ``DependencyGraph``."""
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, "ROOT")

        # Add all the dependencies for all the nodes
        for address, node in nodes.items():
            for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
                if n2["head"] == address:
                    relation = n2["rel"]
                    node["deps"].setdefault(relation, [])
                    node["deps"][relation].append(n2["address"])

        depgraph.root = nodes[1]

        return depgraph

    def _to_depgraph(self, nodes, head, rel):
        """Recursively add this f-structure and its children to ``nodes``."""
        index = len(nodes)

        nodes[index].update(
            {
                "address": index,
                "word": self.pred[0],
                "tag": self.pred[1],
                "head": head,
                "rel": rel,
            }
        )

        for feature in sorted(self):
            for item in sorted(self[feature]):
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    # A (word, tag) leaf becomes its own node.
                    new_index = len(nodes)
                    nodes[new_index].update(
                        {
                            "address": new_index,
                            "word": item[0],
                            "tag": item[1],
                            "head": index,
                            "rel": feature,
                        }
                    )
                elif isinstance(item, list):
                    for n in item:
                        n._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        """Build an ``FStructure`` from a ``DependencyGraph``."""
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        """Recursive helper for ``read_depgraph``."""
        if not label_counter:
            label_counter = Counter()

        if node["rel"].lower() in ["spec", "punct"]:
            # the value of a 'spec' entry is a word, not an FStructure
            return (node["word"], node["tag"])

        else:
            fstruct = FStructure()
            fstruct.pred = None
            fstruct.label = FStructure._make_label(label_counter.get())

            fstruct.parent = parent

            word, tag = node["word"], node["tag"]
            if tag[:2] == "VB":
                if tag[2:3] == "D":
                    # Past-tense verb: record tense and truncate the tag.
                    fstruct.safeappend("tense", ("PAST", "tense"))
                fstruct.pred = (word, tag[:2])

            if not fstruct.pred:
                fstruct.pred = (word, tag)

            children = [
                depgraph.nodes[idx]
                for idx in chain.from_iterable(node["deps"].values())
            ]
            for child in children:
                fstruct.safeappend(
                    child["rel"],
                    FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
                )

            return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        Labels start at 'f' and wrap past 'z' to 'a'..'e'; values past the
        26th gain a numeric suffix (e.g. 27 -> 'f1').

        :param value: where to index into the list of characters
        :type value: int
        """
        letter = [
            "f",
            "g",
            "h",
            "i",
            "j",
            "k",
            "l",
            "m",
            "n",
            "o",
            "p",
            "q",
            "r",
            "s",
            "t",
            "u",
            "v",
            "w",
            "x",
            "y",
            "z",
            "a",
            "b",
            "c",
            "d",
            "e",
        ][(value - 1) % 26]  # modulo fixes an IndexError for value > 26
        num = int(value) // 26
        if num > 0:
            return letter + str(num)
        else:
            return letter

    def __repr__(self):
        return self.__str__().replace("\n", "")

    def __str__(self):
        return self.pretty_format()

    def pretty_format(self, indent=3):
        """Return a human-readable, indented rendering of this f-structure."""
        # Fix: attribute access raises AttributeError, not NameError, so the
        # original `except NameError` fallbacks were unreachable and a
        # structure without `label`/`pred` crashed instead of degrading.
        try:
            accum = "%s:[" % self.label
        except AttributeError:
            accum = "["
        try:
            accum += "pred '%s'" % (self.pred[0])
        except AttributeError:
            pass

        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    next_indent = indent + len(feature) + 3 + len(self.label)
                    accum += "\n{}{} {}".format(
                        " " * (indent),
                        feature,
                        item.pretty_format(next_indent),
                    )
                elif isinstance(item, tuple):
                    accum += "\n{}{} '{}'".format(" " * (indent), feature, item[0])
                elif isinstance(item, list):
                    accum += "\n{}{} {{{}}}".format(
                        " " * (indent),
                        feature,
                        ("\n%s" % (" " * (indent + len(feature) + 2))).join(item),
                    )
                else:  # ERROR
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )
        return accum + "]"
|
||||
|
||||
|
||||
def demo_read_depgraph():
    """Build f-structures from a few hand-written dependency graphs and print them."""
    from nltk.parse.dependencygraph import DependencyGraph

    dg1 = DependencyGraph(
        """\
Esso       NNP     2       SUB
said       VBD     0       ROOT
the        DT      5       NMOD
Whiting    NNP     5       NMOD
field      NN      6       SUB
started    VBD     2       VMOD
production NN      6       OBJ
Tuesday    NNP     6       VMOD
"""
    )
    dg2 = DependencyGraph(
        """\
John    NNP     2       SUB
sees    VBP     0       ROOT
Mary    NNP     2       OBJ
"""
    )
    dg3 = DependencyGraph(
        """\
a       DT      2       SPEC
man     NN      3       SUBJ
walks   VB      0       ROOT
"""
    )
    dg4 = DependencyGraph(
        """\
every   DT      2       SPEC
girl    NN      3       SUBJ
chases  VB      0       ROOT
a       DT      5       SPEC
dog     NN      3       OBJ
"""
    )

    depgraphs = [dg1, dg2, dg3, dg4]
    for dg in depgraphs:
        print(FStructure.read_depgraph(dg))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the f-structure demo when executed as a script.
    demo_read_depgraph()
|
||||
481
backend/venv/Lib/site-packages/nltk/sem/linearlogic.py
Normal file
481
backend/venv/Lib/site-packages/nltk/sem/linearlogic.py
Normal file
@@ -0,0 +1,481 @@
|
||||
# Natural Language Toolkit: Linear Logic
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.internals import Counter
|
||||
from nltk.sem.logic import APP, LogicParser
|
||||
|
||||
_counter = Counter()
|
||||
|
||||
|
||||
class Tokens:
    """Token constants recognised by the linear-logic parser."""

    # Punctuation
    OPEN = "("
    CLOSE = ")"

    # Operations
    IMP = "-o"  # linear implication

    PUNCT = [OPEN, CLOSE]
    TOKENS = PUNCT + [IMP]
|
||||
|
||||
|
||||
class LinearLogicParser(LogicParser):
    """A linear logic expression parser."""

    def __init__(self):
        LogicParser.__init__(self)

        # Application binds tightest, then implication; None marks the
        # loosest (default) context.
        self.operator_precedence = {APP: 1, Tokens.IMP: 2, None: 3}
        # '-o' associates to the right: A -o B -o C == A -o (B -o C).
        self.right_associated_operations += [Tokens.IMP]

    def get_all_symbols(self):
        """Return every token this parser recognises."""
        return Tokens.TOKENS

    def handle(self, tok, context):
        """Dispatch on the token type: variables/constants vs. open paren.

        NOTE(review): a close-paren token falls through and returns None;
        presumably the superclass never passes one here — confirm.
        """
        if tok not in Tokens.TOKENS:
            return self.handle_variable(tok, context)
        elif tok == Tokens.OPEN:
            return self.handle_open(tok, context)

    def get_BooleanExpression_factory(self, tok):
        """Return the constructor for boolean operator ``tok``, or None."""
        if tok == Tokens.IMP:
            return ImpExpression
        else:
            return None

    def make_BooleanExpression(self, factory, first, second):
        """Build a boolean expression from the factory chosen above."""
        return factory(first, second)

    def attempt_ApplicationExpression(self, expression, context):
        """Attempt to make an application expression. If the next tokens
        are an argument in parens, then the argument expression is a
        function being applied to the arguments. Otherwise, return the
        argument expression."""
        if self.has_priority(APP, context):
            if self.inRange(0) and self.token(0) == Tokens.OPEN:
                self.token()  # swallow then open paren
                argument = self.process_next_expression(APP)
                self.assertNextToken(Tokens.CLOSE)
                expression = ApplicationExpression(expression, argument, None)
        return expression

    def make_VariableExpression(self, name):
        # An upper-case initial marks a variable; otherwise it's a constant.
        if name[0].isupper():
            return VariableExpression(name)
        else:
            return ConstantExpression(name)
|
||||
|
||||
|
||||
class Expression:
    """Base class for linear logic expressions."""

    # Shared parser instance used by ``fromstring`` (created once at class
    # definition time).
    _linear_logic_parser = LinearLogicParser()

    @classmethod
    def fromstring(cls, s):
        """Parse the string *s* into a linear logic ``Expression``."""
        return cls._linear_logic_parser.parse(s)

    def applyto(self, other, other_indices=None):
        """Apply *self* (a function) to *other*, producing an
        ``ApplicationExpression``.

        :param other_indices: set of glue-formula indices the argument came
            from, or ``None`` when compilation indices are not tracked.
        """
        return ApplicationExpression(self, other, other_indices)

    def __call__(self, other):
        # Allow function-call syntax: ``f(g)`` is ``f.applyto(g)``.
        return self.applyto(other)

    def __repr__(self):
        return f"<{self.__class__.__name__} {self}>"
|
||||
|
||||
|
||||
class AtomicExpression(Expression):
    """A single linear logic atom, optionally tagged with dependency indices."""

    def __init__(self, name, dependencies=None):
        """
        :param name: str for the constant name
        :param dependencies: list of int for the indices on which this atom is dependent
        """
        assert isinstance(name, str)
        self.name = name
        # Normalize any falsy value (None, empty list) to a fresh list.
        self.dependencies = dependencies if dependencies else []

    def simplify(self, bindings=None):
        """
        Return the atom to which 'self' is bound under 'bindings'; when it is
        unbound (or no bindings are given), return 'self' unchanged.

        :param bindings: ``BindingDict`` A dictionary of bindings used to simplify
        :return: ``AtomicExpression``
        """
        if bindings and self in bindings:
            return bindings[self]
        return self

    def compile_pos(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        # An atom compiles to itself with its dependencies cleared.
        self.dependencies = []
        return (self, [])

    def compile_neg(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        # Negative occurrences of atoms compile the same way as positive ones.
        self.dependencies = []
        return (self, [])

    def initialize_labels(self, fstruct):
        # Replace the raw name with the f-structure's canonical label.
        self.name = fstruct.initialize_label(self.name.lower())

    def __eq__(self, other):
        return self.__class__ == other.__class__ and self.name == other.name

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        if self.dependencies:
            return f"{self.name}{self.dependencies}"
        return self.name

    def __hash__(self):
        return hash(self.name)
|
||||
|
||||
|
||||
class ConstantExpression(AtomicExpression):
    """A linear logic constant (a lowercase-initial atom)."""

    def unify(self, other, bindings):
        """
        If 'other' is a constant, then it must be equal to 'self'. If 'other' is a variable,
        then it must not be bound to anything other than 'self'.

        :param other: ``Expression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and any new binding
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, Expression)
        if isinstance(other, VariableExpression):
            # Try binding the variable to this constant; an inconsistent
            # binding falls through to the failure below.
            try:
                return bindings + BindingDict([(other, self)])
            except VariableBindingException:
                pass
        else:
            if self == other:
                return bindings
        raise UnificationException(self, other, bindings)
|
||||
|
||||
|
||||
class VariableExpression(AtomicExpression):
    """A linear logic variable (an uppercase-initial atom)."""

    def unify(self, other, bindings):
        """
        'self' must not be bound to anything other than 'other'.

        :param other: ``Expression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and the new binding
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, Expression)
        # Unifying a variable with itself needs no new binding.
        if self == other:
            return bindings
        try:
            return bindings + BindingDict([(self, other)])
        except VariableBindingException as e:
            raise UnificationException(self, other, bindings) from e
|
||||
|
||||
|
||||
class ImpExpression(Expression):
    """A linear implication: ``(antecedent -o consequent)``."""

    def __init__(self, antecedent, consequent):
        """
        :param antecedent: ``Expression`` for the antecedent
        :param consequent: ``Expression`` for the consequent
        """
        assert isinstance(antecedent, Expression)
        assert isinstance(consequent, Expression)
        self.antecedent = antecedent
        self.consequent = consequent

    def simplify(self, bindings=None):
        """Simplify both sides under *bindings* and rebuild the implication."""
        return self.__class__(
            self.antecedent.simplify(bindings), self.consequent.simplify(bindings)
        )

    def unify(self, other, bindings):
        """
        Both the antecedent and consequent of 'self' and 'other' must unify.

        :param other: ``ImpExpression``
        :param bindings: ``BindingDict`` A dictionary of all current bindings
        :return: ``BindingDict`` A new combined dictionary of 'bindings' and any new bindings
        :raise UnificationException: If 'self' and 'other' cannot be unified in the context of 'bindings'
        """
        assert isinstance(other, ImpExpression)
        try:
            # Combine the incoming bindings with those produced by unifying
            # each side; BindingDict.__add__ rejects inconsistent merges.
            return (
                bindings
                + self.antecedent.unify(other.antecedent, bindings)
                + self.consequent.unify(other.consequent, bindings)
            )
        except VariableBindingException as e:
            raise UnificationException(self, other, bindings) from e

    def compile_pos(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list) for the compiled linear logic and any newly created glue formulas
        """
        # Positive implication: antecedent flips to negative polarity.
        (a, a_new) = self.antecedent.compile_neg(index_counter, glueFormulaFactory)
        (c, c_new) = self.consequent.compile_pos(index_counter, glueFormulaFactory)
        return (ImpExpression(a, c), a_new + c_new)

    def compile_neg(self, index_counter, glueFormulaFactory):
        """
        From Iddo Lev's PhD Dissertation p108-109

        :param index_counter: ``Counter`` for unique indices
        :param glueFormulaFactory: ``GlueFormula`` for creating new glue formulas
        :return: (``Expression``, list of ``GlueFormula``) for the compiled linear logic and any newly created glue formulas
        """
        # Negative implication is split: the antecedent becomes a fresh
        # premise (a new glue formula) and the consequent carries a new
        # dependency index tying the two together.
        (a, a_new) = self.antecedent.compile_pos(index_counter, glueFormulaFactory)
        (c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory)
        fresh_index = index_counter.get()
        c.dependencies.append(fresh_index)
        new_v = glueFormulaFactory("v%s" % fresh_index, a, {fresh_index})
        return (c, a_new + c_new + [new_v])

    def initialize_labels(self, fstruct):
        # Recurse into both sides so every atom gets a canonical label.
        self.antecedent.initialize_labels(fstruct)
        self.consequent.initialize_labels(fstruct)

    def __eq__(self, other):
        return (
            self.__class__ == other.__class__
            and self.antecedent == other.antecedent
            and self.consequent == other.consequent
        )

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return "{}{} {} {}{}".format(
            Tokens.OPEN,
            self.antecedent,
            Tokens.IMP,
            self.consequent,
            Tokens.CLOSE,
        )

    def __hash__(self):
        return hash(f"{hash(self.antecedent)}{Tokens.IMP}{hash(self.consequent)}")
|
||||
|
||||
|
||||
class ApplicationExpression(Expression):
    """The application of a linear implication to an argument."""

    def __init__(self, function, argument, argument_indices=None):
        """
        :param function: ``Expression`` for the function
        :param argument: ``Expression`` for the argument
        :param argument_indices: set for the indices of the glue formula from which the argument came
        :raise LinearLogicApplicationException: If 'function' cannot be applied to 'argument' given 'argument_indices'.
        """
        function_simp = function.simplify()
        argument_simp = argument.simplify()

        assert isinstance(function_simp, ImpExpression)
        assert isinstance(argument_simp, Expression)

        bindings = BindingDict()

        try:
            # Inherit bindings already established while building the
            # sub-expressions, then unify the antecedent with the argument.
            if isinstance(function, ApplicationExpression):
                bindings += function.bindings
            if isinstance(argument, ApplicationExpression):
                bindings += argument.bindings
            bindings += function_simp.antecedent.unify(argument_simp, bindings)
        except UnificationException as e:
            raise LinearLogicApplicationException(
                f"Cannot apply {function_simp} to {argument_simp}. {e}"
            ) from e

        # If you are running it on compiled premises, more conditions apply
        if argument_indices:
            # A.dependencies of (A -o (B -o C)) must be a proper subset of
            # argument_indices.  Check "subset or equal" first so that the
            # equality case below is reachable and reports its more specific
            # message (with a strict "<" test, equality raised the generic
            # "unfulfilled" error and the second check was dead code).
            if not set(function_simp.antecedent.dependencies) <= argument_indices:
                raise LinearLogicApplicationException(
                    "Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s"
                    % (function_simp, argument_simp)
                )
            if set(function_simp.antecedent.dependencies) == argument_indices:
                raise LinearLogicApplicationException(
                    "Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s"
                    % (function_simp, argument_simp)
                )

        self.function = function
        self.argument = argument
        self.bindings = bindings

    def simplify(self, bindings=None):
        """
        Since function is an implication, return its consequent. There should be
        no need to check that the application is valid since the checking is done
        by the constructor.

        :param bindings: ``BindingDict`` A dictionary of bindings used to simplify
        :return: ``Expression``
        """
        if not bindings:
            bindings = self.bindings

        return self.function.simplify(bindings).consequent

    def __eq__(self, other):
        return (
            self.__class__ == other.__class__
            and self.function == other.function
            and self.argument == other.argument
        )

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return "%s" % self.function + Tokens.OPEN + "%s" % self.argument + Tokens.CLOSE

    def __hash__(self):
        # Fix: hash the (function, argument) pair.  This class has no
        # 'antecedent'/'consequent' attributes, so the previous code raised
        # AttributeError whenever an ApplicationExpression was hashed.
        return hash(f"{hash(self.function)}{Tokens.OPEN}{hash(self.argument)}")
|
||||
|
||||
|
||||
class BindingDict:
    """A consistency-checked dictionary of variable bindings.

    Lookups follow binding chains: if X is bound to Y and Y is bound to c,
    then ``d[X]`` yields c.
    """

    def __init__(self, bindings=None):
        """
        :param bindings:
            list [(``VariableExpression``, ``AtomicExpression``)] to initialize the dictionary
            dict {``VariableExpression``: ``AtomicExpression``} to initialize the dictionary
        """
        self.d = {}

        if isinstance(bindings, dict):
            bindings = bindings.items()

        if bindings:
            # Insert through __setitem__ so consistency is enforced.
            for v, b in bindings:
                self[v] = b

    def __setitem__(self, variable, binding):
        """
        A binding is consistent with the dict if its variable is not already bound, OR if its
        variable is already bound to its argument.

        :param variable: ``VariableExpression`` The variable to bind
        :param binding: ``Expression`` The expression to which 'variable' should be bound
        :raise VariableBindingException: If the variable cannot be bound in this dictionary
        """
        assert isinstance(variable, VariableExpression)
        assert isinstance(binding, Expression)

        # Binding a variable to itself would create a lookup cycle.
        assert variable != binding

        existing = self.d.get(variable, None)

        if not existing or binding == existing:
            self.d[variable] = binding
        else:
            raise VariableBindingException(
                "Variable %s already bound to another value" % (variable)
            )

    def __getitem__(self, variable):
        """
        Return the expression to which 'variable' is (transitively) bound.
        """
        assert isinstance(variable, VariableExpression)

        # Follow the chain of bindings until a value is not itself a key.
        intermediate = self.d[variable]
        while intermediate:
            try:
                intermediate = self.d[intermediate]
            except KeyError:
                return intermediate

    def __contains__(self, item):
        return item in self.d

    def __add__(self, other):
        """
        :param other: ``BindingDict`` The dict with which to combine self
        :return: ``BindingDict`` A new dict containing all the elements of both parameters
        :raise VariableBindingException: If the parameter dictionaries are not consistent with each other
        """
        try:
            # Re-insert every entry so __setitem__ detects contradictions.
            combined = BindingDict()
            for v in self.d:
                combined[v] = self.d[v]
            for v in other.d:
                combined[v] = other.d[v]
            return combined
        except VariableBindingException as e:
            raise VariableBindingException(
                "Attempting to add two contradicting"
                " VariableBindingsLists: %s, %s" % (self, other)
            ) from e

    def __ne__(self, other):
        return not self == other

    def __eq__(self, other):
        # NOTE(review): comparison with a non-BindingDict deliberately raises
        # TypeError rather than returning NotImplemented.
        if not isinstance(other, BindingDict):
            raise TypeError
        return self.d == other.d

    def __str__(self):
        return "{" + ", ".join(f"{v}: {self.d[v]}" for v in sorted(self.d.keys())) + "}"

    def __repr__(self):
        return "BindingDict: %s" % self
|
||||
|
||||
|
||||
class VariableBindingException(Exception):
    """Raised when a variable cannot be bound consistently in a ``BindingDict``."""
|
||||
|
||||
|
||||
class UnificationException(Exception):
    """Raised when two linear logic expressions cannot be unified."""

    def __init__(self, a, b, bindings):
        super().__init__(f"Cannot unify {a} with {b} given {bindings}")
|
||||
|
||||
|
||||
class LinearLogicApplicationException(Exception):
    """Raised when a linear logic function cannot be applied to an argument."""
|
||||
|
||||
|
||||
def demo():
    """Demonstrate parsing and simplification of linear logic expressions."""
    lexpr = Expression.fromstring

    # Atoms and implications parse and round-trip through str().
    print(lexpr(r"f"))
    print(lexpr(r"(g -o f)"))
    print(lexpr(r"((g -o G) -o G)"))
    # "-o" is right-associative: parsed as g -o (h -o f).
    print(lexpr(r"g -o h -o f"))
    # Applications simplify to the implication's consequent.
    print(lexpr(r"(g -o f)(g)").simplify())
    print(lexpr(r"(H -o f)(g)").simplify())
    print(lexpr(r"((g -o G) -o G)((g -o f))").simplify())
    print(lexpr(r"(H -o H)((g -o f))").simplify())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
demo()
|
||||
2065
backend/venv/Lib/site-packages/nltk/sem/logic.py
Normal file
2065
backend/venv/Lib/site-packages/nltk/sem/logic.py
Normal file
File diff suppressed because it is too large
Load Diff
539
backend/venv/Lib/site-packages/nltk/sem/relextract.py
Normal file
539
backend/venv/Lib/site-packages/nltk/sem/relextract.py
Normal file
@@ -0,0 +1,539 @@
|
||||
# Natural Language Toolkit: Relation Extraction
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Code for extracting relational triples from the ieer and conll2002 corpora.
|
||||
|
||||
Relations are stored internally as dictionaries ('reldicts').
|
||||
|
||||
The two serialization outputs are "rtuple" and "clause".
|
||||
|
||||
- An rtuple is a tuple of the form ``(subj, filler, obj)``,
|
||||
where ``subj`` and ``obj`` are pairs of Named Entity mentions, and ``filler`` is the string of words
|
||||
occurring between ``sub`` and ``obj`` (with no intervening NEs). Strings are printed via ``repr()`` to
|
||||
circumvent locale variations in rendering utf-8 encoded strings.
|
||||
- A clause is an atom of the form ``relsym(subjsym, objsym)``,
|
||||
where the relation, subject and object have been canonicalized to single strings.
|
||||
"""
|
||||
|
||||
# todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs?
|
||||
|
||||
import html
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
# Dictionary that associates corpora with NE classes
NE_CLASSES = {
    "ieer": [
        "LOCATION",
        "ORGANIZATION",
        "PERSON",
        "DURATION",
        "DATE",
        "CARDINAL",
        "PERCENT",
        "MONEY",
        "MEASURE",
    ],
    # CoNLL-2002 uses short three-letter labels.
    "conll2002": ["LOC", "PER", "ORG"],
    # ACE uses the ieer label set plus FACILITY and GPE.
    "ace": [
        "LOCATION",
        "ORGANIZATION",
        "PERSON",
        "DURATION",
        "DATE",
        "CARDINAL",
        "PERCENT",
        "MONEY",
        "MEASURE",
        "FACILITY",
        "GPE",
    ],
}
|
||||
|
||||
# Allow abbreviated class labels: map short forms to long forms and back.
short2long = {"LOC": "LOCATION", "ORG": "ORGANIZATION", "PER": "PERSON"}
long2short = {long: short for short, long in short2long.items()}
|
||||
|
||||
|
||||
def _expand(type):
    """
    Expand an NE class name (e.g. "LOC" -> "LOCATION"); unknown names pass
    through unchanged.

    :type type: str
    :rtype: str
    """
    return short2long.get(type, type)
|
||||
|
||||
|
||||
def class_abbrev(type):
    """
    Abbreviate an NE class name (e.g. "LOCATION" -> "LOC"); unknown names
    pass through unchanged.

    :type type: str
    :rtype: str
    """
    return long2short.get(type, type)
|
||||
|
||||
|
||||
def _join(lst, sep=" ", untag=False):
|
||||
"""
|
||||
Join a list into a string, turning tags tuples into tag strings or just words.
|
||||
:param untag: if ``True``, omit the tag from tagged input strings.
|
||||
:type lst: list
|
||||
:rtype: str
|
||||
"""
|
||||
try:
|
||||
return sep.join(lst)
|
||||
except TypeError:
|
||||
if untag:
|
||||
return sep.join(tup[0] for tup in lst)
|
||||
from nltk.tag import tuple2str
|
||||
|
||||
return sep.join(tuple2str(tup) for tup in lst)
|
||||
|
||||
|
||||
def descape_entity(m, defs=html.entities.entitydefs):
    """
    Translate one entity to its ISO Latin value.
    Inspired by example from effbot.org
    """
    # Unknown entities fall back to the full match (e.g. "&bogus;") as is.
    return defs.get(m.group(1), m.group(0))
|
||||
|
||||
|
||||
def list2sym(lst):
    """
    Convert a list of strings into a canonical symbol.
    :type lst: list
    :return: a Unicode string without whitespace
    :rtype: unicode
    """
    # Join words with underscores, lowercase, decode HTML entities such as
    # "&amp;", and strip periods.
    sym = _join(lst, "_", untag=True).lower()
    sym = re.sub(r"&(\w+?);", descape_entity, sym)
    return sym.replace(".", "")
|
||||
|
||||
|
||||
def tree2semi_rel(tree):
    """
    Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).

    In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this
    identifies pairs whose first member is a list (possibly empty) of terminal
    strings, and whose second member is a ``Tree`` of the form (NE_label, terminals).

    :param tree: a chunk tree
    :return: a list of pairs (list(str), ``Tree``)
    :rtype: list of tuple
    """

    from nltk.tree import Tree

    semi_rels = []
    words = []

    for dtr in tree:
        if isinstance(dtr, Tree):
            # An NE chunk closes the current (words, NE) pair; words after
            # the final chunk are dropped.
            semi_rels.append([words, dtr])
            words = []
        else:
            words.append(dtr)
    return semi_rels
|
||||
|
||||
|
||||
def semi_rel2reldict(pairs, window=5, trace=False):
    """
    Converts the pairs generated by ``tree2semi_rel`` into a 'reldict': a dictionary which
    stores information about the subject and object NEs plus the filler between them.
    Additionally, a left and right context of length =< window are captured (within
    a given input sentence).

    :param pairs: a list of (list(str), ``Tree``) pairs, as generated by ``tree2semi_rel``
    :param window: a threshold for the number of items to include in the left and right context
    :type window: int
    :return: 'relation' dictionaries whose keys are 'lcon', 'subjclass', 'subjtext', 'subjsym', 'filler', 'objclass', 'objtext', 'objsym' and 'rcon'
    :rtype: list(defaultdict)
    """
    result = []
    # Each reldict needs a subject pair, an object pair, and a following pair
    # for the right context, hence the requirement of at least three pairs.
    while len(pairs) > 2:
        reldict = defaultdict(str)
        # Left context: last `window` words before the subject NE.
        reldict["lcon"] = _join(pairs[0][0][-window:])
        reldict["subjclass"] = pairs[0][1].label()
        reldict["subjtext"] = _join(pairs[0][1].leaves())
        reldict["subjsym"] = list2sym(pairs[0][1].leaves())
        # Filler: the words between the subject and object NEs.
        reldict["filler"] = _join(pairs[1][0])
        reldict["untagged_filler"] = _join(pairs[1][0], untag=True)
        reldict["objclass"] = pairs[1][1].label()
        reldict["objtext"] = _join(pairs[1][1].leaves())
        reldict["objsym"] = list2sym(pairs[1][1].leaves())
        # Right context: first `window` words after the object NE.
        reldict["rcon"] = _join(pairs[2][0][:window])
        if trace:
            print(
                "(%s(%s, %s)"
                % (
                    reldict["untagged_filler"],
                    reldict["subjclass"],
                    reldict["objclass"],
                )
            )
        result.append(reldict)
        # Slide the window forward by one pair so consecutive NEs all pair up.
        pairs = pairs[1:]
    return result
|
||||
|
||||
|
||||
def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10):
    """
    Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern.

    The parameters ``subjclass`` and ``objclass`` can be used to restrict the
    Named Entities to particular types (any of 'LOCATION', 'ORGANIZATION',
    'PERSON', 'DURATION', 'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE').

    :param subjclass: the class of the subject Named Entity.
    :type subjclass: str
    :param objclass: the class of the object Named Entity.
    :type objclass: str
    :param doc: input document
    :type doc: ieer document or a list of chunk trees
    :param corpus: name of the corpus to take as input; possible values are
        'ieer' and 'conll2002'
    :type corpus: str
    :param pattern: a regular expression for filtering the fillers of
        retrieved triples; if ``None``, fillers are not filtered by pattern.
    :type pattern: SRE_Pattern
    :param window: filters out fillers which exceed this threshold
    :type window: int
    :return: see ``semi_rel2reldict``
    :rtype: list(defaultdict)
    """

    # Accept abbreviated NE labels (e.g. "ORG") by expanding them first.
    if subjclass and subjclass not in NE_CLASSES[corpus]:
        if _expand(subjclass) in NE_CLASSES[corpus]:
            subjclass = _expand(subjclass)
        else:
            raise ValueError(
                "your value for the subject type has not been recognized: %s"
                % subjclass
            )
    if objclass and objclass not in NE_CLASSES[corpus]:
        if _expand(objclass) in NE_CLASSES[corpus]:
            objclass = _expand(objclass)
        else:
            raise ValueError(
                "your value for the object type has not been recognized: %s" % objclass
            )

    if corpus == "ace" or corpus == "conll2002":
        pairs = tree2semi_rel(doc)
    elif corpus == "ieer":
        # IEER documents carry two chunk trees: the body text and the headline.
        pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline)
    else:
        raise ValueError("corpus type not recognized")

    reldicts = semi_rel2reldict(pairs)

    # Fix: guard against pattern=None (the documented default), treating it
    # as "no filler filtering" instead of raising AttributeError.
    relfilter = lambda x: (
        x["subjclass"] == subjclass
        and len(x["filler"].split()) <= window
        and (pattern is None or pattern.match(x["filler"]))
        and x["objclass"] == objclass
    )

    return list(filter(relfilter, reldicts))
|
||||
|
||||
|
||||
def rtuple(reldict, lcon=False, rcon=False):
    """
    Pretty print the reldict as an rtuple.

    :param reldict: a relation dictionary
    :type reldict: defaultdict
    :param lcon: include the left context in the output
    :param rcon: include the right context in the output
    """
    parts = [
        class_abbrev(reldict["subjclass"]),
        reldict["subjtext"],
        reldict["filler"],
        class_abbrev(reldict["objclass"]),
        reldict["objtext"],
    ]
    template = "[%s: %r] %r [%s: %r]"
    if lcon:
        parts.insert(0, reldict["lcon"])
        template = "...%r)" + template
    if rcon:
        parts.append(reldict["rcon"])
        template += "(%r..."
    return template % tuple(parts)
|
||||
|
||||
|
||||
def clause(reldict, relsym):
    """
    Print the relation in clausal form, e.g. ``relsym('subj', 'obj')``.

    :param reldict: a relation dictionary
    :type reldict: defaultdict
    :param relsym: a label for the relation
    :type relsym: str
    """
    return "%s(%r, %r)" % (relsym, reldict["subjsym"], reldict["objsym"])
|
||||
|
||||
|
||||
#######################################################
|
||||
# Demos of relation extraction with regular expressions
|
||||
#######################################################
|
||||
|
||||
|
||||
############################################
|
||||
# Example of in(ORG, LOC)
|
||||
############################################
|
||||
def in_demo(trace=0, sql=True):
    """
    Select pairs of organizations and locations whose mentions occur with an
    intervening occurrence of the preposition "in".

    If the sql parameter is set to True, then the entity pairs are loaded into
    an in-memory database, and subsequently pulled out using an SQL "SELECT"
    query.
    """
    from nltk.corpus import ieer

    if sql:
        try:
            import sqlite3

            connection = sqlite3.connect(":memory:")
            cur = connection.cursor()
            cur.execute(
                """create table Locations
                (OrgName text, LocationName text, DocID text)"""
            )
        except ImportError:
            import warnings

            warnings.warn("Cannot import sqlite; sql flag will be ignored.")

    # "in" not followed by an -ing form (to skip e.g. "in winning").
    IN = re.compile(r".*\bin\b(?!\b.+ing)")

    print()
    print("IEER: in(ORG, LOC) -- just the clauses:")
    print("=" * 45)

    for file in ieer.fileids():
        for doc in ieer.parsed_docs(file):
            if trace:
                print(doc.docno)
                print("=" * 15)
            for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN):
                print(clause(rel, relsym="IN"))
                if sql:
                    # 'cur'/'connection' only exist if sqlite3 imported above;
                    # the NameError guard keeps the demo best-effort.
                    try:
                        rtuple = (rel["subjtext"], rel["objtext"], doc.docno)
                        cur.execute(
                            """insert into Locations
                            values (?, ?, ?)""",
                            rtuple,
                        )
                        connection.commit()
                    except NameError:
                        pass

    if sql:
        # Same best-effort pattern: skip silently if the table was never made.
        try:
            cur.execute(
                """select OrgName from Locations
                where LocationName = 'Atlanta'"""
            )
            print()
            print("Extract data from SQL table: ORGs in Atlanta")
            print("-" * 15)
            for row in cur:
                print(row)
        except NameError:
            pass
|
||||
|
||||
|
||||
############################################
|
||||
# Example of has_role(PER, LOC)
|
||||
############################################
|
||||
|
||||
|
||||
def roles_demo(trace=0):
    """Print raw has_role(PER, ORG) rtuples from the IEER corpus."""
    from nltk.corpus import ieer

    # NOTE(review): the "...).*)|" after "librarian" appears to close the
    # bracketed group early, leaving "manager" through the final alternative
    # at top level -- confirm the grouping is intentional.
    roles = r"""
    (.*(                   # assorted roles
    analyst|
    chair(wo)?man|
    commissioner|
    counsel|
    director|
    economist|
    editor|
    executive|
    foreman|
    governor|
    head|
    lawyer|
    leader|
    librarian).*)|
    manager|
    partner|
    president|
    producer|
    professor|
    researcher|
    spokes(wo)?man|
    writer|
    ,\sof\sthe?\s*         # "X, of (the) Y"
    """
    ROLES = re.compile(roles, re.VERBOSE)

    print()
    print("IEER: has_role(PER, ORG) -- raw rtuples:")
    print("=" * 45)

    for file in ieer.fileids():
        for doc in ieer.parsed_docs(file):
            lcon = rcon = False
            if trace:
                print(doc.docno)
                print("=" * 15)
                # With tracing on, also show left/right context in the output.
                lcon = rcon = True
            for rel in extract_rels("PER", "ORG", doc, corpus="ieer", pattern=ROLES):
                print(rtuple(rel, lcon=lcon, rcon=rcon))
|
||||
|
||||
|
||||
##############################################
|
||||
### Show what's in the IEER Headlines
|
||||
##############################################
|
||||
|
||||
|
||||
def ieer_headlines():
    """Print the document number and headline tree for the first 20 IEER documents."""
    from nltk.corpus import ieer
    from nltk.tree import Tree

    print("IEER: First 20 Headlines")
    print("=" * 45)

    trees = [
        (doc.docno, doc.headline)
        for file in ieer.fileids()
        for doc in ieer.parsed_docs(file)
    ]
    for tree in trees[:20]:
        print()
        # Each item is a (docno, headline-Tree) pair.
        print("%s:\n%s" % tree)
|
||||
|
||||
|
||||
#############################################
|
||||
## Dutch CONLL2002: take_on_role(PER, ORG
|
||||
#############################################
|
||||
|
||||
|
||||
def conllned(trace=1):
    """
    Find the copula+'van' relation ('of') in the Dutch tagged training corpus
    from CoNLL 2002.
    """

    from nltk.corpus import conll2002

    vnv = """
    (
    is/V|    # 3rd sing present and
    was/V|   # past forms of the verb zijn ('be')
    werd/V|  # and also present
    wordt/V  # past of worden ('become)
    )
    .*       # followed by anything
    van/Prep # followed by van ('of')
    """
    VAN = re.compile(vnv, re.VERBOSE)

    print()
    print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:")
    print("=" * 45)

    for doc in conll2002.chunked_sents("ned.train"):
        lcon = rcon = False
        if trace:
            # With tracing on, also show left/right context in the output.
            lcon = rcon = True
        for rel in extract_rels(
            "PER", "ORG", doc, corpus="conll2002", pattern=VAN, window=10
        ):
            print(rtuple(rel, lcon=lcon, rcon=rcon))
|
||||
|
||||
|
||||
#############################################
|
||||
## Spanish CONLL2002: (PER, ORG)
|
||||
#############################################
|
||||
|
||||
|
||||
def conllesp():
    """Print the first 10 de(ORG, LOC) clauses from the Spanish CoNLL-2002 training corpus."""
    from nltk.corpus import conll2002

    de = """
    .*
    (
    de/SP|
    del/SP
    )
    """
    DE = re.compile(de, re.VERBOSE)

    print()
    print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:")
    print("=" * 45)
    rels = [
        rel
        for doc in conll2002.chunked_sents("esp.train")
        for rel in extract_rels("ORG", "LOC", doc, corpus="conll2002", pattern=DE)
    ]
    for r in rels[:10]:
        print(clause(r, relsym="DE"))
    print()
|
||||
|
||||
|
||||
def ne_chunked():
    """
    Run the NLTK NE chunker over the first 1500 tagged Penn Treebank
    sentences and print PER-ORG relations whose filler mentions a
    job-title keyword (chairman, president, ...), prefixed with the
    sentence index.
    """
    print()
    print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
    print("=" * 45)
    ROLE = re.compile(
        r".*(chairman|president|trader|scientist|economist|analyst|partner).*"
    )
    # NOTE: the original code initialized ``rels = []`` here and then
    # rebound it inside the loop — the initialization was dead code and
    # has been removed.
    for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]):
        sent = nltk.ne_chunk(sent)
        rels = extract_rels("PER", "ORG", sent, corpus="ace", pattern=ROLE, window=7)
        for rel in rels:
            print(f"{i:<5}{rtuple(rel)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo driver: run each relation-extraction demo in turn with tracing off.
    import nltk
    from nltk.sem import relextract

    in_demo(trace=0)
    roles_demo(trace=0)
    conllned()
    conllesp()
    ieer_headlines()
    ne_chunked()
|
||||
148
backend/venv/Lib/site-packages/nltk/sem/skolemize.py
Normal file
148
backend/venv/Lib/site-packages/nltk/sem/skolemize.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# Natural Language Toolkit: Semantic Interpretation
|
||||
#
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from nltk.sem.logic import (
|
||||
AllExpression,
|
||||
AndExpression,
|
||||
ApplicationExpression,
|
||||
EqualityExpression,
|
||||
ExistsExpression,
|
||||
IffExpression,
|
||||
ImpExpression,
|
||||
NegatedExpression,
|
||||
OrExpression,
|
||||
VariableExpression,
|
||||
skolem_function,
|
||||
unique_variable,
|
||||
)
|
||||
|
||||
|
||||
def skolemize(expression, univ_scope=None, used_variables=None):
    """
    Skolemize the expression and convert to conjunctive normal form (CNF)

    :param expression: an ``nltk.sem.logic`` expression to normalize
    :param univ_scope: set of universally quantified variables currently in
        scope; Skolem functions introduced below take these as arguments
    :param used_variables: set of variables already used, so fresh variables
        can avoid collisions
    """
    if univ_scope is None:
        univ_scope = set()
    if used_variables is None:
        used_variables = set()

    if isinstance(expression, AllExpression):
        # Universal quantifier: recurse with the variable added to both
        # scopes, then rename the bound variable to a fresh one.
        term = skolemize(
            expression.term,
            univ_scope | {expression.variable},
            used_variables | {expression.variable},
        )
        return term.replace(
            expression.variable,
            VariableExpression(unique_variable(ignore=used_variables)),
        )
    elif isinstance(expression, AndExpression):
        # Conjunction: skolemize each conjunct independently.
        return skolemize(expression.first, univ_scope, used_variables) & skolemize(
            expression.second, univ_scope, used_variables
        )
    elif isinstance(expression, OrExpression):
        # Disjunction: skolemize both disjuncts, then distribute over any
        # conjunctions to stay in CNF.
        return to_cnf(
            skolemize(expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, ImpExpression):
        # A -> B  ==  -A | B
        return to_cnf(
            skolemize(-expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, IffExpression):
        # A <-> B  ==  (-A | B) & (A | -B)
        return to_cnf(
            skolemize(-expression.first, univ_scope, used_variables),
            skolemize(expression.second, univ_scope, used_variables),
        ) & to_cnf(
            skolemize(expression.first, univ_scope, used_variables),
            skolemize(-expression.second, univ_scope, used_variables),
        )
    elif isinstance(expression, EqualityExpression):
        # Equalities are atomic: leave unchanged.
        return expression
    elif isinstance(expression, NegatedExpression):
        # Push the negation inward over the negated subexpression.
        negated = expression.term
        if isinstance(negated, AllExpression):
            # -all x.P  ==  exists x.-P : the variable becomes existential
            # and is Skolemized.
            term = skolemize(
                -negated.term, univ_scope, used_variables | {negated.variable}
            )
            if univ_scope:
                # Under universal quantifiers: replace the variable with a
                # Skolem function of the universally quantified variables.
                return term.replace(negated.variable, skolem_function(univ_scope))
            else:
                # No universal scope: a fresh Skolem constant suffices.
                skolem_constant = VariableExpression(
                    unique_variable(ignore=used_variables)
                )
                return term.replace(negated.variable, skolem_constant)
        elif isinstance(negated, AndExpression):
            # -(A & B)  ==  -A | -B (De Morgan), then distribute for CNF.
            return to_cnf(
                skolemize(-negated.first, univ_scope, used_variables),
                skolemize(-negated.second, univ_scope, used_variables),
            )
        elif isinstance(negated, OrExpression):
            # -(A | B)  ==  -A & -B (De Morgan)
            return skolemize(-negated.first, univ_scope, used_variables) & skolemize(
                -negated.second, univ_scope, used_variables
            )
        elif isinstance(negated, ImpExpression):
            # -(A -> B)  ==  A & -B
            return skolemize(negated.first, univ_scope, used_variables) & skolemize(
                -negated.second, univ_scope, used_variables
            )
        elif isinstance(negated, IffExpression):
            # -(A <-> B)  ==  (-A | -B) & (A | B)
            return to_cnf(
                skolemize(-negated.first, univ_scope, used_variables),
                skolemize(-negated.second, univ_scope, used_variables),
            ) & to_cnf(
                skolemize(negated.first, univ_scope, used_variables),
                skolemize(negated.second, univ_scope, used_variables),
            )
        elif isinstance(negated, EqualityExpression):
            # Negated equality is an atomic literal: keep as-is.
            return expression
        elif isinstance(negated, NegatedExpression):
            # Double negation elimination.
            return skolemize(negated.term, univ_scope, used_variables)
        elif isinstance(negated, ExistsExpression):
            # -exists x.P  ==  all x.-P : rename the (now universal) bound
            # variable to a fresh one.
            term = skolemize(
                -negated.term,
                univ_scope | {negated.variable},
                used_variables | {negated.variable},
            )
            return term.replace(
                negated.variable,
                VariableExpression(unique_variable(ignore=used_variables)),
            )
        elif isinstance(negated, ApplicationExpression):
            # Negated atom: already in literal form.
            return expression
        else:
            raise Exception("'%s' cannot be skolemized" % expression)
    elif isinstance(expression, ExistsExpression):
        # Existential quantifier: replace the bound variable by a Skolem
        # function of the universal scope (or a Skolem constant if the
        # scope is empty).
        term = skolemize(
            expression.term, univ_scope, used_variables | {expression.variable}
        )
        if univ_scope:
            return term.replace(expression.variable, skolem_function(univ_scope))
        else:
            skolem_constant = VariableExpression(unique_variable(ignore=used_variables))
            return term.replace(expression.variable, skolem_constant)
    elif isinstance(expression, ApplicationExpression):
        # Atomic predicate application: leave unchanged.
        return expression
    else:
        raise Exception("'%s' cannot be skolemized" % expression)
|
||||
|
||||
|
||||
def to_cnf(first, second):
    """
    Convert this split disjunction to conjunctive normal form (CNF)

    Distributes the disjunction ``first | second`` over any top-level
    conjunction in either operand, recursing until neither side is an
    ``AndExpression``.
    """
    # (A & B) | C  ==  (A | C) & (B | C)
    if isinstance(first, AndExpression):
        return to_cnf(first.first, second) & to_cnf(first.second, second)
    # A | (B & C)  ==  (A | B) & (A | C)
    if isinstance(second, AndExpression):
        return to_cnf(first, second.first) & to_cnf(first, second.second)
    # Neither side is a conjunction: plain disjunction is already CNF.
    return first | second
|
||||
307
backend/venv/Lib/site-packages/nltk/sem/util.py
Normal file
307
backend/venv/Lib/site-packages/nltk/sem/util.py
Normal file
@@ -0,0 +1,307 @@
|
||||
# Natural Language Toolkit: Semantic Interpretation
|
||||
#
|
||||
# Author: Ewan Klein <ewan@inf.ed.ac.uk>
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Utility functions for batch-processing sentences: parsing and
|
||||
extraction of the semantic representation of the root node of the the
|
||||
syntax tree, followed by evaluation of the semantic representation in
|
||||
a first-order model.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
|
||||
from nltk.sem import evaluate
|
||||
|
||||
##############################################################
|
||||
## Utility functions for connecting parse output to semantics
|
||||
##############################################################
|
||||
|
||||
|
||||
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
    :return: a mapping from input sentences to a list of ``Tree`` instances.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        parser = FeatureChartParser(grammar)
    else:
        parser = load_parser(grammar, trace=trace)

    # One list of parse trees per input sentence, split on whitespace.
    return [list(parser.parse(sent.split())) for sent in inputs]
|
||||
|
||||
|
||||
def root_semrep(syntree, semkey="SEM"):
    """
    Find the semantic representation at the root of a tree.

    :param syntree: a parse ``Tree``
    :param semkey: the feature label to use for the root semantics in the tree
    :return: the semantic representation at the root of a ``Tree``
    :rtype: sem.Expression
    """
    from nltk.grammar import FeatStructNonterminal

    root_label = syntree.label()
    assert isinstance(root_label, FeatStructNonterminal)
    try:
        return root_label[semkey]
    except KeyError:
        # Report the offending node before propagating the error.
        print(root_label, "has no specification for the feature %s" % semkey)
        raise
|
||||
|
||||
|
||||
def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
    """
    Add the semantic representation to each syntactic parse tree
    of each input sentence.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
    """
    interpreted = []
    for syntrees in parse_sents(inputs, grammar, trace=trace):
        # Pair each parse tree with the semantics found at its root.
        interpreted.append([(syn, root_semrep(syn, semkey)) for syn in syntrees])
    return interpreted
|
||||
|
||||
|
||||
def evaluate_sents(inputs, grammar, model, assignment, trace=0):
    """
    Add the truth-in-a-model value to each semantic representation
    for each syntactic parse of each input sentences.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
    """
    evaluated = []
    for interpretations in interpret_sents(inputs, grammar):
        triples = []
        for syn, sem in interpretations:
            # Evaluate the string form of the semantics in the given model.
            value = model.evaluate("%s" % sem, assignment, trace=trace)
            triples.append((syn, sem, value))
        evaluated.append(triples)
    return evaluated
|
||||
|
||||
|
||||
def demo_model0():
|
||||
global m0, g0
|
||||
# Initialize a valuation of non-logical constants."""
|
||||
v = [
|
||||
("john", "b1"),
|
||||
("mary", "g1"),
|
||||
("suzie", "g2"),
|
||||
("fido", "d1"),
|
||||
("tess", "d2"),
|
||||
("noosa", "n"),
|
||||
("girl", {"g1", "g2"}),
|
||||
("boy", {"b1", "b2"}),
|
||||
("dog", {"d1", "d2"}),
|
||||
("bark", {"d1", "d2"}),
|
||||
("walk", {"b1", "g2", "d1"}),
|
||||
("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
|
||||
(
|
||||
"see",
|
||||
{("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
|
||||
),
|
||||
("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
|
||||
("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
|
||||
]
|
||||
# Read in the data from ``v``
|
||||
val = evaluate.Valuation(v)
|
||||
# Bind ``dom`` to the ``domain`` property of ``val``
|
||||
dom = val.domain
|
||||
# Initialize a model with parameters ``dom`` and ``val``.
|
||||
m0 = evaluate.Model(dom, val)
|
||||
# Initialize a variable assignment with parameter ``dom``
|
||||
g0 = evaluate.Assignment(dom)
|
||||
|
||||
|
||||
def read_sents(filename, encoding="utf8"):
    """
    Read a file of test sentences, one sentence per line.

    Blank lines and lines whose first character is ``#`` (comments) are
    skipped; trailing whitespace is stripped from each line.

    :param filename: path to the sentence file
    :param encoding: text encoding of the file (default "utf8")
    :rtype: list(str)
    """
    # Builtin ``open`` replaces the legacy ``codecs.open``; it handles
    # the encoding directly.
    with open(filename, encoding=encoding) as fp:
        sents = [line.rstrip() for line in fp]

    # get rid of blank lines and comment lines in one pass
    return [line for line in sents if line and not line.startswith("#")]
|
||||
|
||||
|
||||
def demo_legacy_grammar():
    """
    Check that interpret_sents() is compatible with legacy grammars that use
    a lowercase 'sem' feature.

    The one-rule grammar is defined inline below; no external grammar file
    is needed.  (The original docstring referred to a 'test.fcfg' file and
    then trailed off — the reference was stale and has been removed.)
    """
    from nltk.grammar import FeatureGrammar

    g = FeatureGrammar.fromstring(
        """
    % start S
    S[sem=<hello>] -> 'hello'
    """
    )
    print("Reading grammar: %s" % g)
    print("*" * 20)
    # Note the lowercase semkey, matching the grammar's 'sem' feature.
    for reading in interpret_sents(["hello"], g, semkey="sem"):
        syn, sem = reading[0]
        print()
        print("output: ", sem)
|
||||
|
||||
|
||||
def demo():
    """
    Parse (and optionally evaluate) a batch of sentences from the command line.

    Command-line options choose a grammar file (-g), a file of test
    sentences (-s), a model module (-m) and tracing levels; run with
    ``--help`` for details.  Requires ``demo_model0`` globals ``m0``/``g0``.
    """
    from optparse import OptionParser

    description = """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(
        evaluate=True,
        beta=True,
        syntrace=0,
        semtrace=0,
        demo="default",
        grammar="",
        sentences="",
    )

    opts.add_option(
        "-d",
        "--demo",
        dest="demo",
        help="choose demo D; omit this for the default demo, or specify 'chat80'",
        metavar="D",
    )
    opts.add_option(
        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
    )
    opts.add_option(
        "-m",
        "--model",
        dest="model",
        help="import model M (omit '.py' suffix)",
        metavar="M",
    )
    opts.add_option(
        "-s",
        "--sentences",
        dest="sentences",
        help="read in a file of test sentences S",
        metavar="S",
    )
    opts.add_option(
        "-e",
        "--no-eval",
        action="store_false",
        dest="evaluate",
        help="just do a syntactic analysis",
    )
    opts.add_option(
        "-b",
        "--no-beta-reduction",
        action="store_false",
        dest="beta",
        help="don't carry out beta-reduction",
    )
    opts.add_option(
        "-t",
        "--syntrace",
        action="count",
        dest="syntrace",
        help="set syntactic tracing on; requires '-e' option",
    )
    opts.add_option(
        "-T",
        "--semtrace",
        action="count",
        dest="semtrace",
        help="set semantic tracing on",
    )

    (options, args) = opts.parse_args()

    SPACER = "-" * 30

    demo_model0()

    # Default test sentences; replaced by the contents of the -s file below.
    sents = [
        "Fido sees a boy with Mary",
        "John sees Mary",
        "every girl chases a dog",
        "every boy chases a girl",
        "John walks with a girl in Noosa",
        "who walks",
    ]

    gramfile = "grammars/sample_grammars/sem2.fcfg"

    if options.sentences:
        # Bug fix: the file named by -s used to be silently ignored —
        # ``sents`` is always bound above, so the old ``if sents is None``
        # guard could never fire (and ``sentsfile`` could be unbound).
        sents = read_sents(options.sentences)
    if options.grammar:
        gramfile = options.grammar
    if options.model:
        # NOTE(review): the imported model is immediately shadowed by the
        # ``model = m0`` assignment below; behavior kept from the original.
        exec("import %s as model" % options.model)

    # Set model and assignment
    model = m0
    g = g0

    if options.evaluate:
        evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
    else:
        semreps = interpret_sents(sents, gramfile, trace=options.syntrace)

    for i, sent in enumerate(sents):
        n = 1
        print("\nSentence: %s" % sent)
        print(SPACER)
        if options.evaluate:
            for syntree, semrep, value in evaluations[i]:
                if isinstance(value, dict):
                    # A dict of satisfiers: show just the satisfying values.
                    value = set(value.keys())
                print("%d: %s" % (n, semrep))
                print(value)
                n += 1
        else:
            for syntree, semrep in semreps[i]:
                print("%d: %s" % (n, semrep))
                n += 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the command-line demo, then the legacy-grammar compatibility check.
    demo()
    demo_legacy_grammar()
|
||||
Reference in New Issue
Block a user