Initial commit
This commit is contained in:
47
backend/venv/Lib/site-packages/nltk/app/__init__.py
Normal file
47
backend/venv/Lib/site-packages/nltk/app/__init__.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# Natural Language Toolkit: Applications package
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Interactive NLTK Applications:
|
||||
|
||||
chartparser: Chart Parser
|
||||
chunkparser: Regular-Expression Chunk Parser
|
||||
collocations: Find collocations in text
|
||||
concordance: Part-of-speech concordancer
|
||||
nemo: Finding (and Replacing) Nemo regular expression tool
|
||||
rdparser: Recursive Descent Parser
|
||||
srparser: Shift-Reduce Parser
|
||||
wordnet: WordNet Browser
|
||||
"""
|
||||
|
||||
|
||||
# Import Tkinter-based modules if Tkinter is installed
|
||||
try:
|
||||
import tkinter
|
||||
except ImportError:
|
||||
import warnings
|
||||
|
||||
warnings.warn("nltk.app package not loaded (please install Tkinter library).")
|
||||
else:
|
||||
from nltk.app.chartparser_app import app as chartparser
|
||||
from nltk.app.chunkparser_app import app as chunkparser
|
||||
from nltk.app.collocations_app import app as collocations
|
||||
from nltk.app.concordance_app import app as concordance
|
||||
from nltk.app.nemo_app import app as nemo
|
||||
from nltk.app.rdparser_app import app as rdparser
|
||||
from nltk.app.srparser_app import app as srparser
|
||||
from nltk.app.wordnet_app import app as wordnet
|
||||
|
||||
try:
|
||||
from matplotlib import pylab
|
||||
except ImportError:
|
||||
import warnings
|
||||
|
||||
warnings.warn("nltk.app.wordfreq not loaded (requires the matplotlib library).")
|
||||
else:
|
||||
from nltk.app.wordfreq_app import app as wordfreq
|
||||
2569
backend/venv/Lib/site-packages/nltk/app/chartparser_app.py
Normal file
2569
backend/venv/Lib/site-packages/nltk/app/chartparser_app.py
Normal file
File diff suppressed because it is too large
Load Diff
1500
backend/venv/Lib/site-packages/nltk/app/chunkparser_app.py
Normal file
1500
backend/venv/Lib/site-packages/nltk/app/chunkparser_app.py
Normal file
File diff suppressed because it is too large
Load Diff
438
backend/venv/Lib/site-packages/nltk/app/collocations_app.py
Normal file
438
backend/venv/Lib/site-packages/nltk/app/collocations_app.py
Normal file
@@ -0,0 +1,438 @@
|
||||
# Natural Language Toolkit: Collocations Application
|
||||
# Much of the GUI code is imported from concordance.py; we intend to merge these tools together.
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
#
|
||||
|
||||
|
||||
import queue as q
|
||||
import threading
|
||||
from tkinter import (
|
||||
END,
|
||||
LEFT,
|
||||
SUNKEN,
|
||||
Button,
|
||||
Frame,
|
||||
IntVar,
|
||||
Label,
|
||||
Menu,
|
||||
OptionMenu,
|
||||
Scrollbar,
|
||||
StringVar,
|
||||
Text,
|
||||
Tk,
|
||||
)
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.corpus import (
|
||||
alpino,
|
||||
brown,
|
||||
cess_cat,
|
||||
cess_esp,
|
||||
floresta,
|
||||
indian,
|
||||
mac_morpho,
|
||||
machado,
|
||||
nps_chat,
|
||||
sinica_treebank,
|
||||
treebank,
|
||||
)
|
||||
from nltk.probability import FreqDist
|
||||
from nltk.util import in_idle
|
||||
|
||||
# Tokens the loader thread puts on the queue for the view to pick up.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
# Delay handed to Tk's ``after`` between two polls of the queue.
POLL_INTERVAL = 100


def _brown_words(categories):
    """Return a zero-argument loader for the Brown words in *categories*."""
    return lambda: brown.words(categories=categories)


_DEFAULT = "English: Brown Corpus (Humor)"
# Maps the menu label of each corpus to a zero-argument callable returning
# its words; the corpus readers are only touched when a loader is called.
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": _brown_words(["news", "editorial", "reviews"]),
    "English: Brown Corpus (Religion)": _brown_words("religion"),
    "English: Brown Corpus (Learned)": _brown_words("learned"),
    "English: Brown Corpus (Science Fiction)": _brown_words("science_fiction"),
    "English: Brown Corpus (Romance)": _brown_words("romance"),
    "English: Brown Corpus (Humor)": _brown_words("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),
    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
    "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
}
|
||||
|
||||
|
||||
class CollocationsView:
    """Tk window that pages through the collocations of a selectable corpus.

    Loading is delegated to a ``CollocationsModel``.  The model reports the
    outcome of a load by putting an event on ``self.queue``; ``_poll`` reads
    that queue from the Tk event loop and dispatches to the handlers below.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white

    def __init__(self):
        """Build the window, start loading the default corpus and begin polling."""
        self.queue = q.Queue()
        self.model = CollocationsModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        # Handle of the pending poll, kept so destroy() can cancel it.
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Configure the root window: size, title and the ways of closing it."""
        top.geometry("550x650+50+50")
        top.title("NLTK Collocations List")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(550, 650)

    def _init_widgets(self, parent):
        """Create the main frame and fill it from top to bottom."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    def _init_corpus_select(self, parent):
        """Create the "Corpus:" label and the drop-down listing all corpora."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")

        # The default corpus is listed first, followed by the others as
        # returned by the model.
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            *self.model.non_default_corpora(),
            command=self.corpus_selected,
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label shown at the bottom of the main frame."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_menubar(self):
        """Create the File and Edit menus and select the initial result count."""
        self._result_size = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        for count in (20, 50, 100):
            rescntmenu.add_radiobutton(
                label=str(count),
                variable=self._result_size,
                underline=0,
                value=count,
                command=self.set_result_size,
            )
        # Invoking the second entry (50) runs set_result_size, which is what
        # gives the model its initial result count.
        rescntmenu.invoke(1)
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the result count chosen in the menu to the model."""
        self.model.result_count = self._result_size.get()

    def _init_results_box(self, parent):
        """Create the read-only text box for the results and its scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
        Label(i2, text="   ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the Previous/Next buttons (initially disabled)."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.prev.pack(side="left", anchor="center")
        # Stored as ``self.next``; no local alias, which would shadow the
        # builtin ``next``.
        self.next = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.next.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        self.reset_current_page()

    def reset_current_page(self):
        """Mark that no page is currently displayed (page index -1)."""
        self.current_page = -1

    def _poll(self):
        """Handle at most one queued model event, then schedule the next poll."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def handle_error_loading_corpus(self, event):
        """Report a failed load and leave the view empty with paging disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        # The paging state is deliberately not recomputed here: after a failed
        # load the model holds no collocations, and asking it whether the
        # current page is the last one could raise inside the poll callback.
        self.clear_results_box()
        self.freeze_editable()
        self.reset_current_page()

    def handle_corpus_loaded(self, event):
        """Show the first page of collocations of the freshly loaded corpus."""
        self.status["text"] = self.var.get() + " is loaded"
        self.clear_results_box()
        self.reset_current_page()
        collocations = self.model.next(self.current_page + 1)
        self.write_results(collocations)
        self.current_page += 1
        # Set the button states only now, so that they describe the page just
        # shown and not the page that was current for the previous corpus.
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Load the corpus currently selected in the drop-down."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def previous(self):
        """Display the page before the current one."""
        self.freeze_editable()
        collocations = self.model.prev(self.current_page - 1)
        self.current_page = self.current_page - 1
        self.clear_results_box()
        self.write_results(collocations)
        self.unfreeze_editable()

    def __next__(self):
        """Display the page after the current one."""
        self.freeze_editable()
        collocations = self.model.next(self.current_page + 1)
        self.clear_results_box()
        self.write_results(collocations)
        self.current_page += 1
        self.unfreeze_editable()

    def load_corpus(self, selection):
        """Ask the model to load *selection* unless it is already selected."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def freeze_editable(self):
        """Disable both paging buttons."""
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def clear_results_box(self):
        """Remove all text from the results box."""
        # The box is kept disabled so the user cannot type in it; it has to
        # be enabled for the duration of the edit.
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def fire_event(self, event):
        """Generate the Tk virtual event *event* on the root window."""
        # Firing an event so that rendering of widgets happen in the mainloop thread
        self.top.event_generate(event, when="tail")

    def destroy(self, *e):
        """Cancel the pending poll and close the window; safe to call twice."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop, unless running inside IDLE."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)

    def unfreeze_editable(self):
        """Re-enable whichever paging buttons apply to the current page."""
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable Previous/Next according to the current page and the model."""
        if self.current_page == -1 or self.current_page == 0:
            self.prev["state"] = "disabled"
        else:
            self.prev["state"] = "normal"
        if self.model.is_last_page(self.current_page):
            self.next["state"] = "disabled"
        else:
            self.next["state"] = "normal"

    def write_results(self, results):
        """Write one "word1 word2" line per collocation into the results box."""
        self.results_box["state"] = "normal"
        for row, pair in enumerate(results, start=1):
            self.results_box.insert(f"{row}.0", f"{pair[0]} {pair[1]}\n")
        self.results_box["state"] = "disabled"
|
||||
|
||||
|
||||
class CollocationsModel:
    """Holds the ranked collocations of the selected corpus and pages over them.

    A corpus is loaded by a ``LoadCorpus`` thread, which stores the ranked
    word pairs in ``self.collocations`` and reports the outcome by putting an
    event on ``self.queue``.
    """

    def __init__(self, queue):
        """Create an empty model that reports load outcomes on *queue*."""
        self.result_count = None
        self.selected_corpus = None
        self.collocations = None
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.queue = queue
        self.reset_results()

    def reset_results(self):
        """Forget all pages handed out so far."""
        self.result_pages = []
        self.results_returned = 0

    def load_corpus(self, name):
        """Start loading the corpus labelled *name* in a background thread."""
        self.selected_corpus = name
        self.collocations = None
        # Reset the paging state before the loader starts, so nothing the
        # thread does can be interleaved with the reset.
        self.reset_results()
        runner_thread = self.LoadCorpus(name, self)
        runner_thread.start()

    def non_default_corpora(self):
        """Return the sorted labels of all corpora except the default one."""
        return sorted(name for name in self.CORPORA if name != self.DEFAULT_CORPUS)

    def is_last_page(self, number):
        """Return True if no collocations remain after page *number*.

        Also returns True while no collocations are loaded at all (during a
        load, or after a failed one), since there is then nothing to page to.
        """
        if self.collocations is None:
            return True
        total = len(self.collocations)
        loaded = len(self.result_pages)
        if number < loaded:
            # Pages are consecutive slices starting at index 0, so the number
            # of collocations covered up to and including this page is the
            # sum of the page sizes (the result count may change between
            # pages, hence no multiplication).
            shown = sum(len(page) for page in self.result_pages[: number + 1])
            return shown >= total
        return (
            self.results_returned + (number - loaded) * self.result_count >= total
        )

    def next(self, page):
        """Return page *page*, slicing further pages off the ranking as needed."""
        missing = page - (len(self.result_pages) - 1)
        for _ in range(missing):
            start = self.results_returned
            self.result_pages.append(
                self.collocations[start : start + self.result_count]
            )
            self.results_returned += self.result_count
        return self.result_pages[page]

    def prev(self, page):
        """Return the already-loaded page *page*, or ``[]`` for page -1."""
        if page == -1:
            return []
        return self.result_pages[page]

    class LoadCorpus(threading.Thread):
        """Thread that reads a corpus and ranks its adjacent word pairs."""

        def __init__(self, name, model):
            """Remember the corpus label *name* and the *model* to fill in."""
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Score every adjacent word pair and store the ranking in the model.

            Puts ``CORPUS_LOADED_EVENT`` on the model's queue on success and
            ``ERROR_LOADING_CORPUS_EVENT`` if anything goes wrong.
            """
            try:
                words = self.model.CORPORA[self.name]()
                from operator import itemgetter

                # Words of one or two characters are ignored.
                text = [w for w in words if len(w) > 2]
                fd = FreqDist(zip(text, text[1:]))
                vocab = FreqDist(text)
                # Score: cube of the pair count over the product of the two
                # word counts.
                scored = [
                    ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
                    for w1, w2 in fd
                ]
                scored.sort(key=itemgetter(1), reverse=True)
                self.model.collocations = list(map(itemgetter(0), scored))
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                # Deliberately broad: any failure is reported to the view as
                # a load error rather than killing the thread silently.
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
|
||||
|
||||
|
||||
# def collocations():
|
||||
# colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
|
||||
|
||||
|
||||
def app():
    """Open the collocations window and run its main loop."""
    CollocationsView().mainloop()
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = ["app"]

# Launch the GUI when the file is executed as a script.
if __name__ == "__main__":
    app()
|
||||
709
backend/venv/Lib/site-packages/nltk/app/concordance_app.py
Normal file
709
backend/venv/Lib/site-packages/nltk/app/concordance_app.py
Normal file
@@ -0,0 +1,709 @@
|
||||
# Natural Language Toolkit: Concordance Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
import queue as q
|
||||
import re
|
||||
import threading
|
||||
from tkinter import (
|
||||
END,
|
||||
LEFT,
|
||||
SUNKEN,
|
||||
Button,
|
||||
Entry,
|
||||
Frame,
|
||||
IntVar,
|
||||
Label,
|
||||
Menu,
|
||||
OptionMenu,
|
||||
Scrollbar,
|
||||
StringVar,
|
||||
Text,
|
||||
Tk,
|
||||
)
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.corpus import (
|
||||
alpino,
|
||||
brown,
|
||||
cess_cat,
|
||||
cess_esp,
|
||||
floresta,
|
||||
indian,
|
||||
mac_morpho,
|
||||
nps_chat,
|
||||
sinica_treebank,
|
||||
treebank,
|
||||
)
|
||||
from nltk.draw.util import ShowText
|
||||
from nltk.util import in_idle
|
||||
|
||||
WORD_OR_TAG = "[^/ ]+"
BOUNDARY = r"\b"

# Tokens the model puts on the queue for the view to pick up.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay handed to Tk's ``after`` between two polls of the queue.
POLL_INTERVAL = 50


def _brown_simplified(categories):
    """Return a lazy loader for Brown *categories* with the universal tagset."""
    return lambda: brown.tagged_sents(categories=categories, tagset="universal")


# NB All corpora must be specified as zero-argument callables so as not to be
# loaded when the module is imported.

_DEFAULT = "English: Brown Corpus (Humor, simplified)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus": lambda: brown.tagged_sents(),
    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus (Press, simplified)": _brown_simplified(
        ["news", "editorial", "reviews"]
    ),
    "English: Brown Corpus (Religion, simplified)": _brown_simplified("religion"),
    "English: Brown Corpus (Learned, simplified)": _brown_simplified("learned"),
    "English: Brown Corpus (Science Fiction, simplified)": _brown_simplified(
        "science_fiction"
    ),
    "English: Brown Corpus (Romance, simplified)": _brown_simplified("romance"),
    "English: Brown Corpus (Humor, simplified)": _brown_simplified("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
        tagset="universal"
    ),
    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
        tagset="universal"
    ),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
        tagset="universal"
    ),
    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
        tagset="universal"
    ),
    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
        files="hindi.pos", tagset="universal"
    ),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
        tagset="universal"
    ),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
        tagset="universal"
    ),
    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
        tagset="universal"
    ),
}
|
||||
|
||||
|
||||
class ConcordanceSearchView:
|
||||
_BACKGROUND_COLOUR = "#FFF" # white
|
||||
|
||||
# Colour of highlighted results
|
||||
_HIGHLIGHT_WORD_COLOUR = "#F00" # red
|
||||
_HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"
|
||||
|
||||
_HIGHLIGHT_LABEL_COLOUR = "#C0C0C0" # dark grey
|
||||
_HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"
|
||||
|
||||
# Percentage of text left of the scrollbar position
|
||||
_FRACTION_LEFT_TEXT = 0.30
|
||||
|
||||
def __init__(self):
    """Build the window, start loading the default corpus and begin polling."""
    event_queue = q.Queue()
    self.queue = event_queue
    self.model = ConcordanceSearchModel(event_queue)
    root = Tk()
    self.top = root
    self._init_top(root)
    self._init_menubar()
    self._init_widgets(root)
    self.load_corpus(self.model.DEFAULT_CORPUS)
    # Handle of the pending poll callback.
    self.after = root.after(POLL_INTERVAL, self._poll)
|
||||
|
||||
def _init_top(self, top):
|
||||
top.geometry("950x680+50+50")
|
||||
top.title("NLTK Concordance Search")
|
||||
top.bind("<Control-q>", self.destroy)
|
||||
top.protocol("WM_DELETE_WINDOW", self.destroy)
|
||||
top.minsize(950, 680)
|
||||
|
||||
def _init_widgets(self, parent):
    """Create the main frame and fill it from top to bottom."""
    frame = Frame(
        parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
    )
    self.main_frame = frame
    # Each builder packs its own widgets, so call order is layout order.
    builders = (
        self._init_corpus_select,
        self._init_query_box,
        self._init_results_box,
        self._init_paging,
        self._init_status,
    )
    for build in builders:
        build(frame)
    frame.pack(fill="both", expand=True)
|
||||
|
||||
def _init_menubar(self):
    """Create the File and Edit menus and select the initial option values.

    The Edit menu offers the result count and the number of context
    characters shown before and after a match.  In each group the second
    entry is invoked, which also runs the matching ``set_*`` callback.
    """
    self._result_size = IntVar(self.top)
    self._cntx_bf_len = IntVar(self.top)
    self._cntx_af_len = IntVar(self.top)
    menubar = Menu(self.top)

    filemenu = Menu(menubar, tearoff=0, borderwidth=0)
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    editmenu = Menu(menubar, tearoff=0)

    # Edit > Result Count
    rescntmenu = Menu(editmenu, tearoff=0)
    for count in (20, 50, 100):
        rescntmenu.add_radiobutton(
            label=str(count),
            variable=self._result_size,
            underline=0,
            value=count,
            command=self.set_result_size,
        )
    rescntmenu.invoke(1)
    editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

    # Edit > Context > Before
    cntxmenu = Menu(editmenu, tearoff=0)
    cntxbfmenu = Menu(cntxmenu, tearoff=0)
    for length in (60, 80, 100):
        cntxbfmenu.add_radiobutton(
            label=f"{length} characters",
            variable=self._cntx_bf_len,
            underline=0,
            value=length,
            command=self.set_cntx_bf_len,
        )
    cntxbfmenu.invoke(1)
    cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)

    # Edit > Context > After
    cntxafmenu = Menu(cntxmenu, tearoff=0)
    for length in (70, 90, 110):
        cntxafmenu.add_radiobutton(
            label=f"{length} characters",
            variable=self._cntx_af_len,
            underline=0,
            value=length,
            command=self.set_cntx_af_len,
        )
    cntxafmenu.invoke(1)
    cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)

    editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)

    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    self.top.config(menu=menubar)
|
||||
|
||||
def set_result_size(self, **kwargs):
|
||||
self.model.result_count = self._result_size.get()
|
||||
|
||||
def set_cntx_af_len(self, **kwargs):
|
||||
self._char_after = self._cntx_af_len.get()
|
||||
|
||||
def set_cntx_bf_len(self, **kwargs):
|
||||
self._char_before = self._cntx_bf_len.get()
|
||||
|
||||
def _init_corpus_select(self, parent):
    """Create the "Corpus:" label and the drop-down listing all corpora.

    The selection is held in ``self.var``; choosing an entry calls
    ``corpus_selected``.
    """
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
    self.var = StringVar(innerframe)
    self.var.set(self.model.DEFAULT_CORPUS)
    Label(
        innerframe,
        justify=LEFT,
        text=" Corpus: ",
        background=self._BACKGROUND_COLOUR,
        padx=2,
        pady=1,
        border=0,
    ).pack(side="left")

    # The default corpus is listed first, followed by the others as returned
    # by the model.  (An unused ``other_corpora`` local was dropped here: it
    # was always None, because list.remove() returns None.)
    om = OptionMenu(
        innerframe,
        self.var,
        self.model.DEFAULT_CORPUS,
        *self.model.non_default_corpora(),
        command=self.corpus_selected,
    )
    om["borderwidth"] = 0
    om["highlightthickness"] = 1
    om.pack(side="left")
    innerframe.pack(side="top", fill="x", anchor="n")
|
||||
|
||||
def _init_status(self, parent):
    """Create the status label and pack it into *parent*."""
    options = dict(
        justify=LEFT,
        relief=SUNKEN,
        background=self._BACKGROUND_COLOUR,
        border=0,
        padx=1,
        pady=0,
    )
    self.status = label = Label(parent, **options)
    label.pack(side="top", anchor="sw")
|
||||
|
||||
def _init_query_box(self, parent):
    """Create the query entry and the Search button, side by side."""
    outer = Frame(parent, background=self._BACKGROUND_COLOUR)
    row = Frame(outer, background=self._BACKGROUND_COLOUR)
    # Both widgets are packed with the same geometry options.
    packing = dict(side="left", fill="x", pady=25, anchor="center")

    self.query_box = Entry(row, width=60)
    self.query_box.pack(**packing)

    self.search_button = Button(
        row,
        text="Search",
        command=self.search,
        borderwidth=1,
        highlightthickness=1,
    )
    self.search_button.pack(**packing)

    # Pressing Return in the entry starts a search as well.
    self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)
    row.pack()
    outer.pack(side="top", fill="x", anchor="n")
|
||||
|
||||
def search_enter_keypress_handler(self, *event):
|
||||
self.search()
|
||||
|
||||
def _init_results_box(self, parent):
    """Create the read-only results box, its highlight tags and scrollbars."""
    container = Frame(parent)
    upper = Frame(container)  # text box and vertical scrollbar
    lower = Frame(container)  # horizontal scrollbar
    vbar = Scrollbar(upper, borderwidth=1)
    hbar = Scrollbar(lower, borderwidth=1, orient="horiz")

    box = Text(
        upper,
        font=Font(family="courier", size="16"),
        state="disabled",
        borderwidth=1,
        yscrollcommand=vbar.set,
        xscrollcommand=hbar.set,
        wrap="none",
        width="40",
        height="20",
        exportselection=1,
    )
    self.results_box = box
    box.pack(side="left", fill="both", expand=True)

    # Text tags used by write_results to colour words and labels.
    box.tag_config(self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR)
    box.tag_config(self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR)

    vbar.pack(side="left", fill="y", anchor="e")
    vbar.config(command=box.yview)
    hbar.pack(side="left", fill="x", expand=True, anchor="w")
    hbar.config(command=box.xview)

    # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
    spacer = Label(lower, text="   ", background=self._BACKGROUND_COLOUR)
    spacer.pack(side="left", anchor="e")

    upper.pack(side="top", fill="both", expand=True, anchor="n")
    lower.pack(side="bottom", fill="x", anchor="s")
    container.pack(side="top", fill="both", expand=True)
|
||||
|
||||
def _init_paging(self, parent):
    """Create the Previous/Next buttons (initially disabled); start at page 0."""
    bar = Frame(parent, background=self._BACKGROUND_COLOUR)
    # Options common to both buttons.
    shared = dict(width="10", borderwidth=1, highlightthickness=1, state="disabled")

    self.prev = Button(bar, text="Previous", command=self.previous, **shared)
    self.prev.pack(side="left", anchor="center")

    self.next = Button(bar, text="Next", command=self.__next__, **shared)
    self.next.pack(side="right", anchor="center")

    bar.pack(side="top", fill="y")
    self.current_page = 0
|
||||
|
||||
def previous(self):
|
||||
self.clear_results_box()
|
||||
self.freeze_editable()
|
||||
self.model.prev(self.current_page - 1)
|
||||
|
||||
def __next__(self):
|
||||
self.clear_results_box()
|
||||
self.freeze_editable()
|
||||
self.model.next(self.current_page + 1)
|
||||
|
||||
def about(self, *e):
    """Show the "About" text, preferring a message box.

    Falls back to ``ShowText`` when the message box cannot be imported or
    shown.  Any arguments (e.g. a Tk event) are ignored.
    """
    ABOUT = "NLTK Concordance Search Demo\n"
    TITLE = "About: NLTK Concordance Search Demo"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
    except Exception:
        # Was a bare ``except:``.  ``Exception`` still covers import and Tcl
        # errors, but lets KeyboardInterrupt and SystemExit propagate.
        ShowText(self.top, TITLE, ABOUT)
|
||||
|
||||
def _bind_event_handlers(self):
    """Bind each model event name on the root window to its handler."""
    bindings = (
        (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
        (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
        (SEARCH_ERROR_EVENT, self.handle_search_error),
        (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
    )
    for event_name, handler in bindings:
        self.top.bind(event_name, handler)
|
||||
|
||||
def _poll(self):
    """Handle at most one queued model event, then schedule the next poll."""
    try:
        event = self.queue.get(block=False)
    except q.Empty:
        event = None

    if event is not None:
        dispatch = (
            (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
            (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
            (SEARCH_ERROR_EVENT, self.handle_search_error),
            (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
        )
        # Only the first matching handler runs; unknown events are dropped.
        for event_name, handler in dispatch:
            if event == event_name:
                handler(event)
                break

    self.after = self.top.after(POLL_INTERVAL, self._poll)
|
||||
|
||||
def handle_error_loading_corpus(self, event):
|
||||
self.status["text"] = "Error in loading " + self.var.get()
|
||||
self.unfreeze_editable()
|
||||
self.clear_all()
|
||||
self.freeze_editable()
|
||||
|
||||
def handle_corpus_loaded(self, event):
|
||||
self.status["text"] = self.var.get() + " is loaded"
|
||||
self.unfreeze_editable()
|
||||
self.clear_all()
|
||||
self.query_box.focus_set()
|
||||
|
||||
def handle_search_terminated(self, event):
|
||||
# todo: refactor the model such that it is less state sensitive
|
||||
results = self.model.get_results()
|
||||
self.write_results(results)
|
||||
self.status["text"] = ""
|
||||
if len(results) == 0:
|
||||
self.status["text"] = "No results found for " + self.model.query
|
||||
else:
|
||||
self.current_page = self.model.last_requested_page
|
||||
self.unfreeze_editable()
|
||||
self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)
|
||||
|
||||
def handle_search_error(self, event):
    """Show the failing query in the status bar and re-enable the inputs."""
    bad_query = self.model.query
    self.status["text"] = "Error in query " + bad_query
    self.unfreeze_editable()
|
||||
|
||||
def corpus_selected(self, *args):
    """Load whichever corpus ``self.var`` currently holds; ``args`` are ignored."""
    self.load_corpus(self.var.get())
|
||||
|
||||
def load_corpus(self, selection):
    """Ask the model to load ``selection`` unless it is already selected."""
    if self.model.selected_corpus == selection:
        return
    self.status["text"] = "Loading " + selection + "..."
    self.freeze_editable()
    self.model.load_corpus(selection)
|
||||
|
||||
def search(self):
    """Start a fresh search for the text in the query box.

    Previous results are always discarded; a blank query stops there.
    """
    self.current_page = 0
    self.clear_results_box()
    self.model.reset_results()
    query = self.query_box.get()
    if not query.strip():
        return
    self.status["text"] = "Searching for " + query
    self.freeze_editable()
    # Pages are numbered from 1 in the model.
    self.model.search(query, self.current_page + 1)
|
||||
|
||||
def write_results(self, results):
    """Write one page of search hits into the results box and highlight them.

    :param results: sequence of ``(sentence, start, end)`` entries, where
        ``start``/``end`` delimit the matched text inside ``sentence``.

    Entries whose sentence is empty after stripping are skipped and do not
    consume a row in the text widget.
    """
    box = self.results_box
    box["state"] = "normal"
    total = len(results)
    line_no = 1
    for entry in results:
        sent, pos1, pos2 = entry[0].strip(), entry[1], entry[2]
        if len(sent) == 0:
            continue
        if pos1 < self._char_before:
            sent, pos1, pos2 = self.pad(sent, pos1, pos2)
        # Fixed-width window around the start of the match.
        window = sent[pos1 - self._char_before : pos1 + self._char_after]
        if line_no != total:
            window += "\n"
        box.insert(f"{line_no}.0", window)
        word_spans, label_spans = self.words_and_labels(sent, pos1, pos2)
        for span in word_spans:
            box.tag_add(
                self._HIGHLIGHT_WORD_TAG,
                f"{line_no}.{span[0]}",
                f"{line_no}.{span[1]}",
            )
        for span in label_spans:
            box.tag_add(
                self._HIGHLIGHT_LABEL_TAG,
                f"{line_no}.{span[0]}",
                f"{line_no}.{span[1]}",
            )
        line_no += 1
    box["state"] = "disabled"
|
||||
|
||||
def words_and_labels(self, sentence, pos1, pos2):
    """Locate the word and tag parts of every token in the matched text.

    :param sentence: the (possibly padded) ``word/TAG``-formatted sentence.
    :param pos1: start offset of the match inside ``sentence``.
    :param pos2: end offset of the match inside ``sentence``.
    :return: ``(words, labels)``; each is a list of ``(start, end)`` pairs
        counted from the start of the match and shifted right by
        ``self._char_before``.
    :raises ValueError: if a non-empty token contains no ``/`` at all.
    """
    search_exp = sentence[pos1:pos2]
    words, labels = [], []
    index = 0
    for token in search_exp.split(" "):
        if token == "":
            # Consecutive spaces: the empty piece stands for one character.
            index += 1
            continue
        # The tag follows the LAST slash; the word may itself contain "/"
        # (e.g. "and/or/CC"), which a plain split("/") cannot unpack.
        word, label = token.rsplit("/", 1)
        words.append(
            (self._char_before + index, self._char_before + index + len(word))
        )
        index += len(word) + 1  # skip the word and its "/" separator
        labels.append(
            (self._char_before + index, self._char_before + index + len(label))
        )
        index += len(label)
        index += 1  # skip the space between tokens
    return words, labels
|
||||
|
||||
def pad(self, sent, hstart, hend):
    """Left-pad ``sent`` with spaces so the match starts at ``_char_before``.

    :return: ``(sent, hstart, hend)``, unchanged when ``hstart`` is already
        at least ``self._char_before``; otherwise the padded sentence with
        both offsets shifted by the number of spaces added.
    """
    shortfall = self._char_before - hstart
    if shortfall <= 0:
        return sent, hstart, hend
    return " " * shortfall + sent, hstart + shortfall, hend + shortfall
|
||||
|
||||
def destroy(self, *e):
    """Cancel the pending poll and destroy the window; later calls do nothing."""
    window = self.top
    if window is None:
        return
    window.after_cancel(self.after)
    window.destroy()
    self.top = None
|
||||
|
||||
def clear_all(self):
    """Empty the query box, reset the model's query and clear the results."""
    entry = self.query_box
    entry.delete(0, END)
    self.model.reset_query()
    self.clear_results_box()
|
||||
|
||||
def clear_results_box(self):
    """Delete all text from the results box, leaving it disabled."""
    box = self.results_box
    # The widget is switched to "normal" for the deletion.
    box["state"] = "normal"
    box.delete("1.0", END)
    box["state"] = "disabled"
|
||||
|
||||
def freeze_editable(self):
    """Disable the query box, the search button and both paging buttons."""
    for widget in (self.query_box, self.search_button, self.prev, self.next):
        widget["state"] = "disabled"
|
||||
|
||||
def unfreeze_editable(self):
    """Re-enable the query box and search button, then refresh paging buttons."""
    for widget in (self.query_box, self.search_button):
        widget["state"] = "normal"
    self.set_paging_button_states()
|
||||
|
||||
def set_paging_button_states(self):
    """Enable or disable the previous/next buttons for the current page.

    "Previous" is disabled on page 0 or 1; "next" follows the model's
    ``has_more_pages`` answer for the current page.
    """
    on_first_page = self.current_page in (0, 1)
    self.prev["state"] = "disabled" if on_first_page else "normal"
    more_available = self.model.has_more_pages(self.current_page)
    self.next["state"] = "normal" if more_available else "disabled"
|
||||
|
||||
def fire_event(self, event):
    """Generate ``event`` on the top-level window, queued at the tail."""
    # Fire an event so that rendering of widgets happens in the mainloop thread.
    window = self.top
    window.event_generate(event, when="tail")
|
||||
|
||||
def mainloop(self, *args, **kwargs):
    """Enter the Tk main loop, unless ``in_idle()`` reports true.

    All arguments are forwarded to the top-level window's ``mainloop``.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
|
||||
|
||||
|
||||
class ConcordanceSearchModel:
    """Corpus, query and paged-result state for the concordance tool.

    Corpus loading and searching run on the nested worker threads
    ``LoadCorpus`` and ``SearchCorpus``, which report their outcome by
    putting event constants on ``queue``.
    """

    def __init__(self, queue):
        self.queue = queue
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.selected_corpus = None
        self.reset_query()
        self.reset_results()
        self.result_count = None
        self.last_sent_searched = 0

    def non_default_corpora(self):
        """Return the sorted corpus names, excluding the default corpus."""
        names = list(self.CORPORA.keys())
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def load_corpus(self, name):
        """Select ``name`` and start loading it on a background thread."""
        self.selected_corpus = name
        self.tagged_sents = []
        self.LoadCorpus(name, self).start()

    def search(self, query, page):
        """Record the query and requested page, then search on a new thread."""
        self.query = query
        self.last_requested_page = page
        worker = self.SearchCorpus(self, page, self.result_count)
        worker.start()

    def next(self, page):
        """Request ``page``, searching only if it has not been fetched yet."""
        self.last_requested_page = page
        if page > len(self.results):
            self.search(self.query, page)
            return
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def prev(self, page):
        """Request an earlier ``page`` and signal that it is ready."""
        self.last_requested_page = page
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def reset_results(self):
        """Forget all stored pages and the search position."""
        self.last_sent_searched = 0
        self.results = []
        self.last_page = None

    def reset_query(self):
        """Forget the current query."""
        self.query = None

    def set_results(self, page, resultset):
        """Store ``resultset`` at the list position for 1-based ``page``."""
        position = page - 1
        self.results.insert(position, resultset)

    def get_results(self):
        """Return the stored results for the last requested page."""
        position = self.last_requested_page - 1
        return self.results[position]

    def has_more_pages(self, page):
        """Tell whether a page after ``page`` may exist.

        False when nothing is stored or the first page is empty; True while
        the last page is still unknown; otherwise ``page < last_page``.
        """
        if len(self.results) == 0 or len(self.results[0]) == 0:
            return False
        if self.last_page is None:
            return True
        return page < self.last_page

    class LoadCorpus(threading.Thread):
        """Worker thread that loads one corpus into the model."""

        def __init__(self, name, model):
            # The thread must be initialised before ``name`` is assigned.
            super().__init__()
            self.model, self.name = model, name

        def run(self):
            """Load the corpus as ``word/TAG`` strings and report the outcome."""
            try:
                tagged = self.model.CORPORA[self.name]()
                self.model.tagged_sents = [
                    " ".join(w + "/" + t for (w, t) in sent) for sent in tagged
                ]
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)

    class SearchCorpus(threading.Thread):
        """Worker thread that collects one page of matches for the query."""

        def __init__(self, model, page, count):
            self.model, self.count, self.page = model, count, page
            super().__init__()

        def run(self):
            """Scan sentences from the model's saved position and store a page.

            One match more than ``count`` is collected to detect that a
            further page exists; that extra match is dropped before storing.
            """
            model = self.model
            pattern = self.processed_query()
            hits = []
            scanned = 0
            for sentence in model.tagged_sents[model.last_sent_searched :]:
                try:
                    found = re.search(pattern, sentence)
                except re.error:
                    model.reset_results()
                    model.queue.put(SEARCH_ERROR_EVENT)
                    return
                if found is not None:
                    hits.append((sentence, found.start(), found.end()))
                    if len(hits) > self.count:
                        model.last_sent_searched += scanned - 1
                        break
                scanned += 1
            if len(hits) <= self.count:
                # Sentences ran out first, so this is the final page.
                model.last_sent_searched += scanned - 1
                model.last_page = self.page
                model.set_results(self.page, hits)
            else:
                model.set_results(self.page, hits[:-1])
            model.queue.put(SEARCH_TERMINATED_EVENT)

        def processed_query(self):
            """Turn the model's query into a regex over ``word/TAG`` text.

            Each whitespace-separated term has ``.`` replaced by ``[^/ ]``.
            An all-capitals term is matched as a tag, a term containing
            ``/`` is used as given, and any other term is matched as a word.
            """
            pieces = []
            for term in self.model.query.split():
                term = re.sub(r"\.", r"[^/ ]", term)
                if re.match("[A-Z]+$", term):
                    piece = BOUNDARY + WORD_OR_TAG + "/" + term + BOUNDARY
                elif "/" in term:
                    piece = BOUNDARY + term + BOUNDARY
                else:
                    piece = BOUNDARY + term + "/" + WORD_OR_TAG + BOUNDARY
                pieces.append(piece)
            return " ".join(pieces)
|
||||
|
||||
|
||||
def app():
    """Create the concordance search window and run its main loop."""
    view = ConcordanceSearchView()
    view.mainloop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
__all__ = ["app"]
|
||||
163
backend/venv/Lib/site-packages/nltk/app/nemo_app.py
Normal file
163
backend/venv/Lib/site-packages/nltk/app/nemo_app.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06
|
||||
# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783
|
||||
|
||||
"""
|
||||
Finding (and Replacing) Nemo
|
||||
|
||||
Instant Regular Expressions
|
||||
Created by Aristide Grange
|
||||
"""
|
||||
import itertools
|
||||
import re
|
||||
from tkinter import SEL_FIRST, SEL_LAST, Frame, Label, PhotoImage, Scrollbar, Text, Tk
|
||||
|
||||
windowTitle = "Finding (and Replacing) Nemo"
|
||||
initialFind = r"n(.*?)e(.*?)m(.*?)o"
|
||||
initialRepl = r"M\1A\2K\3I"
|
||||
initialText = """\
|
||||
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
||||
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
||||
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
|
||||
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
"""
|
||||
images = {
|
||||
"FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6c
C6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
|
||||
"find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
|
||||
"REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt
5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
|
||||
"repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
|
||||
}
|
||||
colors = ["#FF7B39", "#80F121"]
|
||||
emphColors = ["#DAFC33", "#F42548"]
|
||||
fieldParams = {
|
||||
"height": 3,
|
||||
"width": 70,
|
||||
"font": ("monaco", 14),
|
||||
"highlightthickness": 0,
|
||||
"borderwidth": 0,
|
||||
"background": "white",
|
||||
}
|
||||
textParams = {
|
||||
"bg": "#F7E0D4",
|
||||
"fg": "#2321F1",
|
||||
"highlightthickness": 0,
|
||||
"width": 1,
|
||||
"height": 10,
|
||||
"font": ("verdana", 16),
|
||||
"wrap": "word",
|
||||
}
|
||||
|
||||
|
||||
class Zone:
    """One row of the tool: an icon, a small input field and a text pane.

    Subclasses supply ``substitute()``, which ``refresh`` calls.
    """

    def __init__(self, image, initialField, initialText):
        # Upper row: status icon plus the editable field.
        field_frame = Frame(root)
        field_frame.config(background="white")
        self.image = PhotoImage(format="gif", data=images[image.upper()])
        self.imageDimmed = PhotoImage(format="gif", data=images[image])
        self.img = Label(field_frame)
        self.img.config(borderwidth=0)
        self.img.pack(side="left")
        self.fld = Text(field_frame, **fieldParams)
        self.initScrollText(field_frame, self.fld, initialField)

        # Lower row: the text pane that receives the colour tags.
        text_frame = Frame(root)
        self.txt = Text(text_frame, **textParams)
        self.initScrollText(text_frame, self.txt, initialText)
        for color, emph in zip(colors[:2], emphColors[:2]):
            self.txt.tag_config(color, background=color)
            self.txt.tag_config("emph" + color, foreground=emph)

    def initScrollText(self, frm, txt, contents):
        """Attach a vertical scrollbar to ``txt``, fill it and pack ``frm``."""
        scrollbar = Scrollbar(frm)
        scrollbar.config(command=txt.yview)
        scrollbar.pack(side="right", fill="y")
        txt.pack(side="left", expand=True, fill="x")
        txt.config(yscrollcommand=scrollbar.set)
        txt.insert("1.0", contents)
        frm.pack(fill="x")
        # Thin ridge separator packed below the frame.
        separator = Frame(height=2, bd=1, relief="ridge")
        separator.pack(fill="x")

    def refresh(self):
        """Redo the substitution; show the dimmed icon on a regex error."""
        self.colorCycle = itertools.cycle(colors)
        try:
            self.substitute()
        except re.error:
            self.img.config(image=self.imageDimmed)
        else:
            self.img.config(image=self.image)
|
||||
|
||||
|
||||
class FindZone(Zone):
    """The "find" row: highlights every match of the field's regex."""

    def addTags(self, m):
        """Tag one match ``m`` with the next colour of the cycle.

        If the pattern has an ``emph`` group, that part of the match also
        gets the emphasis tag; otherwise emphasis is silently skipped.
        """
        from tkinter import TclError

        color = next(self.colorCycle)
        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
        try:
            self.txt.tag_add(
                "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph")
            )
        except (IndexError, TclError):
            # IndexError: the pattern has no "emph" group (nothing selected).
            # TclError: Tk rejected the index built from the group's offsets.
            pass

    def substitute(self, *args):
        """Recompile the regex from the field and re-tag the text pane.

        :raises re.error: if the field does not hold a valid regex; the
            previous ``self.rex`` has already been replaced by an empty
            pattern by then.
        """
        from tkinter import TclError

        for color in colors:
            self.txt.tag_remove(color, "1.0", "end")
            self.txt.tag_remove("emph" + color, "1.0", "end")
        self.rex = re.compile("")  # default value in case of malformed regexp
        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
        try:
            # The selected part must be a valid regex by itself.
            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST, SEL_LAST))
            self.rexSel = re.compile(
                "%s(?P<emph>%s)%s"
                % (
                    self.fld.get("1.0", SEL_FIRST),
                    self.fld.get(SEL_FIRST, SEL_LAST),
                    self.fld.get(SEL_LAST, "end")[:-1],
                ),
                re.MULTILINE,
            )
        except (re.error, TclError):
            # TclError: the field has no selection. re.error: the selection
            # does not form a valid group. Either way, use the plain regex.
            self.rexSel = self.rex
        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
|
||||
|
||||
|
||||
class ReplaceZone(Zone):
    """The "replace" row: shows the find-zone text with substitutions applied."""

    def addTags(self, m):
        """Replace one match ``m`` in the pane with its coloured substitution."""
        replacement = sz.rex.sub(self.repl, m.group())
        # ``self.diff`` is the length change caused by earlier replacements.
        begin = "1.0+%sc" % (m.start() + self.diff)
        finish = "1.0+%sc" % (m.end() + self.diff)
        self.txt.delete(begin, finish)
        self.txt.insert(begin, replacement, next(self.colorCycle))
        self.diff += len(replacement) - (m.end() - m.start())

    def substitute(self):
        """Copy the find-zone text here and apply the replacement to each match."""
        self.txt.delete("1.0", "end")
        self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1])
        self.diff = 0
        # Rewrite "\N" references in the template as "\g<N>".
        template = self.fld.get("1.0", "end")[:-1]
        self.repl = rex0.sub(r"\\g<\1>", template)
        sz.rex.sub(self.addTags, sz.txt.get("1.0", "end")[:-1])
|
||||
|
||||
|
||||
def launchRefresh(_):
    """Schedule an idle-time refresh of the find zone, then the replace zone."""
    find_field = sz.fld
    find_field.after_idle(sz.refresh)
    replace_field = rz.fld
    replace_field.after_idle(rz.refresh)
|
||||
|
||||
|
||||
def app():
    """Build the Nemo window, wire up its refresh events and run the main loop."""
    global root, sz, rz, rex0
    root = Tk()
    root.resizable(height=False, width=True)
    root.title(windowTitle)
    root.minsize(width=250, height=0)

    sz = FindZone("find", initialFind, initialText)
    # Mouse press, release and drag in the find field all trigger a refresh.
    for sequence in ("<Button-1>", "<ButtonRelease-1>", "<B1-Motion>"):
        sz.fld.bind(sequence, launchRefresh)
    sz.rexSel = re.compile("")

    rz = ReplaceZone("repl", initialRepl, "")
    rex0 = re.compile(r"(?<!\\)\\([0-9]+)")

    root.bind_all("<Key>", launchRefresh)
    launchRefresh(None)
    root.mainloop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
__all__ = ["app"]
|
||||
1052
backend/venv/Lib/site-packages/nltk/app/rdparser_app.py
Normal file
1052
backend/venv/Lib/site-packages/nltk/app/rdparser_app.py
Normal file
File diff suppressed because it is too large
Load Diff
937
backend/venv/Lib/site-packages/nltk/app/srparser_app.py
Normal file
937
backend/venv/Lib/site-packages/nltk/app/srparser_app.py
Normal file
@@ -0,0 +1,937 @@
|
||||
# Natural Language Toolkit: Shift-Reduce Parser Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
A graphical tool for exploring the shift-reduce parser.
|
||||
|
||||
The shift-reduce parser maintains a stack, which records the structure
|
||||
of the portion of the text that has been parsed. The stack is
|
||||
initially empty. Its contents are shown on the left side of the main
|
||||
canvas.
|
||||
|
||||
On the right side of the main canvas is the remaining text. This is
|
||||
the portion of the text which has not yet been considered by the
|
||||
parser.
|
||||
|
||||
The parser builds up a tree structure for the text using two
|
||||
operations:
|
||||
|
||||
- "shift" moves the first token from the remaining text to the top
|
||||
of the stack. In the demo, the top of the stack is its right-hand
|
||||
side.
|
||||
- "reduce" uses a grammar production to combine the rightmost stack
|
||||
elements into a single tree token.
|
||||
|
||||
You can control the parser's operation by using the "shift" and
|
||||
"reduce" buttons; or you can use the "step" button to let the parser
|
||||
automatically decide which operation to apply. The parser uses the
|
||||
following rules to decide which operation to apply:
|
||||
|
||||
- Only shift if no reductions are available.
|
||||
- If multiple reductions are available, then apply the reduction
|
||||
whose CFG production is listed earliest in the grammar.
|
||||
|
||||
The "reduce" button applies the reduction whose CFG production is
|
||||
listed earliest in the grammar. There are two ways to manually choose
|
||||
which reduction to apply:
|
||||
|
||||
- Click on a CFG production from the list of available reductions,
|
||||
on the left side of the main window. The reduction based on that
|
||||
production will be applied to the top of the stack.
|
||||
- Click on one of the stack elements. A popup window will appear,
|
||||
containing all available reductions. Select one, and it will be
|
||||
applied to the top of the stack.
|
||||
|
||||
Note that reductions can only be applied to the top of the stack.
|
||||
|
||||
Keyboard Shortcuts::
|
||||
[Space]\t Perform the next shift or reduce operation
|
||||
[s]\t Perform a shift operation
|
||||
[r]\t Perform a reduction operation
|
||||
[Ctrl-z]\t Undo most recent operation
|
||||
[Delete]\t Reset the parser
|
||||
[g]\t Show/hide available production list
|
||||
[Ctrl-a]\t Toggle animations
|
||||
[h]\t Help
|
||||
[Ctrl-p]\t Print
|
||||
[q]\t Quit
|
||||
|
||||
"""
|
||||
|
||||
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
|
||||
from tkinter.font import Font
|
||||
|
||||
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
|
||||
from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
|
||||
from nltk.parse import SteppingShiftReduceParser
|
||||
from nltk.tree import Tree
|
||||
from nltk.util import in_idle
|
||||
|
||||
"""
|
||||
Possible future improvements:
|
||||
- button/window to change and/or select text. Just pop up a window
|
||||
with an entry, and let them modify the text; and then retokenize
|
||||
it? Maybe give a warning if it contains tokens whose types are
|
||||
not in the grammar.
|
||||
- button/window to change and/or select grammar. Select from
|
||||
several alternative grammars? Or actually change the grammar? If
|
||||
the latter, then I'd want to define nltk.draw.cfg, which would be
|
||||
responsible for that.
|
||||
"""
|
||||
|
||||
|
||||
class ShiftReduceApp:
|
||||
"""
|
||||
A graphical tool for exploring the shift-reduce parser. The tool
|
||||
displays the parser's stack and the remaining text, and allows the
|
||||
user to control the parser's operation. In particular, the user
|
||||
can shift tokens onto the stack, and can perform reductions on the
|
||||
top elements of the stack. A "step" button simply steps through
|
||||
the parsing process, performing the operations that
|
||||
``nltk.parse.ShiftReduceParser`` would use.
|
||||
"""
|
||||
|
||||
def __init__(self, grammar, sent, trace=0):
    self._sent = sent
    self._parser = SteppingShiftReduceParser(grammar, trace)

    # Set up the main window.
    top = Tk()
    self._top = top
    top.title("Shift Reduce Parser Application")

    # Animations.  animating_lock is a lock to prevent the demo
    # from performing new operations while it's animating.
    self._animating_lock = 0
    self._animate = IntVar(top)
    self._animate.set(10)  # = medium

    # The user can hide the grammar.
    self._show_grammar = IntVar(top)
    self._show_grammar.set(1)

    # Initialize fonts.
    self._init_fonts(top)

    # Set up key bindings.
    self._init_bindings()

    # Create the basic frames, in this order.
    for build in (
        self._init_menubar,
        self._init_buttons,
        self._init_feedback,
        self._init_grammar,
        self._init_canvas,
    ):
        build(top)

    # A popup menu for reducing.
    self._reduce_menu = Menu(self._canvas, tearoff=0)

    # Reset the demo, and set the feedback frame to empty.
    self.reset()
    self._lastoper1["text"] = ""
|
||||
|
||||
#########################################
|
||||
## Initialization Helpers
|
||||
#########################################
|
||||
|
||||
def _init_fonts(self, root):
    """Make the system button font the default and create the app's fonts."""
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    self._sysfont = Font(font=Button()["font"])
    root.option_add("*Font", self._sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(self._sysfont.cget("size"))

    point_size = self._size.get()
    self._boldfont = Font(family="helvetica", weight="bold", size=point_size)
    self._font = Font(family="helvetica", size=point_size)
|
||||
|
||||
def _init_grammar(self, parent):
    """Build the "Available Reductions" list and bind its mouse events."""
    # Grammar view.
    frame = Frame(parent)
    self._prodframe = frame
    frame.pack(fill="both", side="left", padx=2)

    self._prodlist_label = Label(
        frame, font=self._boldfont, text="Available Reductions"
    )
    self._prodlist_label.pack()

    listbox_options = {
        "selectmode": "single",
        "relief": "groove",
        "background": "white",
        "foreground": "#909090",
        "font": self._font,
        "selectforeground": "#004040",
        "selectbackground": "#c0f0c0",
    }
    prodlist = Listbox(frame, **listbox_options)
    self._prodlist = prodlist
    prodlist.pack(side="right", fill="both", expand=1)

    # One row per grammar production; show at most 25 rows at a time.
    self._productions = list(self._parser.grammar().productions())
    for production in self._productions:
        prodlist.insert("end", (" %s" % production))
    prodlist.config(height=min(len(self._productions), 25))

    # The scrollbar is always added, whatever the number of productions.
    listscroll = Scrollbar(frame, orient="vertical")
    prodlist.config(yscrollcommand=listscroll.set)
    listscroll.config(command=prodlist.yview)
    listscroll.pack(side="left", fill="y")

    # If they select a production, apply it.
    prodlist.bind("<<ListboxSelect>>", self._prodlist_select)

    # When they hover over a production, highlight it.
    self._hover = -1
    prodlist.bind("<Motion>", self._highlight_hover)
    prodlist.bind("<Leave>", self._clear_hover)
|
||||
|
||||
def _init_bindings(self):
    """Bind the keyboard shortcuts on the top-level window."""
    top = self._top
    shortcuts = (
        # Quit
        (("<Control-q>", "<Control-x>", "<Alt-q>", "<Alt-x>"), self.destroy),
        # Ops (step, shift, reduce, undo)
        (("<space>",), self.step),
        (("<s>", "<Alt-s>", "<Control-s>"), self.shift),
        (("<r>", "<Alt-r>", "<Control-r>"), self.reduce),
        (("<Delete>",), self.reset),
        (
            ("<u>", "<Alt-u>", "<Control-u>", "<Control-z>", "<BackSpace>"),
            self.undo,
        ),
        # Misc
        (("<Control-p>",), self.postscript),
        (("<Control-h>", "<F1>"), self.help),
        (("<Control-g>",), self.edit_grammar),
        (("<Control-t>",), self.edit_sentence),
    )
    for sequences, callback in shortcuts:
        for sequence in sequences:
            top.bind(sequence, callback)

    # Animation speed control: each key stores a fixed value in _animate.
    for key, value in (("-", 20), ("=", 10), ("+", 4)):
        top.bind(key, lambda e, a=self._animate, v=value: a.set(v))
|
||||
|
||||
def _init_buttons(self, parent):
    """Create the Step / Shift / Reduce / Undo button row at the bottom."""
    # Set up the frames.
    self._buttonframe = buttonframe = Frame(parent)
    buttonframe.pack(fill="none", side="bottom")

    # Buttons are packed left to right in this order.
    button_specs = (
        {"text": "Step", "background": "#90c0d0", "command": self.step},
        {
            "text": "Shift",
            "underline": 0,
            "background": "#90f090",
            "command": self.shift,
        },
        {
            "text": "Reduce",
            "underline": 0,
            "background": "#90f090",
            "command": self.reduce,
        },
        {
            "text": "Undo",
            "underline": 0,
            "background": "#f0a0a0",
            "command": self.undo,
        },
    )
    for spec in button_specs:
        Button(buttonframe, foreground="black", **spec).pack(side="left")
|
||||
|
||||
def _init_menubar(self, parent):
    """
    Build the menu bar and install it on ``parent``.

    Cascades are added in the order File, Edit, Apply, View, Animate
    and Help.  The accelerator strings are only labels shown in the
    menu; the key bindings themselves are not created here.

    :param parent: The window that receives the menu bar via
        ``parent.config(menu=...)``.
    """
    menubar = Menu(parent)

    # --- File ---
    filemenu = Menu(menubar, tearoff=0)
    file_entries = (
        ("Reset Parser", 0, self.reset, "Del"),
        ("Print to Postscript", 0, self.postscript, "Ctrl-p"),
        ("Exit", 1, self.destroy, "Ctrl-x"),
    )
    for label, underline, command, accelerator in file_entries:
        filemenu.add_command(
            label=label,
            underline=underline,
            command=command,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # --- Edit ---
    editmenu = Menu(menubar, tearoff=0)
    edit_entries = (
        ("Edit Grammar", 5, self.edit_grammar, "Ctrl-g"),
        ("Edit Text", 5, self.edit_sentence, "Ctrl-t"),
    )
    for label, underline, command, accelerator in edit_entries:
        editmenu.add_command(
            label=label,
            underline=underline,
            command=command,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    # --- Apply ---  (None marks a separator)
    rulemenu = Menu(menubar, tearoff=0)
    rule_entries = (
        ("Step", 1, self.step, "Space"),
        None,
        ("Shift", 0, self.shift, "Ctrl-s"),
        ("Reduce", 0, self.reduce, "Ctrl-r"),
        None,
        ("Undo", 0, self.undo, "Ctrl-u"),
    )
    for entry in rule_entries:
        if entry is None:
            rulemenu.add_separator()
            continue
        label, underline, command, accelerator = entry
        rulemenu.add_command(
            label=label,
            underline=underline,
            command=command,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    # --- View ---
    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    viewmenu.add_separator()
    # Each radio button stores its value in self._size and calls resize.
    font_sizes = (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    )
    for label, value in font_sizes:
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=value,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # --- Animate ---
    animatemenu = Menu(menubar, tearoff=0)
    # "No Animation" is the only entry without an accelerator label.
    speeds = (
        ("No Animation", 0, {}),
        ("Slow Animation", 20, {"accelerator": "-"}),
        ("Normal Animation", 10, {"accelerator": "="}),
        ("Fast Animation", 4, {"accelerator": "+"}),
    )
    for label, value, extra in speeds:
        animatemenu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=value,
            **extra,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    # --- Help ---
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    helpmenu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
|
||||
|
||||
def _init_feedback(self, parent):
    """
    Build the "Last Operation" feedback strip at the bottom of ``parent``.

    Creates ``self._feedbackframe``, the caption ``self._lastoper_label``
    and the two value labels ``self._lastoper1`` / ``self._lastoper2``
    that the callbacks write their status text into.

    :param parent: The widget that the feedback frame is created in.
    """
    feedbackframe = Frame(parent)
    self._feedbackframe = feedbackframe
    feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3)

    caption = Label(feedbackframe, text="Last Operation:", font=self._font)
    self._lastoper_label = caption
    caption.pack(side="left")

    # Sunken box holding the two value labels.
    lastoperframe = Frame(feedbackframe, relief="sunken", border=1)
    lastoperframe.pack(fill="x", side="right", expand=1, padx=5)

    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(lastoperframe, foreground="#007070", **shared)
    self._lastoper2 = Label(
        lastoperframe, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
|
||||
|
||||
def _init_canvas(self, parent):
    """
    Create the drawing canvas and the permanent canvas items.

    Sets up ``self._cframe`` / ``self._canvas``, empties the stack and
    remaining-text widget lists, and creates the title bar rectangle,
    the dashed expression line, the stack-top line and the two title
    labels ("Stack" and "Remaining Text").  All canvas items are created
    at dummy coordinates ``(0, 0, 0, 0)``; ``_redraw`` positions them.

    :param parent: The widget that the canvas frame is created in.
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        width=525,
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    canvas = self._canvas = self._cframe.canvas()

    # Widgets for the current stack / remaining text (rebuilt on redraw).
    self._stackwidgets = []
    self._rtextwidgets = []

    self._titlebar = canvas.create_rectangle(
        0, 0, 0, 0, fill="#c0f0f0", outline="black"
    )
    self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
    self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")

    self._stacklabel = TextWidget(
        canvas, "Stack", color="#004040", font=self._boldfont
    )
    self._rtextlabel = TextWidget(
        canvas, "Remaining Text", color="#004040", font=self._boldfont
    )
    self._cframe.add_widget(self._stacklabel)
    self._cframe.add_widget(self._rtextlabel)
|
||||
|
||||
#########################################
|
||||
## Main draw procedure
|
||||
#########################################
|
||||
|
||||
def _redraw(self):
    """
    Rebuild the canvas from the parser's current stack and remaining text.

    All existing stack / remaining-text widgets are destroyed and new
    ones are created: the stack is laid out left to right starting at
    x=5, the remaining text is kept right-justified, and the vertical
    stack-top line is placed midway between the two.  The first
    remaining-text widget can be clicked or dragged to shift it.
    Finally the reducible productions are highlighted.
    """
    scrollregion = self._canvas["scrollregion"].split()
    # Only the right edge of the scroll region is used below.
    (_, _, cx2, _) = (int(c) for c in scrollregion)

    # Delete the old stack & rtext widgets.
    for stackwidget in self._stackwidgets:
        self._cframe.destroy_widget(stackwidget)
    self._stackwidgets = []
    for rtextwidget in self._rtextwidgets:
        self._cframe.destroy_widget(rtextwidget)
    self._rtextwidgets = []

    # Position the titlebar & exprline
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
    self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)

    # Position the titlebar labels..
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    self._stacklabel.move(5 - x1, 3 - y1)
    (x1, y1, x2, y2) = self._rtextlabel.bbox()
    self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)

    # Draw the stack.
    stackx = 5
    for tok in self._parser.stack():
        if isinstance(tok, Tree):
            attribs = {
                "tree_color": "#4080a0",
                "tree_width": 2,
                "node_font": self._boldfont,
                "node_color": "#006060",
                "leaf_color": "#006060",
                "leaf_font": self._font,
            }
            widget = tree_to_treesegment(self._canvas, tok, **attribs)
            widget.label()["color"] = "#000000"
        else:
            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        widget.bind_click(self._popup_reduce)
        self._stackwidgets.append(widget)
        self._cframe.add_widget(widget, stackx, y)
        stackx = widget.bbox()[2] + 10

    # Draw the remaining text.
    rtextwidth = 0
    for tok in self._parser.remaining_text():
        widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        self._rtextwidgets.append(widget)
        self._cframe.add_widget(widget, rtextwidth, y)
        rtextwidth = widget.bbox()[2] + 4

    # Allow enough room to shift the next token (for animations)
    if self._rtextwidgets:
        stackx += self._rtextwidgets[0].width()

    # Move the remaining text to the correct location (keep it
    # right-justified, when possible); and move the remaining text
    # label, if necessary.
    stackx = max(stackx, self._stacklabel.width() + 25)
    rlabelwidth = self._rtextlabel.width() + 10
    rightwidth = max(rtextwidth, rlabelwidth)
    if stackx >= cx2 - rightwidth:
        # Not enough room: push the right edge out past the stack.
        cx2 = stackx + rightwidth
    for rtextwidget in self._rtextwidgets:
        rtextwidget.move(4 + cx2 - rtextwidth, 0)
    self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)

    midx = (stackx + cx2 - rightwidth) / 2
    self._canvas.coords(self._stacktop, midx, 0, midx, 5000)

    # Set up binding to allow them to shift a token by dragging it.
    if self._rtextwidgets:

        # midx / self are bound as defaults so the callback keeps the
        # values from this redraw.
        def drag_shift(widget, midx=midx, self=self):
            if widget.bbox()[0] < midx:
                self.shift()
            else:
                self._redraw()

        self._rtextwidgets[0].bind_drag(drag_shift)
        self._rtextwidgets[0].bind_click(self.shift)

    # Draw the stack top.
    self._highlight_productions()
|
||||
|
||||
def _draw_stack_top(self, widget):
    """
    Move the vertical stack-top line 50 units right of ``widget``.

    :param widget: The widget whose right edge (``bbox()[2]``) anchors
        the line.
    """
    # hack..
    right_edge = widget.bbox()[2]
    line_x = right_edge + 50
    self._canvas.coords(self._stacktop, line_x, 0, line_x, 5000)
|
||||
|
||||
def _highlight_productions(self):
    """
    Select, in the production list box, exactly those productions that
    the parser reports as currently reducible.
    """
    listbox = self._prodlist
    listbox.selection_clear(0, "end")
    # Highlight the productions that can be reduced.
    for production in self._parser.reducible_productions():
        listbox.selection_set(self._productions.index(production))
|
||||
|
||||
#########################################
|
||||
## Button Callbacks
|
||||
#########################################
|
||||
|
||||
def destroy(self, *e):
    """
    Destroy the top-level window, if it still exists.

    Calling this again after the window is gone is a no-op.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    top = self._top
    if top is not None:
        top.destroy()
        self._top = None
|
||||
|
||||
def reset(self, *e):
    """
    Re-initialize the parser on the current sentence and redraw.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    self._parser.initialize(self._sent)
    for label, text in ((self._lastoper1, "Reset App"), (self._lastoper2, "")):
        label["text"] = text
    self._redraw()
|
||||
|
||||
def step(self, *e):
    """
    Perform one parser step: try a reduce first, then a shift.

    If neither applies, the feedback labels are set to
    "Finished: Success" when the parser yields at least one parse and to
    "Finished: Failure" otherwise.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    :return: True if a reduce or shift was performed; None otherwise.
    """
    if self.reduce() or self.shift():
        return True

    # Nothing left to do: report whether a parse was found.
    outcome = "Success" if list(self._parser.parses()) else "Failure"
    self._lastoper1["text"] = "Finished:"
    self._lastoper2["text"] = outcome
|
||||
|
||||
def shift(self, *e):
    """
    Shift the next token onto the stack and update the display.

    Does nothing while an animation is running.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    :return: True if a token was shifted, False if the parser could not
        shift, None if an animation is in progress.
    """
    if self._animating_lock:
        return
    if not self._parser.shift():
        return False

    tok = self._parser.stack()[-1]
    self._lastoper1["text"] = "Shift:"
    self._lastoper2["text"] = "%r" % tok
    # Animate when a non-zero animation speed is selected.
    refresh = self._animate_shift if self._animate.get() else self._redraw
    refresh()
    return True
|
||||
|
||||
def reduce(self, *e):
    """
    Ask the parser to perform a reduction and update the display.

    Does nothing while an animation is running.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    :return: Whatever ``self._parser.reduce()`` returned (truthy when a
        reduction was applied), or None if an animation is in progress.
    """
    if self._animating_lock:
        return
    production = self._parser.reduce()
    if not production:
        return production

    self._lastoper1["text"] = "Reduce:"
    self._lastoper2["text"] = "%s" % production
    # Animate when a non-zero animation speed is selected.
    refresh = self._animate_reduce if self._animate.get() else self._redraw
    refresh()
    return production
|
||||
|
||||
def undo(self, *e):
    """
    Undo the parser's last operation and redraw, unless an animation is
    in progress.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    if not self._animating_lock and self._parser.undo():
        self._redraw()
|
||||
|
||||
def postscript(self, *e):
    """
    Print the canvas to a file by delegating to
    ``self._cframe.print_to_file()``.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    canvas_frame = self._cframe
    canvas_frame.print_to_file()
|
||||
|
||||
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    When ``in_idle()`` reports true the call returns immediately
    without entering the loop.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
|
||||
|
||||
#########################################
|
||||
## Menubar callbacks
|
||||
#########################################
|
||||
|
||||
def resize(self, size=None):
    """
    Apply the selected font size to all three fonts and redraw.

    :param size: If not None, stored in ``self._size`` first; otherwise
        the current value of ``self._size`` is used.
    """
    if size is not None:
        self._size.set(size)

    # The size is always passed to the fonts as a negative number.
    negated = -(abs(self._size.get()))
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=negated)

    self._redraw()
|
||||
|
||||
def help(self, *e):
    """
    Show the module docstring in a help window.

    The window is first created with the 'fixed' font; if that attempt
    raises, it is created again with the default font.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    title = "Help: Shift-Reduce Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt
        # and SystemExit are not swallowed by the fallback.
        ShowText(self._top, title, text, width=75)
|
||||
|
||||
def about(self, *e):
    """
    Show the "About" dialog.

    A ``tkinter.messagebox.Message`` is tried first; if importing or
    showing it raises, the text is displayed with ``ShowText`` instead.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    about_text = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
    title = "About: Shift-Reduce Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=about_text, title=title).show()
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt
        # and SystemExit are not swallowed by the fallback.
        ShowText(self._top, title, about_text)
|
||||
|
||||
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's current grammar; the editor is
    given ``self.set_grammar`` as its callback.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    current_grammar = self._parser.grammar()
    CFGEditor(self._top, current_grammar, self.set_grammar)
|
||||
|
||||
def set_grammar(self, grammar):
    """
    Install ``grammar`` in the parser and refill the production list box.

    :param grammar: Object providing ``productions()``; it is passed on
        to ``self._parser.set_grammar``.
    """
    self._parser.set_grammar(grammar)
    productions = list(grammar.productions())
    self._productions = productions

    listbox = self._prodlist
    listbox.delete(0, "end")
    for production in productions:
        listbox.insert("end", (" %s" % production))
|
||||
|
||||
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence; the
    dialog is given ``self.set_sentence`` as its callback.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    current_text = " ".join(self._sent)
    EntryDialog(
        self._top,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
|
||||
|
||||
def set_sentence(self, sent):
    """
    Replace the sentence being parsed and reset the application.

    :param sent: The new sentence as a single string; it is split on
        whitespace into tokens.
    """
    tokens = sent.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
|
||||
|
||||
#########################################
|
||||
## Reduce Production Selection
|
||||
#########################################
|
||||
|
||||
def _toggle_grammar(self, *e):
    """
    Show or hide the production frame according to ``self._show_grammar``
    and record the action in the feedback labels.

    :param e: Ignored; accepted so the method can be used as an event
        callback.
    """
    visible = self._show_grammar.get()
    if visible:
        self._prodframe.pack(
            fill="both", side="left", padx=2, after=self._feedbackframe
        )
    else:
        self._prodframe.pack_forget()
    self._lastoper1["text"] = "Show Grammar" if visible else "Hide Grammar"
    self._lastoper2["text"] = ""
|
||||
|
||||
def _prodlist_select(self, event):
    """
    Handle a selection in the production list box by trying to reduce
    with the selected production.

    Nothing happens while an animation is running (the same guard that
    ``shift``, ``reduce`` and ``undo`` use), or unless exactly one
    production is selected.  If the parser cannot apply the selected
    production, the list box selection is restored to the set of
    reducible productions.

    :param event: The triggering event (unused).
    """
    # Do not change the parser state in the middle of an animation; the
    # final animation frame reads the parser's stack.
    if self._animating_lock:
        return
    selection = self._prodlist.curselection()
    if len(selection) != 1:
        return
    index = int(selection[0])
    production = self._parser.reduce(self._productions[index])
    if production:
        self._lastoper1["text"] = "Reduce:"
        self._lastoper2["text"] = "%s" % production
        if self._animate.get():
            self._animate_reduce()
        else:
            self._redraw()
    else:
        # Reset the production selections.
        self._highlight_productions()
|
||||
|
||||
def _popup_reduce(self, widget):
    """
    Pop up a menu of the currently reducible productions at the mouse
    pointer; choosing an entry reduces with that production.

    Nothing is shown when no production is reducible.

    :param widget: The clicked stack widget (unused).
    """
    productions = self._parser.reducible_productions()
    if not productions:
        return

    def reduce_with(production):
        # Apply the production the user picked.  Passing it to the
        # parser explicitly matters when several reductions are
        # possible: a plain self.reduce() lets the parser choose.
        if self._animating_lock:
            return
        applied = self._parser.reduce(production)
        if applied:
            self._lastoper1["text"] = "Reduce:"
            self._lastoper2["text"] = "%s" % applied
            if self._animate.get():
                self._animate_reduce()
            else:
                self._redraw()

    # Remove old commands.
    self._reduce_menu.delete(0, "end")
    for production in productions:
        # Bind the production as a default argument so each entry keeps
        # its own production (avoids the late-binding closure pitfall).
        self._reduce_menu.add_command(
            label=str(production),
            command=lambda production=production: reduce_with(production),
        )
    self._reduce_menu.post(
        self._canvas.winfo_pointerx(), self._canvas.winfo_pointery()
    )
|
||||
|
||||
#########################################
|
||||
## Animations
|
||||
#########################################
|
||||
|
||||
def _animate_shift(self):
    """
    Start the shift animation for the first remaining-text widget.

    The widget slides horizontally from its current left edge to the
    position just right of the last stack widget (or x=5 when the stack
    display is empty), in as many frames as the animation setting.
    """
    # What widget are we shifting?
    widget = self._rtextwidgets[0]

    # Where are we shifting from & to?
    right = widget.bbox()[0]
    if self._stackwidgets:
        left = self._stackwidgets[-1].bbox()[2] + 10
    else:
        left = 5

    # Start animating.
    frames = self._animate.get()
    step_x = (left - right) * 1.0 / frames
    self._animate_shift_frame(frames, widget, step_x)
|
||||
|
||||
def _animate_shift_frame(self, frame, widget, dx):
    """
    Run one frame of the shift animation.

    While ``frame`` is positive the widget is moved by ``dx`` and the
    next frame is scheduled 10 ms later with the animation lock held.
    On the last call the widget is transferred from the remaining-text
    list to the stack list, the lock is released, and the stack-top
    line and production highlighting are refreshed.

    :param frame: Number of frames still to draw.
    :param widget: The widget being shifted.
    :param dx: Horizontal distance to move per frame.
    """
    if frame > 0:
        self._animating_lock = 1
        widget.move(dx, 0)
        self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
        return

    # but: stacktop??

    # Shift the widget to the stack.
    self._rtextwidgets.pop(0)
    self._stackwidgets.append(widget)
    self._animating_lock = 0

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()
|
||||
|
||||
def _animate_reduce(self):
    """
    Start the reduce animation for the widgets that were just reduced.

    The number of widgets involved is the number of children of the
    item on top of the parser's stack.  They travel a vertical distance
    of 15 plus the height of the first widget (or of its label, for a
    tree segment), spread over half the configured number of frames.
    """
    # What widgets are we shifting?
    numwidgets = len(self._parser.stack()[-1])  # number of children
    widgets = self._stackwidgets[-numwidgets:]

    # How far are we moving?
    first = widgets[0]
    measured = first.label() if isinstance(first, TreeSegmentWidget) else first
    ydist = 15 + measured.height()

    # Start animating.
    frames = self._animate.get()
    step_y = ydist * 2.0 / frames
    self._animate_reduce_frame(frames / 2, widgets, step_y)
|
||||
|
||||
def _animate_reduce_frame(self, frame, widgets, dy):
    """
    Run one frame of the reduce animation.

    While ``frame`` is positive every widget is moved by ``dy`` and the
    next frame is scheduled 10 ms later with the animation lock held.
    On the last call the widgets are removed from the stack display and
    re-parented under a new tree segment labelled with the label of the
    tree on top of the parser's stack; the lock is then released.

    :param frame: Number of frames still to draw.
    :param widgets: The stack widgets being reduced.
    :param dy: Vertical distance to move per frame.
    :raise ValueError: If the top of the parser's stack is not a
        ``Tree`` when the animation finishes.
    """
    if frame > 0:
        self._animating_lock = 1
        for moving in widgets:
            moving.move(0, dy)
        self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
        return

    # Take the reduced widgets out of the stack display.
    del self._stackwidgets[-len(widgets) :]
    for reduced in widgets:
        self._cframe.remove_widget(reduced)

    tok = self._parser.stack()[-1]
    if not isinstance(tok, Tree):
        raise ValueError()

    # Build the replacement tree segment around the old widgets.
    label = TextWidget(
        self._canvas, str(tok.label()), color="#006060", font=self._boldfont
    )
    widget = TreeSegmentWidget(self._canvas, label, widgets, width=2)

    (_, label_top, _, label_bottom) = self._stacklabel.bbox()
    y = label_bottom - label_top + 10
    if self._stackwidgets:
        x = self._stackwidgets[-1].bbox()[2] + 10
    else:
        x = 5
    self._cframe.add_widget(widget, x, y)
    self._stackwidgets.append(widget)

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()

    self._animating_lock = 0
|
||||
|
||||
#########################################
|
||||
## Hovering.
|
||||
#########################################
|
||||
|
||||
def _highlight_hover(self, event):
    """
    Highlight the stack widgets that the hovered production would reduce.

    The hovered production is the list box entry nearest to
    ``event.y``.  If it is one of the currently selected entries, the
    last ``len(rhs)`` stack widgets are coloured green.

    :param event: The mouse event; only ``event.y`` is read.
    """
    # What production are we hovering over?
    index = self._prodlist.nearest(event.y)
    if self._hover == index:
        return

    # Clear any previous hover highlighting.
    self._clear_hover()

    # If the production corresponds to an available reduction,
    # highlight the stack.
    selected = [int(entry) for entry in self._prodlist.curselection()]
    if index in selected:
        rhslen = len(self._productions[index].rhs())
        for stackwidget in self._stackwidgets[-rhslen:]:
            is_segment = isinstance(stackwidget, TreeSegmentWidget)
            target = stackwidget.label() if is_segment else stackwidget
            target["color"] = "#00a000"

    # Remember what production we're hovering over.
    self._hover = index
|
||||
|
||||
def _clear_hover(self, *event):
    """
    Remove hover highlighting by colouring every stack widget black.

    Does nothing when no production is currently hovered
    (``self._hover == -1``).

    :param event: Ignored; accepted so the method can be used as an
        event callback.
    """
    # Clear any previous hover highlighting.
    if self._hover == -1:
        return
    self._hover = -1
    for stackwidget in self._stackwidgets:
        is_segment = isinstance(stackwidget, TreeSegmentWidget)
        target = stackwidget.label() if is_segment else stackwidget
        target["color"] = "black"
|
||||
|
||||
|
||||
def app():
    """
    Create a shift reduce parser app, using a simple grammar and
    text.
    """

    from nltk.grammar import CFG, Nonterminal, Production

    nonterminals = "S VP NP PP P N Name V Det"
    (S, VP, NP, PP, P, N, Name, V, Det) = (
        Nonterminal(symbol) for symbol in nonterminals.split()
    )

    # Syntactic Productions: (left-hand side, right-hand side)
    syntactic = (
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
    )
    # Lexical Productions: (left-hand side, word)
    lexical = (
        (NP, "I"),
        (Det, "the"),
        (Det, "a"),
        (N, "man"),
        (V, "saw"),
        (P, "in"),
        (P, "with"),
        (N, "park"),
        (N, "dog"),
        (N, "statue"),
        (Det, "my"),
    )
    productions = tuple(Production(lhs, rhs) for lhs, rhs in syntactic) + tuple(
        Production(lhs, [word]) for lhs, word in lexical
    )

    grammar = CFG(S, productions)

    # tokenize the sentence
    sent = "my dog saw a man in the park with a statue".split()

    ShiftReduceApp(grammar, sent).mainloop()
|
||||
|
||||
|
||||
# Launch the demo only when this module is executed as a script.
if __name__ == "__main__":
    app()

# Names exported by ``from <module> import *``.
__all__ = ["app"]
|
||||
36
backend/venv/Lib/site-packages/nltk/app/wordfreq_app.py
Normal file
36
backend/venv/Lib/site-packages/nltk/app/wordfreq_app.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# Natural Language Toolkit: Wordfreq Application
|
||||
#
|
||||
# Copyright (C) 2001-2025 NLTK Project
|
||||
# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
|
||||
# URL: <https://www.nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from matplotlib import pylab
|
||||
|
||||
from nltk.corpus import gutenberg
|
||||
from nltk.text import Text
|
||||
|
||||
|
||||
def plot_word_freq_dist(text):
    """
    Plot the cumulative percentage covered by the 50 most common samples.

    Point ``i`` of the curve is the share (in percent of ``fd.N()``) of
    the ``i + 1`` most common samples of ``text.vocab()`` taken
    together.  The x axis is labelled with the samples themselves.

    :param text: Object providing ``vocab()`` and ``name``.
    """
    from itertools import accumulate

    fd = text.vocab()

    samples = [item for item, _ in fd.most_common(50)]
    counts = [fd[sample] for sample in samples]
    total = fd.N()
    # Running totals in a single pass instead of re-summing a prefix
    # of the list for every point.
    values = [running * 100.0 / total for running in accumulate(counts)]
    pylab.title(text.name)
    pylab.xlabel("Samples")
    pylab.ylabel("Cumulative Percentage")
    pylab.plot(values)
    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
    pylab.show()
|
||||
|
||||
|
||||
def app():
    """
    Plot the word frequency distribution of the Gutenberg corpus file
    ``melville-moby_dick.txt``.
    """
    words = gutenberg.words("melville-moby_dick.txt")
    plot_word_freq_dist(Text(words))
|
||||
|
||||
|
||||
# Show the plot only when this module is executed as a script.
if __name__ == "__main__":
    app()

# Names exported by ``from <module> import *``.
__all__ = ["app"]
|
||||
1006
backend/venv/Lib/site-packages/nltk/app/wordnet_app.py
Normal file
1006
backend/venv/Lib/site-packages/nltk/app/wordnet_app.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user