Skip to content

Commit

Permalink
layout as other widgets
Browse files Browse the repository at this point in the history
  • Loading branch information
eriktks committed Sep 23, 2024
1 parent e143b71 commit 06659aa
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 15 deletions.
16 changes: 9 additions & 7 deletions orangecontrib/storynavigation/modules/settinganalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class SettingAnalyzer:
language (str): ISO string of the language of the input text
n_segments (int): Number of segments to split each text into
text_tuples (list): binary tuple: text (str) and storyid
story_elements (list of lists): tokens with their Spacy analysis
callback: function in widget to show the progress of this process
"""

Expand All @@ -34,9 +35,10 @@ class SettingAnalyzer:
ENTITY_CACHE_FILE_NAME = "entity_cache.json"


def __init__(self, language, n_segments, text_tuples, story_elements, callback=None):
def __init__(self, language, n_segments, text_tuples, story_elements, user_defined_entities, callback=None):
self.text_tuples = text_tuples
self.n_segments = n_segments
self.user_defined_entities = user_defined_entities
self.callback = callback

self.__setup_required_nlp_resources(language)
Expand Down Expand Up @@ -91,16 +93,16 @@ def __setup_required_nlp_resources(self, language):
"""
if language == constants.NL:
self.model = constants.NL_SPACY_MODEL
self.entity_list = constants.NL_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
#self.entity_list = constants.NL_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
self.time_words = constants.NL_TIME_WORDS_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
elif language == constants.EN:
self.model = constants.EN_SPACY_MODEL
self.entity_list = constants.EN_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
#self.entity_list = constants.EN_ENTITIES_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
self.time_words = constants.EN_TIME_WORDS_FILE.read_text(encoding="utf-8").strip().split(os.linesep)
else:
raise ValueError(f"settingsanalysis.py: unknown language {language}")

self.entity_list = [line.split(",") for line in self.entity_list]
# self.entity_list = [line.split(",") for line in self.entity_list]

def __sort_and_filter_results(self, results):
results = [(x[0], x[1], int(x[2]), x[3], x[4]) for x in results]
Expand All @@ -120,11 +122,11 @@ def __process_texts(self, nlp, text_tuples, entities, callback=None):
return self.__sort_and_filter_results(results)


def __analyze_text_with_list(self, text, nlp, entity_list):
def __analyze_text_with_list(self, text, nlp, user_defined_entities):
matcher = Matcher(nlp.vocab)
for entity_group in self.ENTITY_GROUPS:
patterns = [[{"lower": entity_token} for entity_token in entity_text.lower().split()]
for entity_label, entity_text in entity_list
for entity_text, entity_label in list(user_defined_entities.items())
if entity_label in entity_group]
matcher.add(entity_group[0], patterns)
tokens = nlp(text)
Expand Down Expand Up @@ -170,7 +172,7 @@ def __filter_dates(self, combined_analysis):


def __process_text(self, text_id, text, nlp, spacy_analysis):
list_analysis = self.__analyze_text_with_list(text, nlp, self.entity_list)
list_analysis = self.__analyze_text_with_list(text, nlp, self.user_defined_entities)
combined_analysis = self.__combine_analyses(spacy_analysis, list_analysis)
combined_analysis = self.__expand_locations(combined_analysis)
combined_analysis = self.__filter_dates(combined_analysis)
Expand Down
86 changes: 78 additions & 8 deletions orangecontrib/storynavigation/widgets/OWSNSettingAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
from storynavigation.modules.settinganalysis import SettingAnalyzer
import storynavigation.modules.util as util

from AnyQt.QtWidgets import QAbstractItemView, QHeaderView, QTableView
from storynavigation.widgets.OWSNActorAnalysis import DocumentTableView, DocumentListModel, DocumentsFilterProxyModel
from typing import Set
from orangecontrib.text.widgets.utils import widgets
from Orange.data.io import FileFormat


class OWSNSettingAnalysis(OWWidget, ConcurrentWidgetMixin):
name = 'Setting Analysis'
Expand All @@ -31,6 +37,12 @@ class OWSNSettingAnalysis(OWWidget, ConcurrentWidgetMixin):
"GPE": "lemonchiffon",
"LOC": "lightgreen",
"TIME": "thistle", }
# File-dialog filter string (passed to the FileWidget as ``dialog_format``).
# Entries are separated by ";;":
#   1. an "All readable files" entry listing every key of FileFormat.readers
#      as a "*<key>" glob;
#   2. one "<DESCRIPTION> (*<ext> *<ext> ...)" entry per distinct reader,
#      ordered by the position at which that reader first occurs in
#      FileFormat.readers.values().
dlgFormats = (
"All readable files ({});;".format(
'*' + ' *'.join(FileFormat.readers.keys())) +
";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
for f in sorted(set(FileFormat.readers.values()),
key=list(FileFormat.readers.values()).index)))

class Inputs:
story_elements = Input("Story elements", Table)
Expand All @@ -50,12 +62,59 @@ def __init__(self):

self.make_language_selection_menu()

self.recent_files = []
self.user_defined_entities = {}
# next 9 lines copied from Corpus widget
fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
self.file_widget = widgets.FileWidget(
recent_files=self.recent_files, icon_size=(16, 16),
on_open=self.open_file, dialog_format=self.dlgFormats,
dialog_title='Open Orange Document Corpus',
reload_label='Reload', browse_label='Browse',
allow_empty=False, minimal_width=250,
)
fbox.layout().addWidget(self.file_widget)

# next 18 lines copied from CorpusViewer widget
self.splitter = QSplitter(orientation=Qt.Horizontal, childrenCollapsible=False)
self.doc_list = DocumentTableView()
self.doc_list.setSelectionBehavior(QTableView.SelectRows)
self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
self.doc_list.horizontalHeader().setVisible(False)
self.splitter.addWidget(self.doc_list)

self.doc_list_model = DocumentListModel()
proxy_model = DocumentsFilterProxyModel()
proxy_model.setSourceModel(self.doc_list_model)
self.doc_list.setModel(proxy_model)
self.doc_list.selectionModel().selectionChanged.connect(self.selection_changed)
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
self.doc_webview.setHtml("")
self.mainArea.layout().addWidget(self.splitter)


def open_file(self, user_defined_entities_file):
    """Load user-defined entities from a file and re-run the analysis.

    Every usable line of the file has the form ``label,entity text``. The
    result is stored in ``self.user_defined_entities`` as a dict mapping
    the entity text to its label. Blank lines and lines that lack a comma
    or an entity text are skipped instead of raising ``IndexError``.

    Args:
        user_defined_entities_file: path of the comma-separated file.
    """
    self.user_defined_entities = {}
    # Explicit encoding, in line with how the other resource files of this
    # add-on are read; the platform default may mangle non-ASCII entities.
    with open(user_defined_entities_file, encoding="utf-8") as file:
        for line in file:
            # Strip each field so padding around the comma cannot leak
            # into the label (which must match an entity group exactly).
            fields = [field.strip() for field in line.split(",")]
            if len(fields) < 2 or not fields[1]:
                continue
            self.user_defined_entities[fields[1]] = fields[0]
    # Re-analyse the stories already received so the new entities show up.
    if self.story_elements:
        self.set_story_elements(self.story_elements)


def get_selected_indexes(self) -> Set[int]:
    """Return the source-model row numbers of the selected documents.

    The document list shows a proxy model, so every selected proxy index
    is mapped back to the source model before its row number is taken.

    Returns:
        A set of row numbers, as the annotation promises; it is empty
        when nothing is selected.
    """
    to_source = self.doc_list.model().mapToSource
    selected_rows = self.doc_list.selectionModel().selectedRows()
    return {int(to_source(index).row()) for index in selected_rows}


def selection_changed(self) -> None:
    """Remember the currently selected stories and redraw the text view."""
    selected = self.get_selected_indexes()
    self.stories_selected = selected
    self.__visualize_text_data()


def make_language_selection_menu(self):
self.select_language_combo = gui.comboBox(
widget=self.controlArea,
Expand All @@ -81,6 +140,15 @@ def set_story_elements(self, story_elements=None):
self.text_tuples = self.make_text_tuples(story_elements)
self.__action_analyze_setting_wrapper()

self.doc_list_model.setup_data(self.make_document_names(self.text_tuples), None)


def make_document_names(self, text_tuples):
    """Build the display names for the document list.

    Args:
        text_tuples: iterable of ``(text, text_id)`` pairs; only the id
            is used and it must be convertible with ``int()``.

    Returns:
        A list with one ``"Document <id + 1>"`` string per pair, in
        input order.
    """
    return [f"Document {int(story_id) + 1}" for _, story_id in text_tuples]


def make_text_tuples(self, story_elements):
story_elements_df = util.convert_orangetable_to_dataframe(story_elements)
Expand Down Expand Up @@ -124,12 +192,13 @@ def move_progress_bar(progress):
n_segments=int(self.n_segments),
text_tuples=self.text_tuples,
story_elements=self.story_elements,
user_defined_entities=self.user_defined_entities,
callback=move_progress_bar
)


def on_done(self, result) -> None:
self.__visualize_text_data(self.text_tuples, self.analyzer.settings_analysis)
self.__visualize_text_data()
self.Outputs.dataset_level_data.send(table_from_frame(self.analyzer.settings_analysis))


Expand All @@ -149,9 +218,9 @@ def __insert_entity_color_in_story_text(self, story_text, start, end, label):
return story_text


def __add_entity_colors_to_story_text(self, story_text, story_id, settings_analysis):
for index, row in settings_analysis.loc[
settings_analysis["storyid"] == "ST" + str(story_id)].iloc[::-1].iterrows():
def __add_entity_colors_to_story_text(self, story_text, story_id):
for index, row in self.analyzer.settings_analysis.loc[
self.analyzer.settings_analysis["storyid"] == "ST" + str(story_id)].iloc[::-1].iterrows():
start = int(row["character id"])
end = start + len(row["text"])
story_text = self.__insert_entity_color_in_story_text(story_text,
Expand All @@ -165,12 +234,13 @@ def __add_paragraphs_to_story_text(self, story_text):
return "<p>" + re.sub("\n\n", "<p>", story_text)


def __visualize_text_data(self, text_tuples, settings_analysis):
def __visualize_text_data(self):
html_text = "<html><body>"
html_text += self.__make_entity_bar_for_html()
for story_text, story_id in text_tuples:
story_text = self.__add_entity_colors_to_story_text(story_text, story_id, settings_analysis)
html_text += "<hr>" + self.__add_paragraphs_to_story_text(story_text)
for story_text, story_id in self.text_tuples:
if len(self.stories_selected) == 0 or int(story_id) in self.stories_selected:
story_text = self.__add_entity_colors_to_story_text(story_text, story_id)
html_text += "<hr>" + self.__add_paragraphs_to_story_text(story_text)
html_text += "</body></html>"
self.doc_webview.setHtml(html_text)

Expand Down

0 comments on commit 06659aa

Please sign in to comment.