Skip to content

Commit

Permalink
fixes bug which causes no updating or refreshing of custom tags in the tagger when a new CSV dictionary is loaded
Browse files Browse the repository at this point in the history
  • Loading branch information
kodymoodley committed Jan 9, 2024
1 parent bcd4bcc commit 73bb28d
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 97 deletions.
63 changes: 6 additions & 57 deletions orangecontrib/storynavigation/modules/actionanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,42 +171,19 @@ def postag_text(self, text, past_vbz, present_vbz):
normalised_token, is_valid_token = self.__is_valid_token(tag)
if is_valid_token:
if ((tag[4].text.lower().strip() in self.past_tense_verbs) or (tag[4].text.lower().strip()[:2] == "ge")) and (tag[4].text.lower().strip() not in self.false_positive_verbs): # past tense
# if tag[4].pos_ == "VERB":
# if (tag[4].text.lower().strip() in self.past_tense_verbs) or (tag[4].text.lower().strip()[:2] == "ge"): # past tense
# vb_tense = tag[4].morph.get("Tense")
# if vb_tense == "Past":
ents.append(
{"start": span[0], "end": span[1], "label": "PAST_VB"}
)
# elif vb_tense == "Pres":
else:
if (tag[4].pos_ == "VERB") and (tag[4].text.lower().strip() not in self.false_positive_verbs):
# elif tag[4].text.lower().strip() in self.present_tense_verbs:
if (tag[4].pos_ == "VERB") and (tag[4].text.lower().strip() not in self.false_positive_verbs): # present tense
ents.append(
{"start": span[0], "end": span[1], "label": "PRES_VB"}
)
# else:
# if tag[4].text.lower().strip()[:2] == "ge": # past tense
# ents.append(
# {
# "start": span[0],
# "end": span[1],
# "label": "PAST_VB",
# }
# )
# else:
# ents.append(
# {
# "start": span[0],
# "end": span[1],
# "label": "PRES_VB",
# }
# )

# elif tag[4].pos_ in ["NOUN", "PRON", "PROPN"]:
# self.__update_postagging_metrics(
# tag[4].text.lower().strip(), tag[4]
# )

elif tag[4].pos_ in ["NOUN", "PRON", "PROPN"]: # non-verbs (for noun-action table)
self.__update_postagging_metrics(
tag[4].text.lower().strip(), tag[4]
)

# specify sentences and filtered entities to tag / highlight
doc = {"text": sentence, "ents": ents}
Expand Down Expand Up @@ -357,34 +334,6 @@ def generate_noun_action_table(self):

return pd.DataFrame(rows, columns=["actor", "actions"])

# def generate_halliday_action_counts_table(self, text, dim_type="realm"):
# rows = []

# # Valid values for 'dim_type' parameter: realm, process, prosub, sub\
# halliday_fname = constants.HALLIDAY_FILENAME.format(dim_type)
# # halliday_fname = "halliday_dimensions_" + dim_type + ".json"
# RESOURCES = ActionTagger.PKG / constants.RESOURCES_SUBPACKAGE
# json_file = RESOURCES.joinpath(halliday_fname).open("r", encoding="utf8")
# halliday_dict = json.load(json_file)

# # Calculate the number of story words in each halliday dimension
# words = text.split()
# halliday_counts = {}
# for item in halliday_dict:
# halliday_counts[item] = 0

# for word in words:
# processed_word = word.lower().strip()
# for item in halliday_dict:
# if processed_word in halliday_dict[item]:
# halliday_counts[item] += 1

# for item in halliday_dict:
# rows.append([item, halliday_counts[item]])

# return pd.DataFrame(rows, columns=["action", "frequency"])


class ActionMetricCalculator:
"""Unused class / code so far..."""

Expand Down
21 changes: 10 additions & 11 deletions orangecontrib/storynavigation/widgets/OWSNActionAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,11 @@ class Inputs:
# corpus = Input("Corpus", Corpus, replaces=["Data"])

class Outputs:
matching_docs = Output("Matching Docs", Corpus, default=True)
other_docs = Output("Other Docs", Corpus)
corpus = Output("Corpus", Corpus)
# matching_docs = Output("Matching Docs", Corpus, default=True)
# other_docs = Output("Other Docs", Corpus)
# corpus = Output("Corpus", Corpus)
metrics_freq_table = Output("Frequency", Table)
metrics_tensefreq_table = Output("Tense frequency", Table)
# halliday_actions_table = Output("Halliday action counts", Table)
actor_action_table = Output("Actor action table", Table)

settingsHandler = DomainContextHandler()
Expand Down Expand Up @@ -731,17 +730,17 @@ def update_info(self):

@gui.deferred
def commit(self):
matched = unmatched = annotated_corpus = None
# matched = unmatched = annotated_corpus = None
if self.corpus is not None:
selected_docs = sorted(self.get_selected_indexes())
matched = self.corpus[selected_docs] if selected_docs else None
# matched = self.corpus[selected_docs] if selected_docs else None
mask = np.ones(len(self.corpus), bool)
mask[selected_docs] = 0
unmatched = self.corpus[mask] if mask.any() else None
annotated_corpus = create_annotated_table(self.corpus, selected_docs)
self.Outputs.matching_docs.send(matched)
self.Outputs.other_docs.send(unmatched)
self.Outputs.corpus.send(annotated_corpus)
# unmatched = self.corpus[mask] if mask.any() else None
# annotated_corpus = create_annotated_table(self.corpus, selected_docs)
# self.Outputs.matching_docs.send(matched)
# self.Outputs.other_docs.send(unmatched)
# self.Outputs.corpus.send(annotated_corpus)

def send_report(self):
self.report_items(
Expand Down
54 changes: 25 additions & 29 deletions orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,9 +330,6 @@ class Inputs:
word_dict = Input("Token categories", Table)

class Outputs:
# matching_docs = Output("Matching Docs", Corpus, default=True)
# other_docs = Output("Other Docs", Corpus)
# corpus = Output("Corpus", Corpus)
metrics_freq_table = Output("Frequency", Table)
metrics_subfreq_table = Output("Frequency as subject", Table)
metrics_customfreq_table = Output("Custom token frequency", Table)
Expand Down Expand Up @@ -388,7 +385,6 @@ def __init__(self):
self.corpus = None # initialise list of documents (corpus)
self.word_dict = None # initialise word dictionary
self.custom_tag_dictionary = None
self.custom_tags = None
self.__pending_selected_documents = self.selected_documents

# Search features
Expand Down Expand Up @@ -424,8 +420,19 @@ def __init__(self):
)
self.allc = gui.checkBox(self.postags_box, self, "all_pos", "All")
self.allc.setChecked(False)

self.custom_tags = gui.checkBox(
self.postags_box,
self,
"custom",
"Custom tokens",
callback=self.pos_selection_changed,
)

self.custom_tags.setEnabled(False)

self.allc.stateChanged.connect(self.on_state_changed_pos)
self.pos_checkboxes = [self.sc, self.nc]
self.pos_checkboxes = [self.sc, self.nc, self.custom_tags]
self.controlArea.layout().addWidget(self.postags_box)

# Prominence score slider
Expand Down Expand Up @@ -536,6 +543,7 @@ def rehighlight_entities(self):
self.commit.deferred()

def __create_customtag_checkbox(self, wd):
self.custom_tags.setEnabled(True)
# extract all categorisations in the input dictionary
list_of_lists_categories = []
if len(wd.columns) >= 2:
Expand All @@ -553,15 +561,6 @@ def __create_customtag_checkbox(self, wd):
if len(current_dict_values) > 0:
self.custom_tag_dictionary[category] = current_dict_values

if self.custom_tags not in self.pos_checkboxes:
self.custom_tags = gui.checkBox(
self.postags_box,
self,
"custom",
"Custom tokens",
callback=self.pos_selection_changed,
)

@Inputs.corpus
def set_data(self, corpus=None):
self.actortagger = ActorTagger(constants.NL_SPACY_MODEL)
Expand Down Expand Up @@ -593,10 +592,9 @@ def set_word_dict(self, word_dict=None):
rows.append(item.metas)

self.word_dict = pd.DataFrame(rows[1:], index=None)
if self.custom_tags is None:
self.__create_customtag_checkbox(self.word_dict)
self.__create_customtag_checkbox(self.word_dict)

if self.corpus is not None and word_dict is not None:
if self.corpus is not None:
self.setup_controls()
self.openContext(self.corpus)
self.doc_list.model().set_filter_string(self.regexp_filter)
Expand All @@ -622,10 +620,9 @@ def set_word_dict(self, word_dict=None):
rows.append(item.metas)

self.word_dict = pd.DataFrame(rows[1:], index=None)
if self.custom_tags is None:
self.__create_customtag_checkbox(self.word_dict)
self.__create_customtag_checkbox(self.word_dict)

if self.corpus is not None and word_dict is not None:
if self.corpus is not None:
self.setup_controls()
self.openContext(self.corpus)
self.doc_list.model().set_filter_string(self.regexp_filter)
Expand All @@ -641,8 +638,7 @@ def reset_widget(self):
# Corpus
self.corpus = None
self.custom_tag_dictionary = None
self.pos_checkboxes = [self.sc, self.nc]
self.custom_tags = None
# self.pos_checkboxes = [self.sc, self.nc]
# self.tagtype_box = None
# Widgets
self.search_listbox.model().set_domain(None)
Expand Down Expand Up @@ -778,7 +774,7 @@ def show_docs(self, slider_engaged=False):
self.custom,
self.custom_tag_dictionary,
self.agent_prominence_metric,
self.agent_prominence_score_min
self.agent_prominence_score_min,
)
self.Outputs.metrics_freq_table.send(
table_from_frame(
Expand Down Expand Up @@ -897,7 +893,7 @@ def on_exception(self, ex):
raise ex

def update_info(self):
self.pos_checkboxes = [self.sc, self.nc]
# self.pos_checkboxes = [self.sc, self.nc]
if self.corpus is not None:
has_tokens = self.corpus.has_tokens()
self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.corpus)}"
Expand All @@ -911,15 +907,15 @@ def update_info(self):

@gui.deferred
def commit(self):
self.pos_checkboxes = [self.sc, self.nc]
matched = unmatched = annotated_corpus = None
# self.pos_checkboxes = [self.sc, self.nc]
# matched = unmatched = annotated_corpus = None
if self.corpus is not None:
selected_docs = sorted(self.get_selected_indexes())
matched = self.corpus[selected_docs] if selected_docs else None
# matched = self.corpus[selected_docs] if selected_docs else None
mask = np.ones(len(self.corpus), bool)
mask[selected_docs] = 0
unmatched = self.corpus[mask] if mask.any() else None
annotated_corpus = create_annotated_table(self.corpus, selected_docs)
# unmatched = self.corpus[mask] if mask.any() else None
# annotated_corpus = create_annotated_table(self.corpus, selected_docs)
# self.Outputs.matching_docs.send(matched)
# self.Outputs.other_docs.send(unmatched)
# self.Outputs.corpus.send(annotated_corpus)
Expand Down

0 comments on commit 73bb28d

Please sign in to comment.