fixes bug which causes no updating or refreshing of custom tags in the tagger when a new CSV dictionary is loaded
navigating-stories · Jan 9, 2024 · 73bb28d · 73bb28d
1 parent bcd4bcc
commit 73bb28d
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 97 deletions.
diff --git a/orangecontrib/storynavigation/modules/actionanalysis.py b/orangecontrib/storynavigation/modules/actionanalysis.py
@@ -171,42 +171,19 @@ def postag_text(self, text, past_vbz, present_vbz):
                 normalised_token, is_valid_token = self.__is_valid_token(tag)
                 if is_valid_token:
                     if ((tag[4].text.lower().strip() in self.past_tense_verbs) or (tag[4].text.lower().strip()[:2] == "ge")) and (tag[4].text.lower().strip() not in self.false_positive_verbs):  # past tense
-                    # if tag[4].pos_ == "VERB":
-                        # if (tag[4].text.lower().strip() in self.past_tense_verbs) or (tag[4].text.lower().strip()[:2] == "ge"):  # past tense
-                        # vb_tense = tag[4].morph.get("Tense")
-                        # if vb_tense == "Past":
                         ents.append(
                             {"start": span[0], "end": span[1], "label": "PAST_VB"}
                         )
-                        # elif vb_tense == "Pres":
                     else:
-                        if (tag[4].pos_ == "VERB") and (tag[4].text.lower().strip() not in self.false_positive_verbs):
-                        # elif tag[4].text.lower().strip() in self.present_tense_verbs:
+                        if (tag[4].pos_ == "VERB") and (tag[4].text.lower().strip() not in self.false_positive_verbs):  # present tense
                             ents.append(
                                 {"start": span[0], "end": span[1], "label": "PRES_VB"}
                             )
-                        # else:
-                        #     if tag[4].text.lower().strip()[:2] == "ge":  # past tense
-                        #         ents.append(
-                        #             {
-                        #                 "start": span[0],
-                        #                 "end": span[1],
-                        #                 "label": "PAST_VB",
-                        #             }
-                        #         )
-                        #     else:
-                        #         ents.append(
-                        #             {
-                        #                 "start": span[0],
-                        #                 "end": span[1],
-                        #                 "label": "PRES_VB",
-                        #             }
-                        #         )
-
-                    # elif tag[4].pos_ in ["NOUN", "PRON", "PROPN"]:
-                    #     self.__update_postagging_metrics(
-                    #         tag[4].text.lower().strip(), tag[4]
-                    #     )
+
+                        elif tag[4].pos_ in ["NOUN", "PRON", "PROPN"]: # non-verbs (for noun-action table)
+                            self.__update_postagging_metrics(
+                                tag[4].text.lower().strip(), tag[4]
+                            )
 
             # specify sentences and filtered entities to tag / highlight
             doc = {"text": sentence, "ents": ents}
@@ -357,34 +334,6 @@ def generate_noun_action_table(self):
 
         return pd.DataFrame(rows, columns=["actor", "actions"])
 
-    # def generate_halliday_action_counts_table(self, text, dim_type="realm"):
-    #     rows = []
-
-    #     # Valid values for 'dim_type' parameter: realm, process, prosub, sub\
-    #     halliday_fname = constants.HALLIDAY_FILENAME.format(dim_type)
-    #     # halliday_fname = "halliday_dimensions_" + dim_type + ".json"
-    #     RESOURCES = ActionTagger.PKG / constants.RESOURCES_SUBPACKAGE
-    #     json_file = RESOURCES.joinpath(halliday_fname).open("r", encoding="utf8")
-    #     halliday_dict = json.load(json_file)
-
-    #     # Calculate the number of story words in each halliday dimension
-    #     words = text.split()
-    #     halliday_counts = {}
-    #     for item in halliday_dict:
-    #         halliday_counts[item] = 0
-
-    #     for word in words:
-    #         processed_word = word.lower().strip()
-    #         for item in halliday_dict:
-    #             if processed_word in halliday_dict[item]:
-    #                 halliday_counts[item] += 1
-
-    #     for item in halliday_dict:
-    #         rows.append([item, halliday_counts[item]])
-
-    #     return pd.DataFrame(rows, columns=["action", "frequency"])
-
-
 class ActionMetricCalculator:
     """Unused class / code so far..."""
 

diff --git a/orangecontrib/storynavigation/widgets/OWSNActionAnalysis.py b/orangecontrib/storynavigation/widgets/OWSNActionAnalysis.py
@@ -321,12 +321,11 @@ class Inputs:
         # corpus = Input("Corpus", Corpus, replaces=["Data"])
 
     class Outputs:
-        matching_docs = Output("Matching Docs", Corpus, default=True)
-        other_docs = Output("Other Docs", Corpus)
-        corpus = Output("Corpus", Corpus)
+        # matching_docs = Output("Matching Docs", Corpus, default=True)
+        # other_docs = Output("Other Docs", Corpus)
+        # corpus = Output("Corpus", Corpus)
         metrics_freq_table = Output("Frequency", Table)
         metrics_tensefreq_table = Output("Tense frequency", Table)
-        # halliday_actions_table = Output("Halliday action counts", Table)
         actor_action_table = Output("Actor action table", Table)
 
     settingsHandler = DomainContextHandler()
@@ -731,17 +730,17 @@ def update_info(self):
 
     @gui.deferred
     def commit(self):
-        matched = unmatched = annotated_corpus = None
+        # matched = unmatched = annotated_corpus = None
         if self.corpus is not None:
             selected_docs = sorted(self.get_selected_indexes())
-            matched = self.corpus[selected_docs] if selected_docs else None
+            # matched = self.corpus[selected_docs] if selected_docs else None
             mask = np.ones(len(self.corpus), bool)
             mask[selected_docs] = 0
-            unmatched = self.corpus[mask] if mask.any() else None
-            annotated_corpus = create_annotated_table(self.corpus, selected_docs)
-        self.Outputs.matching_docs.send(matched)
-        self.Outputs.other_docs.send(unmatched)
-        self.Outputs.corpus.send(annotated_corpus)
+            # unmatched = self.corpus[mask] if mask.any() else None
+            # annotated_corpus = create_annotated_table(self.corpus, selected_docs)
+        # self.Outputs.matching_docs.send(matched)
+        # self.Outputs.other_docs.send(unmatched)
+        # self.Outputs.corpus.send(annotated_corpus)
 
     def send_report(self):
         self.report_items(

diff --git a/orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py b/orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
@@ -330,9 +330,6 @@ class Inputs:
         word_dict = Input("Token categories", Table)
 
     class Outputs:
-        # matching_docs = Output("Matching Docs", Corpus, default=True)
-        # other_docs = Output("Other Docs", Corpus)
-        # corpus = Output("Corpus", Corpus)
         metrics_freq_table = Output("Frequency", Table)
         metrics_subfreq_table = Output("Frequency as subject", Table)
         metrics_customfreq_table = Output("Custom token frequency", Table)
@@ -388,7 +385,6 @@ def __init__(self):
         self.corpus = None  # initialise list of documents (corpus)
         self.word_dict = None  # initialise word dictionary
         self.custom_tag_dictionary = None
-        self.custom_tags = None
         self.__pending_selected_documents = self.selected_documents
 
         # Search features
@@ -424,8 +420,19 @@ def __init__(self):
         )
         self.allc = gui.checkBox(self.postags_box, self, "all_pos", "All")
         self.allc.setChecked(False)
+
+        self.custom_tags = gui.checkBox(
+            self.postags_box,
+            self,
+            "custom",
+            "Custom tokens",
+            callback=self.pos_selection_changed,
+        )
+
+        self.custom_tags.setEnabled(False)
+
         self.allc.stateChanged.connect(self.on_state_changed_pos)
-        self.pos_checkboxes = [self.sc, self.nc]
+        self.pos_checkboxes = [self.sc, self.nc, self.custom_tags]
         self.controlArea.layout().addWidget(self.postags_box)
 
         # Prominence score slider
@@ -536,6 +543,7 @@ def rehighlight_entities(self):
         self.commit.deferred()
 
     def __create_customtag_checkbox(self, wd):
+        self.custom_tags.setEnabled(True)
         # extract all categorisations in the input dictionary
         list_of_lists_categories = []
         if len(wd.columns) >= 2: 
@@ -553,15 +561,6 @@ def __create_customtag_checkbox(self, wd):
                     if len(current_dict_values) > 0:
                         self.custom_tag_dictionary[category] = current_dict_values
 
-        if self.custom_tags not in self.pos_checkboxes:
-            self.custom_tags = gui.checkBox(
-                self.postags_box,
-                self,
-                "custom",
-                "Custom tokens",
-                callback=self.pos_selection_changed,
-            )
-
     @Inputs.corpus
     def set_data(self, corpus=None):
         self.actortagger = ActorTagger(constants.NL_SPACY_MODEL)
@@ -593,10 +592,9 @@ def set_word_dict(self, word_dict=None):
                         rows.append(item.metas)
 
                     self.word_dict = pd.DataFrame(rows[1:], index=None)
-                    if self.custom_tags is None:
-                        self.__create_customtag_checkbox(self.word_dict)
+                    self.__create_customtag_checkbox(self.word_dict)
 
-                if self.corpus is not None and word_dict is not None:
+                if self.corpus is not None:
                     self.setup_controls()
                     self.openContext(self.corpus)
                     self.doc_list.model().set_filter_string(self.regexp_filter)
@@ -622,10 +620,9 @@ def set_word_dict(self, word_dict=None):
                         rows.append(item.metas)
 
                     self.word_dict = pd.DataFrame(rows[1:], index=None)
-                    if self.custom_tags is None:
-                        self.__create_customtag_checkbox(self.word_dict)
+                    self.__create_customtag_checkbox(self.word_dict)
 
-                    if self.corpus is not None and word_dict is not None:
+                    if self.corpus is not None:
                         self.setup_controls()
                         self.openContext(self.corpus)
                         self.doc_list.model().set_filter_string(self.regexp_filter)
@@ -641,8 +638,7 @@ def reset_widget(self):
         # Corpus
         self.corpus = None
         self.custom_tag_dictionary = None
-        self.pos_checkboxes = [self.sc, self.nc]
-        self.custom_tags = None
+        # self.pos_checkboxes = [self.sc, self.nc]
         # self.tagtype_box = None
         # Widgets
         self.search_listbox.model().set_domain(None)
@@ -778,7 +774,7 @@ def show_docs(self, slider_engaged=False):
                         self.custom,
                         self.custom_tag_dictionary,
                         self.agent_prominence_metric,
-                        self.agent_prominence_score_min
+                        self.agent_prominence_score_min,
                     )
                     self.Outputs.metrics_freq_table.send(
                         table_from_frame(
@@ -897,7 +893,7 @@ def on_exception(self, ex):
         raise ex
 
     def update_info(self):
-        self.pos_checkboxes = [self.sc, self.nc]
+        # self.pos_checkboxes = [self.sc, self.nc]
         if self.corpus is not None:
             has_tokens = self.corpus.has_tokens()
             self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.corpus)}"
@@ -911,15 +907,15 @@ def update_info(self):
 
     @gui.deferred
     def commit(self):
-        self.pos_checkboxes = [self.sc, self.nc]
-        matched = unmatched = annotated_corpus = None
+        # self.pos_checkboxes = [self.sc, self.nc]
+        # matched = unmatched = annotated_corpus = None
         if self.corpus is not None:
             selected_docs = sorted(self.get_selected_indexes())
-            matched = self.corpus[selected_docs] if selected_docs else None
+            # matched = self.corpus[selected_docs] if selected_docs else None
             mask = np.ones(len(self.corpus), bool)
             mask[selected_docs] = 0
-            unmatched = self.corpus[mask] if mask.any() else None
-            annotated_corpus = create_annotated_table(self.corpus, selected_docs)
+            # unmatched = self.corpus[mask] if mask.any() else None
+            # annotated_corpus = create_annotated_table(self.corpus, selected_docs)
         # self.Outputs.matching_docs.send(matched)
         # self.Outputs.other_docs.send(unmatched)
         # self.Outputs.corpus.send(annotated_corpus)