Merge pull request #29 from navigating-stories/adapting-actor-widget

Adapting actor widget
navigating-stories · Jan 23, 2024 · 5473ad3 · 5473ad3
2 parents c1fa47e + cff5c24
commit 5473ad3
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 89 deletions.
diff --git a/orangecontrib/storynavigation/modules/actoranalysis.py b/orangecontrib/storynavigation/modules/actoranalysis.py
@@ -288,11 +288,6 @@ def postag_text(
             pos_tags.append("SP")
             pos_tags.append("SNP")
 
-        print()
-        print()
-        print("pos_tags: ", pos_tags)
-        print()
-        print()
         if len(pos_tags) == 0:
             for sentence in sentences:
                 doc = {"text": sentence, "ents": []}
@@ -302,34 +297,13 @@ def postag_text(
 
         story_elements_df = story_elements_df.copy()
         story_elements_df['story_navigator_tag'] = story_elements_df['story_navigator_tag'].astype(str)
-        story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str)
-
-        print()
-        print()
-        print("story_elements: ", story_elements_df)
-        print()
-        print()        
-
-
-        matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]
-        print()
-        print()
-        print("matched_df1: ", matched_df)
-        print()
-        print()        
+        story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str) 
+        matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]       
 
-        # matched_df = matched_df.copy()
         matched_df['merged_tags'] = np.where(matched_df['story_navigator_tag'] == '-', matched_df['spacy_tag'], matched_df['story_navigator_tag'])
         matched_df['token_start_idx'] = matched_df['token_start_idx'].astype(str)
         matched_df['token_end_idx'] = matched_df['token_end_idx'].astype(str)
         matched_df['displacy_tag_strings'] = matched_df['token_start_idx'] + ' | ' + matched_df['token_end_idx'] + ' | ' + matched_df['merged_tags']
-
-        print()
-        print()
-        print("matched_df2: ", matched_df)
-        print()
-        print()
-
         order_mapping = {value: index for index, value in enumerate(sentences)}
 
         for sentence in sentences:
@@ -347,12 +321,6 @@ def postag_text(
 
                 ents = util.remove_duplicate_tagged_entities(ents)                
 
-            # print()
-            # print()
-            # print('ents: ', ents)
-            # print()
-            # print()
-
             doc = {"text": sentence, "ents": ents}
             options = {"ents": pos_tags, "colors": constants.COLOR_MAP}
             html += displacy.render(doc, style="ent", options=options, manual=True)

diff --git a/orangecontrib/storynavigation/modules/util.py b/orangecontrib/storynavigation/modules/util.py
@@ -5,6 +5,7 @@
 import spacy
 import os
 import string
+import pandas as pd
 import storynavigation.modules.constants as constants
 
 def entity_tag_already_exists(ents, start, end):
@@ -135,6 +136,29 @@ def preprocess_text(text):
 
     return cleaned_sents
 
+def convert_orangetable_to_dataframe(table):
+    """Converts an Orange Data Table object to a Pandas dataframe
+
+    Every cell is passed through str(), so all columns of the result hold strings.
+
+    Args:
+        table (Orange.data.Table): an Orange Data Table instance
+
+    Returns:
+        df (pandas.DataFrame): one row per table entry; domain variables first, then metas
+    """
+    # Resolve the column order once (domain variables, then metas) rather than
+    # rebuilding the combined sequence for every row of the table
+    all_variables = list(table.domain.variables) + list(table.domain.metas)
+    all_column_names = [var.name for var in all_variables]
+
+    # Create a list of lists representing the data, one inner list per table row
+    data = [[str(entry[var]) for var in all_variables] for entry in table]
+
+    # Convert to a pandas DataFrame
+    df = pd.DataFrame(data, columns=all_column_names)
+
+    return df
 
 def remove_span_tags(html_string):
     """Removes span tags (including content) from an HTML string

diff --git a/orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py b/orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
@@ -5,6 +5,7 @@
 from typing import Any, Iterable, List, Set
 import numpy as np
 import pandas as pd
+from orangecontrib.storynavigation.modules import util
 
 # Imports from Qt
 from AnyQt.QtCore import (
@@ -428,7 +429,6 @@ def __init__(self):
         )
 
         self.custom_tags.setEnabled(False)
-
         self.allc.stateChanged.connect(self.on_state_changed_pos)
         self.pos_checkboxes = [self.sc, self.nc, self.custom_tags]
         self.controlArea.layout().addWidget(self.postags_box)
@@ -517,37 +517,44 @@ def __init__(self):
         self.doc_list.selectionModel().selectionChanged.connect(self.selection_changed)
         # Document contents
         self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
-        # self.doc_webview.setStyleSheet("QWidget {background-color:   #0ff}")
+        self.doc_webview.setStyleSheet("QWidget {background-color:   #0ff}")
         self.mainArea.layout().addWidget(self.splitter)
 
-    def on_state_changed_pos(self, state):
+    def __uncheckAll(self):  # untick every checkbox listed in self.pos_checkboxes
+        for checkBox in self.pos_checkboxes:
+            checkBox.setChecked(False)  # setCheckState() expects a Qt.CheckState, not a bool
+
+    def __checkAll(self):  # tick every checkbox listed in self.pos_checkboxes
         for checkBox in self.pos_checkboxes:
-            checkBox.setCheckState(state)
+            checkBox.setChecked(True)  # as a CheckState, True (== 1) would mean PartiallyChecked
+
+    def on_state_changed_pos(self, checked):  # connected to self.allc.stateChanged in __init__
+        for checkBox in self.pos_checkboxes:
+            if checkBox == self.allc:  # NOTE(review): __init__ builds pos_checkboxes without allc - confirm this branch is reachable
+                if checkBox.isChecked() and not checked:
+                    self.__uncheckAll()
+                elif (not checkBox.isChecked()) and checked:
+                    self.__checkAll()
+
+            checkBox.setCheckState(checked)  # forwards the received state value to each listed checkbox
 
     def copy_to_clipboard(self):
         text = self.doc_webview.selectedText()
+        # Place the webview's current selection on the application clipboard
         QApplication.clipboard().setText(text)
 
     def pos_selection_changed(self):
         self.show_docs()
         self.commit.deferred()
 
-    def ner_selection_changed(self):
-        # self.show_docs()
-        self.commit.deferred()
-
-    def rehighlight_entities(self):
-        # self.show_docs()
-        self.commit.deferred()
-
     @Inputs.stories
     def set_stories(self, stories=None):
         self.stories = stories
         self.actortagger = ActorTagger(constants.NL_SPACY_MODEL)
         if stories is not None:
             self.setup_controls()
-            # self.openContext(self.corpus)
-            # self.doc_list.model().set_filter_string(self.regexp_filter)
+            # self.openContext(self.stories)
+            self.doc_list.model().set_filter_string(self.regexp_filter)
             # self.select_variables()
             self.list_docs()
             # self.update_info()
@@ -558,39 +565,18 @@ def set_stories(self, stories=None):
     @Inputs.story_elements
     def set_tagging_data(self, story_elements=None):
         if story_elements is not None:
-            self.story_elements = pd.concat(table_to_frames(story_elements), axis=1)
-            print()
-            print()
-            print('story-n: ', self.story_elements['story_navigator_tag'])
-            print()
-            print()
-            print()
-            print()
-            print('story-s: ', self.story_elements['spacy_tag'])
-            print()
-            print()
-
-
+            self.story_elements = util.convert_orangetable_to_dataframe(story_elements)
             story_elements_grouped_by_story = self.story_elements.groupby('storyid')
             for storyid, story_df in story_elements_grouped_by_story:
                 self.story_elements_dict[storyid] = story_df
-                print()
-                print()
-                print(storyid)
-                print('dataframe1: ', self.story_elements_dict[storyid]['story_navigator_tag'])
-                print('dataframe2: ', self.story_elements_dict[storyid]['spacy_tag'])
-                print()
-                print()
-
 
             self.setup_controls()
-            # self.openContext(self.corpus)
-            # self.doc_list.model().set_filter_string(self.regexp_filter)
+            # self.openContext(self.stories)
+            self.doc_list.model().set_filter_string(self.regexp_filter)
             # self.select_variables()
             self.list_docs()
             # self.update_info()
             # self.set_selection()
-            # self.show_docs()
             self.show_docs()
 
     def reset_widget(self):
@@ -600,7 +586,6 @@ def reset_widget(self):
         self.search_listbox.model().set_domain(None)
         self.display_listbox.model().set_domain(None)
         self.filter_input.clear()
-        self.update_info()
         # Models/vars
         self.doc_list_model.clear()
         # Warnings
@@ -726,7 +711,7 @@ def show_docs(self, slider_engaged=False):
                             self.subjs,
                             self.agent_prominence_metric,
                             self.agent_prominence_score_min,
-                            self.story_elements_dict[c_index]
+                            self.story_elements_dict[str(c_index)]
                         )
                     self.Outputs.metrics_freq_table.send(
                         table_from_frame(
@@ -813,7 +798,7 @@ def search_features_changed(self):
 
     def display_features_changed(self):
         self.display_features = self.__get_selected_rows(self.display_listbox)
-        # self.show_docs()
+        self.show_docs()
 
     def regenerate_docs(self) -> List[str]:
         self.Warning.no_feats_search.clear()
@@ -828,13 +813,13 @@ def refresh_search(self):
                 # when currently selected items are filtered selection is empty
                 # select first element in the view in that case
                 self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0))
-            self.update_info()
+            # self.update_info()
             self.start(
                 _count_matches,
                 self.doc_list_model.get_filter_content(),
                 self.regexp_filter,
             )
-            # self.show_docs()
+            self.show_docs()
             self.commit.deferred()
 
     def on_done(self, res: int):
@@ -844,18 +829,18 @@ def on_done(self, res: int):
     def on_exception(self, ex):
         raise ex
 
-    def update_info(self):
-        # self.pos_checkboxes = [self.sc, self.nc]
-        if self.stories is not None:
-            has_tokens = self.stories.has_tokens()
-            self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
-            self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
-            self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
-        else:
-            self.n_matching = "n/a"
-            self.n_matches = "n/a"
-            self.n_tokens = "n/a"
-            self.n_types = "n/a"
+    # def update_info(self):
+    #     # self.pos_checkboxes = [self.sc, self.nc]
+    #     if self.stories is not None:
+    #         has_tokens = self.stories.has_tokens()
+    #         self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
+    #         self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
+    #         self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
+    #     else:
+    #         self.n_matching = "n/a"
+    #         self.n_matches = "n/a"
+    #         self.n_tokens = "n/a"
+    #         self.n_types = "n/a"
 
     @gui.deferred
     def commit(self):