Skip to content

Commit

Permalink
Merge pull request #29 from navigating-stories/adapting-actor-widget
Browse files (browse the repository at this point in the history)
Adapting actor widget
  • Loading branch information
kodymoodley committed Jan 23, 2024
2 parents c1fa47e + cff5c24 commit 5473ad3
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 89 deletions.
36 changes: 2 additions & 34 deletions orangecontrib/storynavigation/modules/actoranalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,6 @@ def postag_text(
pos_tags.append("SP")
pos_tags.append("SNP")

print()
print()
print("pos_tags: ", pos_tags)
print()
print()
if len(pos_tags) == 0:
for sentence in sentences:
doc = {"text": sentence, "ents": []}
Expand All @@ -302,34 +297,13 @@ def postag_text(

story_elements_df = story_elements_df.copy()
story_elements_df['story_navigator_tag'] = story_elements_df['story_navigator_tag'].astype(str)
story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str)

print()
print()
print("story_elements: ", story_elements_df)
print()
print()


matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]
print()
print()
print("matched_df1: ", matched_df)
print()
print()
story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str)
matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]

# matched_df = matched_df.copy()
matched_df['merged_tags'] = np.where(matched_df['story_navigator_tag'] == '-', matched_df['spacy_tag'], matched_df['story_navigator_tag'])
matched_df['token_start_idx'] = matched_df['token_start_idx'].astype(str)
matched_df['token_end_idx'] = matched_df['token_end_idx'].astype(str)
matched_df['displacy_tag_strings'] = matched_df['token_start_idx'] + ' | ' + matched_df['token_end_idx'] + ' | ' + matched_df['merged_tags']

print()
print()
print("matched_df2: ", matched_df)
print()
print()

order_mapping = {value: index for index, value in enumerate(sentences)}

for sentence in sentences:
Expand All @@ -347,12 +321,6 @@ def postag_text(

ents = util.remove_duplicate_tagged_entities(ents)

# print()
# print()
# print('ents: ', ents)
# print()
# print()

doc = {"text": sentence, "ents": ents}
options = {"ents": pos_tags, "colors": constants.COLOR_MAP}
html += displacy.render(doc, style="ent", options=options, manual=True)
Expand Down
24 changes: 24 additions & 0 deletions orangecontrib/storynavigation/modules/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import spacy
import os
import string
import pandas as pd
import storynavigation.modules.constants as constants

def entity_tag_already_exists(ents, start, end):
Expand Down Expand Up @@ -135,6 +136,29 @@ def preprocess_text(text):

return cleaned_sents

def convert_orangetable_to_dataframe(table):
    """Convert an Orange Data Table object to a pandas dataframe.

    Columns appear in the order ``table.domain.variables`` followed by
    ``table.domain.metas``. Every cell is converted with ``str()``, so the
    resulting dataframe contains only string values.

    Args:
        table (Orange.data.Table): an Orange Data Table instance

    Returns:
        df (pandas.DataFrame): a pandas dataframe with the same content
            (info) and structure contained in the Orange Data Table
    """
    # Build the ordered column sequence once; it is reused for both the
    # header names and the per-row value lookup so the two cannot drift apart.
    domain = table.domain
    columns = domain.variables + domain.metas
    column_names = [var.name for var in columns]

    # One list of stringified values per table row, in column order.
    rows = [[str(entry[var]) for var in columns] for entry in table]

    return pd.DataFrame(rows, columns=column_names)

def remove_span_tags(html_string):
"""Removes span tags (including content) from an HTML string
Expand Down
95 changes: 40 additions & 55 deletions orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, Iterable, List, Set
import numpy as np
import pandas as pd
from orangecontrib.storynavigation.modules import util

# Imports from Qt
from AnyQt.QtCore import (
Expand Down Expand Up @@ -428,7 +429,6 @@ def __init__(self):
)

self.custom_tags.setEnabled(False)

self.allc.stateChanged.connect(self.on_state_changed_pos)
self.pos_checkboxes = [self.sc, self.nc, self.custom_tags]
self.controlArea.layout().addWidget(self.postags_box)
Expand Down Expand Up @@ -517,37 +517,44 @@ def __init__(self):
self.doc_list.selectionModel().selectionChanged.connect(self.selection_changed)
# Document contents
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
# self.doc_webview.setStyleSheet("QWidget {background-color: #0ff}")
self.doc_webview.setStyleSheet("QWidget {background-color: #0ff}")
self.mainArea.layout().addWidget(self.splitter)

def on_state_changed_pos(self, state):
def __uncheckAll(self):
for checkBox in self.pos_checkboxes:
checkBox.setCheckState(False)

def __checkAll(self):
for checkBox in self.pos_checkboxes:
checkBox.setCheckState(state)
checkBox.setCheckState(True)

def on_state_changed_pos(self, checked):
for checkBox in self.pos_checkboxes:
if checkBox == self.allc:
if checkBox.isChecked() and not checked:
self.__uncheckAll()
elif (not checkBox.isChecked()) and checked:
self.__checkAll()

checkBox.setCheckState(checked)

def copy_to_clipboard(self):
text = self.doc_webview.selectedText()
print('selected text: ', text)
QApplication.clipboard().setText(text)

def pos_selection_changed(self):
self.show_docs()
self.commit.deferred()

def ner_selection_changed(self):
# self.show_docs()
self.commit.deferred()

def rehighlight_entities(self):
# self.show_docs()
self.commit.deferred()

@Inputs.stories
def set_stories(self, stories=None):
self.stories = stories
self.actortagger = ActorTagger(constants.NL_SPACY_MODEL)
if stories is not None:
self.setup_controls()
# self.openContext(self.corpus)
# self.doc_list.model().set_filter_string(self.regexp_filter)
# self.openContext(self.stories)
self.doc_list.model().set_filter_string(self.regexp_filter)
# self.select_variables()
self.list_docs()
# self.update_info()
Expand All @@ -558,39 +565,18 @@ def set_stories(self, stories=None):
@Inputs.story_elements
def set_tagging_data(self, story_elements=None):
if story_elements is not None:
self.story_elements = pd.concat(table_to_frames(story_elements), axis=1)
print()
print()
print('story-n: ', self.story_elements['story_navigator_tag'])
print()
print()
print()
print()
print('story-s: ', self.story_elements['spacy_tag'])
print()
print()


self.story_elements = util.convert_orangetable_to_dataframe(story_elements)
story_elements_grouped_by_story = self.story_elements.groupby('storyid')
for storyid, story_df in story_elements_grouped_by_story:
self.story_elements_dict[storyid] = story_df
print()
print()
print(storyid)
print('dataframe1: ', self.story_elements_dict[storyid]['story_navigator_tag'])
print('dataframe2: ', self.story_elements_dict[storyid]['spacy_tag'])
print()
print()


self.setup_controls()
# self.openContext(self.corpus)
# self.doc_list.model().set_filter_string(self.regexp_filter)
# self.openContext(self.stories)
self.doc_list.model().set_filter_string(self.regexp_filter)
# self.select_variables()
self.list_docs()
# self.update_info()
# self.set_selection()
# self.show_docs()
self.show_docs()

def reset_widget(self):
Expand All @@ -600,7 +586,6 @@ def reset_widget(self):
self.search_listbox.model().set_domain(None)
self.display_listbox.model().set_domain(None)
self.filter_input.clear()
self.update_info()
# Models/vars
self.doc_list_model.clear()
# Warnings
Expand Down Expand Up @@ -726,7 +711,7 @@ def show_docs(self, slider_engaged=False):
self.subjs,
self.agent_prominence_metric,
self.agent_prominence_score_min,
self.story_elements_dict[c_index]
self.story_elements_dict[str(c_index)]
)
self.Outputs.metrics_freq_table.send(
table_from_frame(
Expand Down Expand Up @@ -813,7 +798,7 @@ def search_features_changed(self):

def display_features_changed(self):
self.display_features = self.__get_selected_rows(self.display_listbox)
# self.show_docs()
self.show_docs()

def regenerate_docs(self) -> List[str]:
self.Warning.no_feats_search.clear()
Expand All @@ -828,13 +813,13 @@ def refresh_search(self):
# when currently selected items are filtered selection is empty
# select first element in the view in that case
self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0))
self.update_info()
# self.update_info()
self.start(
_count_matches,
self.doc_list_model.get_filter_content(),
self.regexp_filter,
)
# self.show_docs()
self.show_docs()
self.commit.deferred()

def on_done(self, res: int):
Expand All @@ -844,18 +829,18 @@ def on_done(self, res: int):
def on_exception(self, ex):
raise ex

def update_info(self):
# self.pos_checkboxes = [self.sc, self.nc]
if self.stories is not None:
has_tokens = self.stories.has_tokens()
self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
else:
self.n_matching = "n/a"
self.n_matches = "n/a"
self.n_tokens = "n/a"
self.n_types = "n/a"
# def update_info(self):
# # self.pos_checkboxes = [self.sc, self.nc]
# if self.stories is not None:
# has_tokens = self.stories.has_tokens()
# self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
# self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
# self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
# else:
# self.n_matching = "n/a"
# self.n_matches = "n/a"
# self.n_tokens = "n/a"
# self.n_types = "n/a"

@gui.deferred
def commit(self):
Expand Down

0 comments on commit 5473ad3

Please sign in to comment.