Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapting actor widget #29

Merged
merged 2 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 2 additions & 34 deletions orangecontrib/storynavigation/modules/actoranalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,6 @@ def postag_text(
pos_tags.append("SP")
pos_tags.append("SNP")

print()
print()
print("pos_tags: ", pos_tags)
print()
print()
if len(pos_tags) == 0:
for sentence in sentences:
doc = {"text": sentence, "ents": []}
Expand All @@ -302,34 +297,13 @@ def postag_text(

story_elements_df = story_elements_df.copy()
story_elements_df['story_navigator_tag'] = story_elements_df['story_navigator_tag'].astype(str)
story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str)

print()
print()
print("story_elements: ", story_elements_df)
print()
print()


matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]
print()
print()
print("matched_df1: ", matched_df)
print()
print()
story_elements_df['spacy_tag'] = story_elements_df['spacy_tag'].astype(str)
matched_df = story_elements_df[story_elements_df['story_navigator_tag'].isin(pos_tags) | story_elements_df['spacy_tag'].isin(pos_tags)]

# matched_df = matched_df.copy()
matched_df['merged_tags'] = np.where(matched_df['story_navigator_tag'] == '-', matched_df['spacy_tag'], matched_df['story_navigator_tag'])
matched_df['token_start_idx'] = matched_df['token_start_idx'].astype(str)
matched_df['token_end_idx'] = matched_df['token_end_idx'].astype(str)
matched_df['displacy_tag_strings'] = matched_df['token_start_idx'] + ' | ' + matched_df['token_end_idx'] + ' | ' + matched_df['merged_tags']

print()
print()
print("matched_df2: ", matched_df)
print()
print()

order_mapping = {value: index for index, value in enumerate(sentences)}

for sentence in sentences:
Expand All @@ -347,12 +321,6 @@ def postag_text(

ents = util.remove_duplicate_tagged_entities(ents)

# print()
# print()
# print('ents: ', ents)
# print()
# print()

doc = {"text": sentence, "ents": ents}
options = {"ents": pos_tags, "colors": constants.COLOR_MAP}
html += displacy.render(doc, style="ent", options=options, manual=True)
Expand Down
24 changes: 24 additions & 0 deletions orangecontrib/storynavigation/modules/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import spacy
import os
import string
import pandas as pd
import storynavigation.modules.constants as constants

def entity_tag_already_exists(ents, start, end):
Expand Down Expand Up @@ -135,6 +136,29 @@ def preprocess_text(text):

return cleaned_sents

def convert_orangetable_to_dataframe(table):
    """Converts an Orange Data Table object to a Pandas dataframe.

    Columns appear in the order ``table.domain.variables`` followed by
    ``table.domain.metas``, and each column is named after the ``name``
    of the corresponding variable. Every cell is converted with ``str()``,
    so the resulting dataframe holds only strings, including for columns
    that were numeric in the table.

    Args:
        table (Orange.data.Table): an Orange Data Table instance

    Returns:
        df (pandas.DataFrame): a pandas dataframe with the same content (info)
            and structure contained in the Orange Data Table
    """
    # Build the ordered column list once: it is loop-invariant, and using the
    # same list for both the header and the cells keeps them aligned.
    columns = [*table.domain.variables, *table.domain.metas]
    column_names = [column.name for column in columns]

    # One list of stringified cells per table row
    rows = [[str(entry[column]) for column in columns] for entry in table]

    return pd.DataFrame(rows, columns=column_names)

def remove_span_tags(html_string):
"""Removes span tags (including content) from an HTML string
Expand Down
95 changes: 40 additions & 55 deletions orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any, Iterable, List, Set
import numpy as np
import pandas as pd
from orangecontrib.storynavigation.modules import util

# Imports from Qt
from AnyQt.QtCore import (
Expand Down Expand Up @@ -428,7 +429,6 @@ def __init__(self):
)

self.custom_tags.setEnabled(False)

self.allc.stateChanged.connect(self.on_state_changed_pos)
self.pos_checkboxes = [self.sc, self.nc, self.custom_tags]
self.controlArea.layout().addWidget(self.postags_box)
Expand Down Expand Up @@ -517,37 +517,44 @@ def __init__(self):
self.doc_list.selectionModel().selectionChanged.connect(self.selection_changed)
# Document contents
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
# self.doc_webview.setStyleSheet("QWidget {background-color: #0ff}")
self.doc_webview.setStyleSheet("QWidget {background-color: #0ff}")
self.mainArea.layout().addWidget(self.splitter)

def on_state_changed_pos(self, state):
def __uncheckAll(self):
for checkBox in self.pos_checkboxes:
checkBox.setCheckState(False)

def __checkAll(self):
for checkBox in self.pos_checkboxes:
checkBox.setCheckState(state)
checkBox.setCheckState(True)

def on_state_changed_pos(self, checked):
for checkBox in self.pos_checkboxes:
if checkBox == self.allc:
if checkBox.isChecked() and not checked:
self.__uncheckAll()
elif (not checkBox.isChecked()) and checked:
self.__checkAll()

checkBox.setCheckState(checked)

def copy_to_clipboard(self):
text = self.doc_webview.selectedText()
print('selected text: ', text)
QApplication.clipboard().setText(text)

def pos_selection_changed(self):
self.show_docs()
self.commit.deferred()

def ner_selection_changed(self):
# self.show_docs()
self.commit.deferred()

def rehighlight_entities(self):
# self.show_docs()
self.commit.deferred()

@Inputs.stories
def set_stories(self, stories=None):
self.stories = stories
self.actortagger = ActorTagger(constants.NL_SPACY_MODEL)
if stories is not None:
self.setup_controls()
# self.openContext(self.corpus)
# self.doc_list.model().set_filter_string(self.regexp_filter)
# self.openContext(self.stories)
self.doc_list.model().set_filter_string(self.regexp_filter)
# self.select_variables()
self.list_docs()
# self.update_info()
Expand All @@ -558,39 +565,18 @@ def set_stories(self, stories=None):
@Inputs.story_elements
def set_tagging_data(self, story_elements=None):
if story_elements is not None:
self.story_elements = pd.concat(table_to_frames(story_elements), axis=1)
print()
print()
print('story-n: ', self.story_elements['story_navigator_tag'])
print()
print()
print()
print()
print('story-s: ', self.story_elements['spacy_tag'])
print()
print()


self.story_elements = util.convert_orangetable_to_dataframe(story_elements)
story_elements_grouped_by_story = self.story_elements.groupby('storyid')
for storyid, story_df in story_elements_grouped_by_story:
self.story_elements_dict[storyid] = story_df
print()
print()
print(storyid)
print('dataframe1: ', self.story_elements_dict[storyid]['story_navigator_tag'])
print('dataframe2: ', self.story_elements_dict[storyid]['spacy_tag'])
print()
print()


self.setup_controls()
# self.openContext(self.corpus)
# self.doc_list.model().set_filter_string(self.regexp_filter)
# self.openContext(self.stories)
self.doc_list.model().set_filter_string(self.regexp_filter)
# self.select_variables()
self.list_docs()
# self.update_info()
# self.set_selection()
# self.show_docs()
self.show_docs()

def reset_widget(self):
Expand All @@ -600,7 +586,6 @@ def reset_widget(self):
self.search_listbox.model().set_domain(None)
self.display_listbox.model().set_domain(None)
self.filter_input.clear()
self.update_info()
# Models/vars
self.doc_list_model.clear()
# Warnings
Expand Down Expand Up @@ -726,7 +711,7 @@ def show_docs(self, slider_engaged=False):
self.subjs,
self.agent_prominence_metric,
self.agent_prominence_score_min,
self.story_elements_dict[c_index]
self.story_elements_dict[str(c_index)]
)
self.Outputs.metrics_freq_table.send(
table_from_frame(
Expand Down Expand Up @@ -813,7 +798,7 @@ def search_features_changed(self):

def display_features_changed(self):
self.display_features = self.__get_selected_rows(self.display_listbox)
# self.show_docs()
self.show_docs()

def regenerate_docs(self) -> List[str]:
self.Warning.no_feats_search.clear()
Expand All @@ -828,13 +813,13 @@ def refresh_search(self):
# when currently selected items are filtered selection is empty
# select first element in the view in that case
self.doc_list.setCurrentIndex(self.doc_list.model().index(0, 0))
self.update_info()
# self.update_info()
self.start(
_count_matches,
self.doc_list_model.get_filter_content(),
self.regexp_filter,
)
# self.show_docs()
self.show_docs()
self.commit.deferred()

def on_done(self, res: int):
Expand All @@ -844,18 +829,18 @@ def on_done(self, res: int):
def on_exception(self, ex):
raise ex

def update_info(self):
# self.pos_checkboxes = [self.sc, self.nc]
if self.stories is not None:
has_tokens = self.stories.has_tokens()
self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
else:
self.n_matching = "n/a"
self.n_matches = "n/a"
self.n_tokens = "n/a"
self.n_types = "n/a"
# def update_info(self):
# # self.pos_checkboxes = [self.sc, self.nc]
# if self.stories is not None:
# has_tokens = self.stories.has_tokens()
# self.n_matching = f"{self.doc_list.model().rowCount()}/{len(self.stories)}"
# self.n_tokens = sum(map(len, self.stories.tokens)) if has_tokens else "n/a"
# self.n_types = len(self.stories.dictionary) if has_tokens else "n/a"
# else:
# self.n_matching = "n/a"
# self.n_matches = "n/a"
# self.n_tokens = "n/a"
# self.n_types = "n/a"

@gui.deferred
def commit(self):
Expand Down
Loading