Skip to content

Commit

Permalink
Merge pull request #68 from navigating-stories/issue54
Browse files (browse the repository at this point in the history)
 issue54, changed ids to numeric
  • Loading branch information
ThijsVroegh committed Jun 21, 2024
2 parents 158c987 + 4df39fa commit 4d49b87
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions orangecontrib/storynavigation/modules/actoranalysis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,8 +3,8 @@

from __future__ import annotations


import os
import sys
import pandas as pd
import numpy as np
import storynavigation.modules.constants as constants
Expand Down Expand Up @@ -89,8 +89,9 @@ def __filter_and_sort_matched_dataframe_by_sentence(self, df, sent, sents):
matched_sent_df = df[df['sentence'] == sent]
matched_sent_df = matched_sent_df.copy()
matched_sent_df.loc[:, 'sorting_key'] = matched_sent_df['sentence'].map(lambda value: order_mapping.get(value, len(sents)))
matched_sent_df_sorted = matched_sent_df.sort_values(by='sorting_key').drop('sorting_key', axis=1)
matched_sent_df_sorted = matched_sent_df.sort_values(by='sorting_key').drop('sorting_key', axis=1)
return matched_sent_df_sorted


def __do_custom_tagging(self, df, cust_tag_cols):
df = df.copy()
Expand Down Expand Up @@ -145,15 +146,15 @@ def __postag_sents(
return self.__print_html_no_highlighted_tokens(sentences)

matched_df = None

for sentence in sentences:
ents = []
if custom:
nents = []
cents = []
new_color_map = constants.COLOR_MAP
if nouns or subjs:
matched_df = self.__filter_rows(story_elements_df, pos_tags)
matched_df = self.__filter_rows(story_elements_df, pos_tags)
matched_sent_df_sorted = self.__filter_and_sort_matched_dataframe_by_sentence(matched_df, sentence, sentences)
matched_sent_df_sorted['displacy_tag_strings'] = matched_sent_df_sorted['token_start_idx'] + ' | ' + matched_sent_df_sorted['token_end_idx'] + ' | ' + matched_sent_df_sorted['merged_tags']
nents = self.__do_tagging(matched_sent_df_sorted)
Expand All @@ -172,6 +173,7 @@ def __postag_sents(
else:
matched_df = self.__filter_rows(story_elements_df, pos_tags)
matched_sent_df_sorted = self.__filter_and_sort_matched_dataframe_by_sentence(matched_df, sentence, sentences)

matched_sent_df_sorted['displacy_tag_strings'] = matched_sent_df_sorted['token_start_idx'] + ' | ' + matched_sent_df_sorted['token_end_idx'] + ' | ' + matched_sent_df_sorted['merged_tags']
ents = self.__do_tagging(matched_sent_df_sorted)
options = {"ents": pos_tags, "colors": constants.COLOR_MAP}
Expand All @@ -185,6 +187,8 @@ def __postag_sents(

doc = {"text": sentence, "ents": ents}
html += displacy.render(doc, style="ent", options=options, manual=True)



if custom:
self.html_result = util.remove_span_tags_except_custom(html)
Expand All @@ -210,13 +214,17 @@ def postag_text(
Returns:
string: HTML string representation of POS tagged text
"""

if story_elements_df is None or (len(story_elements_df) == 0):
sentences = util.preprocess_text(text)
return self.__print_html_no_highlighted_tokens(sentences)

story_elements_df['sentence_id'] = pd.to_numeric(story_elements_df['sentence_id'])
story_elements_df['storyid'] = pd.to_numeric(story_elements_df['storyid'])
sorted_df = story_elements_df.sort_values(by=['storyid', 'sentence_id'], ascending=True)
sentences = sorted_df['sentence'].unique().tolist()

sentences_df = sorted_df[['sentence','storyid','sentence_id']].drop_duplicates()
sentences = sentences_df['sentence'].tolist()

selected_storyid = story_elements_df['storyid'].unique().tolist()[0]
specific_tag_choice_html = (str(int(nouns)) + str(int(subjs)) + str(int(custom)))
if selected_storyid in self.tagging_cache:
Expand Down

0 comments on commit 4d49b87

Please sign in to comment.