Merge pull request #68 from navigating-stories/issue54

Issue #54: convert the `storyid` and `sentence_id` columns to numeric types
navigating-stories · Jun 21, 2024 · 4d49b87
2 parents 158c987 + 4df39fa
commit 4d49b87
Showing 1 changed file with 14 additions and 6 deletions.
diff --git a/orangecontrib/storynavigation/modules/actoranalysis.py b/orangecontrib/storynavigation/modules/actoranalysis.py
@@ -3,8 +3,8 @@
 
 from __future__ import annotations
 
-
 import os
+import sys
 import pandas as pd
 import numpy as np
 import storynavigation.modules.constants as constants
@@ -89,8 +89,9 @@ def __filter_and_sort_matched_dataframe_by_sentence(self, df, sent, sents):
         matched_sent_df = df[df['sentence'] == sent]
         matched_sent_df = matched_sent_df.copy()
         matched_sent_df.loc[:, 'sorting_key'] = matched_sent_df['sentence'].map(lambda value: order_mapping.get(value, len(sents)))
-        matched_sent_df_sorted = matched_sent_df.sort_values(by='sorting_key').drop('sorting_key', axis=1)
+        matched_sent_df_sorted = matched_sent_df.sort_values(by='sorting_key').drop('sorting_key', axis=1)        
         return matched_sent_df_sorted
+
 
     def __do_custom_tagging(self, df, cust_tag_cols):
         df = df.copy()
@@ -145,15 +146,15 @@ def __postag_sents(
             return self.__print_html_no_highlighted_tokens(sentences)
 
         matched_df = None
-
+        
         for sentence in sentences:
             ents = []
             if custom:
                 nents = []
                 cents = []
                 new_color_map = constants.COLOR_MAP
                 if nouns or subjs:
-                    matched_df = self.__filter_rows(story_elements_df, pos_tags)
+                    matched_df = self.__filter_rows(story_elements_df, pos_tags)                    
                     matched_sent_df_sorted = self.__filter_and_sort_matched_dataframe_by_sentence(matched_df, sentence, sentences)
                     matched_sent_df_sorted['displacy_tag_strings'] = matched_sent_df_sorted['token_start_idx'] + ' | ' + matched_sent_df_sorted['token_end_idx'] + ' | ' + matched_sent_df_sorted['merged_tags']
                     nents = self.__do_tagging(matched_sent_df_sorted)
@@ -172,6 +173,7 @@ def __postag_sents(
             else:
                 matched_df = self.__filter_rows(story_elements_df, pos_tags)
                 matched_sent_df_sorted = self.__filter_and_sort_matched_dataframe_by_sentence(matched_df, sentence, sentences)
+
                 matched_sent_df_sorted['displacy_tag_strings'] = matched_sent_df_sorted['token_start_idx'] + ' | ' + matched_sent_df_sorted['token_end_idx'] + ' | ' + matched_sent_df_sorted['merged_tags']
                 ents = self.__do_tagging(matched_sent_df_sorted)
                 options = {"ents": pos_tags, "colors": constants.COLOR_MAP}
@@ -185,6 +187,8 @@ def __postag_sents(
 
             doc = {"text": sentence, "ents": ents}    
             html += displacy.render(doc, style="ent", options=options, manual=True)
+
+
 
         if custom:
             self.html_result = util.remove_span_tags_except_custom(html)
@@ -210,13 +214,17 @@ def postag_text(
         Returns:
             string: HTML string representation of POS tagged text
         """
+
         if story_elements_df is None or (len(story_elements_df) == 0):
             sentences = util.preprocess_text(text)
             return self.__print_html_no_highlighted_tokens(sentences)
 
+        story_elements_df['sentence_id'] = pd.to_numeric(story_elements_df['sentence_id'])
+        story_elements_df['storyid'] = pd.to_numeric(story_elements_df['storyid'])
         sorted_df = story_elements_df.sort_values(by=['storyid', 'sentence_id'], ascending=True)
-        sentences = sorted_df['sentence'].unique().tolist()
-
+        sentences_df = sorted_df[['sentence','storyid','sentence_id']].drop_duplicates()
+        sentences = sentences_df['sentence'].tolist()      
+
         selected_storyid = story_elements_df['storyid'].unique().tolist()[0]
         specific_tag_choice_html = (str(int(nouns)) + str(int(subjs)) + str(int(custom)))
         if selected_storyid in self.tagging_cache: