Commit
remove old function
f-hafner committed Jan 8, 2024
1 parent 8b44579 commit 4f2f4a9
Showing 1 changed file with 0 additions and 175 deletions.
175 changes: 0 additions & 175 deletions orangecontrib/storynavigation/modules/actoranalysis.py
@@ -254,181 +254,6 @@ def __get_custom_tags_list(self, custom_dict):
            result.append(token.upper())
        return result

    def postag_text(
        self, text, nouns, subjs, custom, custom_dict, selected_prominence_metric, prominence_score_min
    ):
        """POS-tags story text and returns an HTML string which encodes the tagged text, ready for rendering in the UI

        Args:
            text (string): Story text
            nouns (boolean): whether noun tokens should be tagged
            subjs (boolean): whether subject tokens should be tagged
            custom (boolean): whether custom-dictionary tokens should be tagged
            custom_dict (dict): custom words to match and tag in the story text
            selected_prominence_metric: the selected metric by which to calculate the word prominence score
            prominence_score_min: minimum prominence score a word needs in order to be tagged

        Returns:
            string: HTML string representation of POS tagged text
        """
        self.custom_category_frequencies = {}

        text = re.sub(";", ".", text)  # only for the test cases that have no "."
        # -> otherwise, we have only one sentence after the next step
        sentences = util.preprocess_text(text)
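        # util.preprocess_text is assumed to split the story into a list of sentences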

        self.__calculate_pretagging_metrics(sentences)

        # pos tags that the user wants to highlight
        pos_tags = []
        custom_tag_labels = []
        if nouns:
            pos_tags.append("NOUN")
            pos_tags.append("PRON")
            pos_tags.append("PROPN")
            pos_tags.append("NSP")
            pos_tags.append("NSNP")
        if subjs:
            pos_tags.append("SUBJ")
            pos_tags.append("SP")
            pos_tags.append("SNP")
        if custom:
            if custom_dict is not None:
                custom_tag_labels = self.__get_custom_tags_list(custom_dict)
                pos_tags.extend(custom_tag_labels)

        # output of this function
        html = ""
        logging.debug("vars(self).keys: %s", vars(self).keys())

        # generate and store nlp tagged models for each sentence
        if self.sentence_nlp_models is None or len(self.sentence_nlp_models) == 0:
            for sentence in sentences:
                tagged_sentence = self.nlp(sentence.replace("`", "").replace("'", "").replace("‘", "").replace("’", ""))
                self.sentence_nlp_models.append(tagged_sentence)

        self.__calculate_word_type_count(sentences, self.sentence_nlp_models)

        logging.debug("vars(self).keys: %s", vars(self).keys())

        # loop through model to filter out those words that need to be tagged (based on user selection and prominence score)
        for sentence, tagged_sentence in zip(sentences, self.sentence_nlp_models):
            if len(sentence.split()) > 0:  # sentence has at least one word in it
                first_word_in_sent = sentence.split()[0].lower().strip()
                tags = []
                tokenizer = RegexpTokenizer(r"\w+|\$[\d\.]+|\S+")
                spans = list(tokenizer.span_tokenize(sentence))
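                # span_tokenize yields (start, end) character offsets for each token;
                # these offsets position the highlight entities passed to displacy below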

                for token in tagged_sentence:
                    tags.append((token.text, token.pos_, token.tag_, token.dep_, token))

                # identify and tag custom words in the story text
                ents = []
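                # each entity is a displacy-style dict: {"start": <char offset>, "end": <char offset>, "label": <tag>}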
                if custom_dict is not None:
                    custom_matched_tags = self.__find_custom_word_matches(custom_dict, sentence)
                    for matched_tag in custom_matched_tags:
                        ents.append(matched_tag)

                # identify and tag POS / NER tokens in the story text
                for tag, span in zip(tags, spans):
                    normalised_token, is_valid_token = self.__is_valid_token(tag)
                    if is_valid_token:
                        is_subj, subj_type = self.__is_subject(tag)
                        if is_subj:
                            p_score_greater_than_min = self.__update_postagging_metrics(
                                tag[0].lower().strip(),
                                selected_prominence_metric,
                                prominence_score_min,
                                token,
                            )
                            if p_score_greater_than_min:
                                if self.__is_pronoun(tag):
                                    ents.append(
                                        {"start": span[0], "end": span[1], "label": "SP"}
                                    )
                                else:
                                    ents.append(
                                        {"start": span[0], "end": span[1], "label": "SNP"}
                                    )
                        else:
                            if self.__is_pronoun(tag):
                                ents.append(
                                    {"start": span[0], "end": span[1], "label": "NSP"}
                                )
                            elif self.__is_noun_but_not_pronoun(tag):
                                ents.append(
                                    {"start": span[0], "end": span[1], "label": "NSNP"}
                                )

                if any(word == first_word_in_sent for word in self.pronouns):
                    p_score_greater_than_min = self.__update_postagging_metrics(
                        first_word_in_sent,
                        selected_prominence_metric,
                        prominence_score_min,
                        token,
                    )

                    if p_score_greater_than_min:
                        ents.append(
                            {"start": 0, "end": len(first_word_in_sent), "label": "SP"}
                        )

                    if first_word_in_sent in self.passive_agency_scores:
                        self.passive_agency_scores[first_word_in_sent] += 1
                    else:
                        self.passive_agency_scores[first_word_in_sent] = 1

                # remove duplicate tags (sometimes one entity can fall under multiple tag categories;
                # to avoid duplication, only tag each entity using ONE tag category)
                ents = util.remove_duplicate_tagged_entities(ents)
                # specify sentences and filtered entities to tag / highlight
                doc = {"text": sentence, "ents": ents}

                # specify colors for highlighting each entity type
                colors = {}
                if nouns:
                    colors["NSP"] = constants.NONSUBJECT_PRONOUN_HIGHLIGHT_COLOR
                    colors["NSNP"] = constants.NONSUBJECT_NONPRONOUN_HIGHLIGHT_COLOR
                if subjs:
                    colors["SP"] = constants.SUBJECT_PRONOUN_HIGHLIGHT_COLOR
                    colors["SNP"] = constants.SUBJECT_NONPRONOUN_HIGHLIGHT_COLOR
                if custom:
                    for custom_label in custom_tag_labels:
                        colors[custom_label] = constants.CUSTOMTAG_HIGHLIGHT_COLOR

                self.agent_prominence_score_max = self.__get_max_prominence_score()
                # TODO (NOW/PR): re-calculate this based on the dataframe
                # collect the above config params together
                options = {"ents": pos_tags, "colors": colors}
                # give all the params to displacy to generate HTML code of the text with highlighted tags
                html += displacy.render(doc, style="ent", options=options, manual=True)
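                # manual=True tells displacy to render the precomputed "ents" spans directly instead of running its own NER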

        self.html_result = html

        if custom:
            return util.remove_span_tags_except_custom(html)
        else:
            return util.remove_span_tags(html)


    def postag_text_to_table(
        self, text, custom, custom_dict
