Commit
remove old function
f-hafner committed Jan 8, 2024
1 parent 8b44579 commit 4f2f4a9
Showing 1 changed file with 0 additions and 175 deletions.
175 changes: 0 additions & 175 deletions orangecontrib/storynavigation/modules/actoranalysis.py
@@ -254,181 +254,6 @@ def __get_custom_tags_list(self, custom_dict):
            result.append(token.upper())
        return result

    def postag_text(
        self, text, nouns, subjs, custom, custom_dict, selected_prominence_metric, prominence_score_min
    ):
        """POS-tags story text and returns an HTML string which encodes the tagged text, ready for rendering in the UI

        Args:
            text (string): Story text
            nouns (boolean): whether noun tokens should be tagged
            subjs (boolean): whether subject tokens should be tagged
            custom (boolean): whether custom-dictionary tokens should be tagged
            custom_dict (dict): custom words to match and tag in the story text
            selected_prominence_metric: the selected metric by which to calculate the word prominence score
            prominence_score_min: minimum prominence score a word needs in order to be tagged

        Returns:
            string: HTML string representation of POS tagged text
        """
        self.custom_category_frequencies = {}

        text = re.sub(";", ".", text)  # only for the test cases that have no "."
        # -> otherwise, we have only one sentence after the next step
        sentences = util.preprocess_text(text)
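        # util.preprocess_text is assumed to split the story into a list of sentences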

        self.__calculate_pretagging_metrics(sentences)

        # pos tags that the user wants to highlight
        pos_tags = []
        custom_tag_labels = []
        if nouns:
            pos_tags.append("NOUN")
            pos_tags.append("PRON")
            pos_tags.append("PROPN")
            pos_tags.append("NSP")
            pos_tags.append("NSNP")
        if subjs:
            pos_tags.append("SUBJ")
            pos_tags.append("SP")
            pos_tags.append("SNP")
        if custom:
            if custom_dict is not None:
                custom_tag_labels = self.__get_custom_tags_list(custom_dict)
                pos_tags.extend(custom_tag_labels)

        # output of this function
        html = ""
        logging.debug("vars(self).keys: %s", vars(self).keys())

        # generate and store nlp tagged models for each sentence
        if self.sentence_nlp_models is None or len(self.sentence_nlp_models) == 0:
            for sentence in sentences:
                tagged_sentence = self.nlp(sentence.replace("`", "").replace("'", "").replace("‘", "").replace("’", ""))
                self.sentence_nlp_models.append(tagged_sentence)

        self.__calculate_word_type_count(sentences, self.sentence_nlp_models)

        logging.debug("vars(self).keys: %s", vars(self).keys())

        # loop through model to filter out those words that need to be tagged (based on user selection and prominence score)
        for sentence, tagged_sentence in zip(sentences, self.sentence_nlp_models):
            if len(sentence.split()) > 0:  # sentence has at least one word in it
                first_word_in_sent = sentence.split()[0].lower().strip()
                tags = []
                tokenizer = RegexpTokenizer(r"\w+|\$[\d\.]+|\S+")
                spans = list(tokenizer.span_tokenize(sentence))
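                # span_tokenize yields (start, end) character offsets for each token;
                # these offsets position the highlight entities passed to displacy below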

                for token in tagged_sentence:
                    tags.append((token.text, token.pos_, token.tag_, token.dep_, token))

                # identify and tag custom words in the story text
                ents = []
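                # each entity is a displacy-style dict: {"start": <char offset>, "end": <char offset>, "label": <tag>}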
                if custom_dict is not None:
                    custom_matched_tags = self.__find_custom_word_matches(custom_dict, sentence)
                    for matched_tag in custom_matched_tags:
                        ents.append(matched_tag)

                # identify and tag POS / NER tokens in the story text
                for tag, span in zip(tags, spans):
                    normalised_token, is_valid_token = self.__is_valid_token(tag)
                    if is_valid_token:
                        is_subj, subj_type = self.__is_subject(tag)
                        if is_subj:
                            p_score_greater_than_min = self.__update_postagging_metrics(
                                tag[0].lower().strip(),
                                selected_prominence_metric,
                                prominence_score_min,
                                token,
                            )
                            if p_score_greater_than_min:
                                if self.__is_pronoun(tag):
                                    ents.append(
                                        {"start": span[0], "end": span[1], "label": "SP"}
                                    )
                                else:
                                    ents.append(
                                        {"start": span[0], "end": span[1], "label": "SNP"}
                                    )
                        else:
                            if self.__is_pronoun(tag):
                                ents.append(
                                    {"start": span[0], "end": span[1], "label": "NSP"}
                                )
                            elif self.__is_noun_but_not_pronoun(tag):
                                ents.append(
                                    {"start": span[0], "end": span[1], "label": "NSNP"}
                                )

                if any(word == first_word_in_sent for word in self.pronouns):
                    p_score_greater_than_min = self.__update_postagging_metrics(
                        first_word_in_sent,
                        selected_prominence_metric,
                        prominence_score_min,
                        token,
                    )

                    if p_score_greater_than_min:
                        ents.append(
                            {"start": 0, "end": len(first_word_in_sent), "label": "SP"}
                        )

                    if first_word_in_sent in self.passive_agency_scores:
                        self.passive_agency_scores[first_word_in_sent] += 1
                    else:
                        self.passive_agency_scores[first_word_in_sent] = 1

                # remove duplicate tags (sometimes one entity can fall under multiple tag categories;
                # to avoid duplication, only tag each entity using ONE tag category)
                ents = util.remove_duplicate_tagged_entities(ents)
                # specify sentences and filtered entities to tag / highlight
                doc = {"text": sentence, "ents": ents}

                # specify colors for highlighting each entity type
                colors = {}
                if nouns:
                    colors["NSP"] = constants.NONSUBJECT_PRONOUN_HIGHLIGHT_COLOR
                    colors["NSNP"] = constants.NONSUBJECT_NONPRONOUN_HIGHLIGHT_COLOR
                if subjs:
                    colors["SP"] = constants.SUBJECT_PRONOUN_HIGHLIGHT_COLOR
                    colors["SNP"] = constants.SUBJECT_NONPRONOUN_HIGHLIGHT_COLOR
                if custom:
                    for custom_label in custom_tag_labels:
                        colors[custom_label] = constants.CUSTOMTAG_HIGHLIGHT_COLOR

                self.agent_prominence_score_max = self.__get_max_prominence_score()
                # TODO (NOW/PR): re-calculate this based on the dataframe
                # collect the above config params together
                options = {"ents": pos_tags, "colors": colors}
                # give all the params to displacy to generate HTML code of the text with highlighted tags
                html += displacy.render(doc, style="ent", options=options, manual=True)
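                # manual=True tells displacy to render the precomputed "ents" spans directly instead of running its own NER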

        self.html_result = html

        if custom:
            return util.remove_span_tags_except_custom(html)
        else:
            return util.remove_span_tags(html)


    def postag_text_to_table(
        self, text, custom, custom_dict
