-
Notifications
You must be signed in to change notification settings - Fork 2
/
oldannotationhelper.py
88 lines (77 loc) · 3.22 KB
/
oldannotationhelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import vozbase
import settings
import voz
import logging
import os
import util
from bs4 import BeautifulSoup
import formatter
from nltk.tree import Tree,ParentedTree
import csv
import collections
import pickle
import narrativehelper
import re
import verbmanager
logger = logging.getLogger(__name__)
def add_functions(last_func,key_to_role,document_id,verbs,ENTITY_ROLE6,taxonomy_labels,narrative,offset,length,role_counts):
    """Add one narrative function per label found in ``last_func``.

    ``last_func`` is split on whitespace. A label starting with '?' is
    registered with an 'IMPLICIT' location (the '?' is stripped); any other
    label gets an 'ACTUAL' location.

    Each entry of ``verbs`` is parsed as ``name(arg1,arg2,...)``. For every
    distinct argument, the row stored in ``key_to_role`` under
    ``(document_id, argument)`` is looked up (falling back to a row of eight
    'NA' values), and the slot of ``role_counts`` matching that row's
    ``ENTITY_ROLE6`` column in ``taxonomy_labels`` is incremented.

    The function added to ``narrative`` then receives ``role_counts`` and a
    list of ``verbmanager.Verb`` objects built from the verb names.

    Returns None; ``narrative`` and ``role_counts`` are modified in place.
    """
    fallback_row = ['NA'] * 8
    for label in last_func.split():
        is_implicit = label.startswith('?')
        if is_implicit:
            label = label[1:]
        location_kind = 'IMPLICIT' if is_implicit else 'ACTUAL'
        location = narrativehelper.NarrativeFunctionLocation(location_kind, [])

        # Split every "name(arg,arg)" annotation into its name and arguments.
        # NOTE(review): an annotation without '(' raises IndexError here.
        argument_groups = []
        verb_names = []
        for annotation in verbs:
            inside_parens = annotation.split(')')[0].split('(')[1]
            argument_groups.append(inside_parens.split(','))
            verb_names.append(annotation.split('(')[0])

        # Count each distinct participant once per label.
        for character in set(util.flatten(argument_groups)):
            entity_row = key_to_role.get((document_id, character), fallback_row)
            slot = taxonomy_labels.index(entity_row[ENTITY_ROLE6])
            role_counts[slot] += 1

        narrative.add_function(0, offset, length, label, [location])
        added = narrative.function_list[-1]
        # NOTE(review): the very same list object is attached to every
        # function, so later increments are visible through all of them --
        # confirm whether a per-function snapshot was intended.
        added.role_counts = role_counts
        added._verbs = [
            verbmanager.Verb(0, 0, 0, voz.Token(0, 0, 0, 'VB', name, name), None, {})
            for name in verb_names
        ]
def _read_old_annotation_rows(document_id):
    """Return the tab-split rows of the old annotation file for one document.

    A row is kept when its first column is non-empty and, converted to int,
    equals ``document_id``. The file is closed before returning.
    """
    rows = []
    with open('stories/old-annotations/GoogleDocsGroundTruth.csv') as handle:
        for line in handle:
            fields = line.split('\t')
            if fields[0] and int(fields[0]) == document_id:
                rows.append(fields)
    return rows


def load_old_annotations_into_document(document_id):
    """Build a ``voz.Document`` with a narrative from the old annotation file.

    Columns read from each matching row: 5 (whitespace-separated function
    labels), 6 (whitespace-separated verb annotations) and 7 (text).
    Consecutive rows carrying the same function-label string form one
    segment; each segment is handed to ``add_functions``.

    Changes with respect to the previous implementation:
      * both input files are closed deterministically;
      * the annotation file is read once instead of twice;
      * the trailing segment gets its own freshly computed length/offset
        instead of reusing the previous segment's values (which were not even
        defined when the document had a single segment);
      * a document without any matching row yields an empty narrative
        instead of an error;
      * blank rows in the key-to-role file are skipped.

    Returns the document, with ``document.narrative`` set and
    ``narrative.document`` pointing back at it.
    """
    # Imported locally, as in the original code.
    import entitymanager

    rows = _read_old_annotation_rows(document_id)

    # NOTE(review): the document text joins rows with ' \n' while the
    # segment offsets below are measured with '\n' separators, so the two
    # drift by one character per row -- confirm this is intended.
    document_text = ''.join(row[7].strip() + ' \n' for row in rows)
    properties = {'source': 'load_old_annotations_into_document'}
    document = voz.Document(document_text, [], properties, document_id)
    narrative = narrativehelper.Narrative(document)

    taxonomy_labels = entitymanager.taxonomy_dict[entitymanager.TaxonomyContainer.TAXONOMY_CHARACTER_6ROLES].labels
    role_counts = [0 for _ in taxonomy_labels]

    # Column indices into a key-to-role row. The original also named
    # ENTITY_SYMBOL = 2, ENTITY_TYPE = 3, ENTITY_ROLE3 = 4 and
    # ENTITY_ROLES = 7, none of which were used.
    ENTITY_ROLE6 = 6
    ENTITY_GROUP = 8
    # 'rU' is kept unchanged from the original; note that the 'U' flag was
    # removed in Python 3.11.
    with open(settings.STY_FILE_PATH+settings.STY_KEY_TO_ROLE,'rU') as key_file:
        key_to_role = dict(
            ((int(j[0]), j[ENTITY_GROUP]), j)
            for j in csv.reader(key_file)
            if j and j[0].isdigit()
        )

    last_func = None
    offset = 0
    consumed = 0  # running length of the '\n'-joined text seen so far
    verbs = []
    for row in rows:
        functions = row[5]
        consumed += len(row[7].strip()) + 1
        verbs.extend(row[6].split())
        if last_func is None:
            last_func = functions
        elif last_func != functions:
            # NOTE(review): as in the original, the row that triggers the
            # change has already been added to the running text and verbs,
            # so it is credited to the segment being closed, and the offset
            # passed on is the segment's end -- confirm this convention.
            length = consumed - offset
            offset = consumed
            add_functions(last_func,key_to_role,document_id,verbs,ENTITY_ROLE6,taxonomy_labels,narrative,offset,length,role_counts)
            last_func = functions
            verbs = []

    # Close the trailing segment using the same convention as inside the loop.
    if last_func is not None:
        length = consumed - offset
        offset = consumed
        add_functions(last_func,key_to_role,document_id,verbs,ENTITY_ROLE6,taxonomy_labels,narrative,offset,length,role_counts)

    narrative.document = document
    document.narrative = narrative
    return document