-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_ica.py
50 lines (43 loc) · 1.99 KB
/
compute_ica.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import argparse
import os
import sys
import xml.etree.ElementTree as ET
#from nltk.metrics import masi_distance, jaccard_distance
def main():
    """Compute and print the Jaccard index between two annotators' alignments.

    Two directories are read from the command line (``--anno_1`` and
    ``--anno_2``). Only directory entries whose name consists of exactly
    four dot-separated parts are considered. The entries of both directories
    are sorted and paired by position; for every pair the alignment sets
    returned by ``get_alignments`` are compared, and the sizes of their
    intersection and union are summed over all pairs. The printed score is
    the ratio of the two sums, formatted with three decimals.

    A warning is written to stderr when the directories hold a different
    number of matching files (the surplus files are not compared). When the
    accumulated union is empty the score is undefined, so the program exits
    with an error message instead of failing with ``ZeroDivisionError``.
    """
    parser = argparse.ArgumentParser(description='Inter-coder Agreement Calculator')
    parser.add_argument('-a1', '--anno_1', type=str, required=True, help='Intertext alignment directory for annotator 1.')
    parser.add_argument('-a2', '--anno_2', type=str, required=True, help='Intertext alignment directory for annotator 2.')
    args = parser.parse_args()

    anno_1_files = [name for name in sorted(os.listdir(args.anno_1)) if len(name.split('.')) == 4]
    anno_2_files = [name for name in sorted(os.listdir(args.anno_2)) if len(name.split('.')) == 4]

    # zip() stops at the shorter list, so make the truncation visible.
    if len(anno_1_files) != len(anno_2_files):
        print(
            "Warning: {} alignment file(s) for annotator 1 but {} for annotator 2; "
            "only the first {} pair(s) are compared.".format(
                len(anno_1_files),
                len(anno_2_files),
                min(len(anno_1_files), len(anno_2_files)),
            ),
            file=sys.stderr,
        )

    intersections = 0
    unions = 0
    for anno_1_file, anno_2_file in zip(anno_1_files, anno_2_files):
        alignments_1 = get_alignments(os.path.join(args.anno_1, anno_1_file))
        alignments_2 = get_alignments(os.path.join(args.anno_2, anno_2_file))
        intersections += len(alignments_1 & alignments_2)
        unions += len(alignments_1 | alignments_2)

    # Nothing to compare: no matching files, or every alignment set is empty.
    if unions == 0:
        sys.exit("No alignments found to compare; the Jaccard index is undefined.")

    jac = intersections / unions
    print("Jaccard Index: {:.3f}".format(jac))
def get_alignments(xml_file):
    """Return the alignment links of one XML file as a set of bead pairs.

    Each ``link`` element yielded by ``iterfind('link')`` contributes one
    ``(source_bead, target_bead)`` entry, where both beads are tuples of the
    integers produced by ``parse_link``. The ``xtargets`` attribute is split
    on ``;`` into exactly two halves; the code treats the first half as the
    target side and the second half as the source side.
    """
    tree = ET.parse(xml_file)
    pairs = set()
    for element in tree.iterfind('link'):
        # Unpacking enforces exactly two ';'-separated halves.
        target_part, source_part = element.get('xtargets').split(';')
        source_bead = tuple(parse_link(source_part))
        target_bead = tuple(parse_link(target_part))
        pairs.add((source_bead, target_bead))
    return pairs
def parse_link(link):
    """Convert one side of an ``xtargets`` value into zero-based indices.

    ``link`` is a whitespace-separated string of ``prefix:number`` items.
    For every item the number after the first ``:`` is parsed as an integer
    and decremented by one. An empty or whitespace-only string yields an
    empty list.

    Raises ``IndexError`` for an item without ``:`` and ``ValueError`` when
    the part after ``:`` is not an integer.
    """
    # split() without an argument ignores leading, trailing and repeated
    # whitespace, so stray spaces no longer produce empty items that would
    # fail on the [1] lookup below.
    # NOTE(review): the part before ':' is discarded; confirm that the
    # number after ':' alone identifies an item uniquely within a file.
    return [int(item.split(':')[1]) - 1 for item in link.split()]
# Run the calculator only when this file is executed as a script.
if __name__ == '__main__':
    main()