-
Notifications
You must be signed in to change notification settings - Fork 0
/
phonoEditDistanceBETWEENsubjects.py
63 lines (48 loc) · 1.47 KB
/
phonoEditDistanceBETWEENsubjects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from corpustools.corpus import io
from corpustools.symbolsim import phono_edit_distance
import numpy
import itertools
# Compares the phonological similarity of two corpora.
# Replace 'corpusA.csv' and 'corpusB.csv' with the corpora you wish to compare.
# Load both corpora with identical CSV settings; only the corpus name and the
# file path differ between the two calls. Corpus A is loaded first, then B.
# NOTE(review): the positional "," and "." are presumably the column delimiter
# and the transcription delimiter -- confirm against load_corpus_csv.
myCorpus, everythingElse = (
    io.csv.load_corpus_csv(
        corpusName,
        csvPath,
        ",",
        ".",
        annotation_types=None,
        feature_system_path=None,
        stop_check=None,
        call_back=None,
    )
    for corpusName, csvPath in (
        ("corpusA", "corpusA.csv"),
        ("corpusB", "corpusB.csv"),
    )
)
print("loaded? Loaded.", myCorpus)
# Location of the feature-matrix binary used for download, load and save.
# NOTE(review): this is an absolute path at the filesystem root -- confirm it
# is writable in the environment where the script runs.
matrixPath = "/matrix"

# Fetch the "ipa2hayes" binary to disk, then load it into memory.
io.binary.download_binary("ipa2hayes", matrixPath, call_back=None)
ipa2hayes = io.binary.load_binary(matrixPath)

# The freshly loaded object is written straight back to the same path.
# NOTE(review): this looks redundant; kept so the on-disk side effect is
# unchanged.
io.binary.save_binary(ipa2hayes, matrixPath)

print("did it load?", ipa2hayes.features)
# Resolve every wordlist key to its word object once, up front, rather than
# calling .get() again for each pair inside the comparison loop.
wordsA = [myCorpus.wordlist.get(key) for key in myCorpus.wordlist]
wordsB = [everythingElse.wordlist.get(key) for key in everythingElse.wordlist]

# Phonological edit distance for every (corpus A word, corpus B word) pair,
# visited in the same order as a nested loop with corpus A outermost.
# The feature matrix already loaded as `ipa2hayes` is reused for each pair;
# reloading it from disk per comparison was loop-invariant work.
giantPhonoSimList = []
for wordA, wordB in itertools.product(wordsA, wordsB):
    phonoEditDistance = phono_edit_distance.phono_edit_distance(
        wordA,
        wordB,
        "transcription",
        ipa2hayes,
    )
    print("comparing giant phono sim list!!: ",
          wordA.transcription,
          " to: ",
          wordB.transcription,
          ": ",
          phonoEditDistance
          )
    giantPhonoSimList.append(phonoEditDistance)

# Summary: all pairwise distances, then their arithmetic mean.
print("giant phonoSimList: ", giantPhonoSimList)
print("mean result: ", numpy.mean(giantPhonoSimList))