-
Notifications
You must be signed in to change notification settings - Fork 0
/
distances.py
79 lines (50 loc) · 1.43 KB
/
distances.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def levenshtein(a, b, mod='WER'):
    """Compute the Levenshtein distance and error rate between two inputs.

    Args:
        a: First input, a string or a list of items.
        b: Second input, a string or a list of items.
        mod: ``'WER'`` (default) splits string inputs on whitespace so that
            words are compared; any other value, e.g. ``'CER'``, compares
            strings character by character. Lists are never split and are
            compared item by item in either mode.

    Returns:
        A tuple ``(error, error_rate)``. ``error`` is the minimum number of
        insertions, deletions and substitutions turning one input into the
        other; ``error_rate`` is ``error`` as a percentage of the length of
        the longer input. Two empty inputs give ``(0, 0.0)``.
    """
    # Word mode tokenises raw strings; already-tokenised lists are kept as is
    # (calling .split() on a list would raise AttributeError).
    if mod == 'WER':
        if isinstance(a, str):
            a = a.split()
        if isinstance(b, str):
            b = b.split()

    # Keep the longer sequence in ``a`` so each row only has len(b) + 1 cells.
    if len(a) < len(b):
        a, b = b, a

    longest = len(a)
    if longest == 0:
        # Both inputs are empty: nothing to edit, and no length to divide by.
        return 0, 0.0

    # previous_row[j] is the distance between the processed prefix of ``a``
    # and the first j items of ``b``.
    previous_row = list(range(len(b) + 1))
    for i, item_a in enumerate(a, start=1):
        current_row = [i]
        for j, item_b in enumerate(b):
            insertion = previous_row[j + 1] + 1
            deletion = current_row[j] + 1
            # The boolean adds 1 only when the two items differ.
            substitution = previous_row[j] + (item_a != item_b)
            current_row.append(min(insertion, deletion, substitution))
        previous_row = current_row

    error = previous_row[-1]
    error_rate = error / longest * 100
    return error, error_rate
def jaccard(a, b, coef=1):
    """Return the similarity of ``a`` and ``b`` as computed by ``dice``.

    Args:
        a: First input, forwarded unchanged to ``dice``.
        b: Second input, forwarded unchanged to ``dice``.
        coef: Coefficient forwarded to ``dice``; defaults to 1.

    Returns:
        Whatever ``dice(a, b, coef)`` returns.
    """
    similarity = dice(a, b, coef)
    return similarity
def dice(a, b, coef=2):
    """Return a set-overlap similarity between ``a`` and ``b``.

    Both inputs are reduced to their distinct elements (characters for a
    string, items for a list, tuple or set). With ``I`` the number of shared
    elements and ``U`` the number of distinct elements overall, the score is
    ``coef * I / (U + (coef - 1) * I)``:

    * ``coef=1`` gives the Jaccard index ``I / U``;
    * ``coef=2`` (default) gives the Sorensen-Dice coefficient
      ``2 * I / (|A| + |B|)``.

    Args:
        a: First input; a str, list, tuple, set or frozenset.
        b: Second input; same accepted types as ``a``.
        coef: Positive weight given to the shared elements.

    Returns:
        A float between 0.0 and 1.0 (1.0 for identical element sets,
        including two empty inputs), or ``False`` when either input is not
        one of the supported types.

    Raises:
        ValueError: If ``coef`` is not strictly positive.
    """
    supported = (str, list, tuple, set, frozenset)
    if not (isinstance(a, supported) and isinstance(b, supported)):
        return False
    if coef <= 0:
        # A non-positive weight can make the denominator zero or negative.
        raise ValueError("coef must be strictly positive")

    items_a, items_b = set(a), set(b)
    union = len(items_a | items_b)
    if union == 0:
        # Two empty inputs are identical; also avoids dividing by zero.
        return 1.0

    # Count shared elements on sets too, so duplicates cannot skew the ratio.
    intersection = len(items_a & items_b)
    return float(coef * intersection) / (union + (coef - 1) * intersection)
if __name__ == '__main__':
    # Manual smoke run: print each measure for one pair of sample sentences.
    first_text, second_text = 'tes yeux sont verts', 'a quoi sert des verres'
    for measure in (levenshtein, jaccard):
        print(measure(first_text, second_text))