-
Notifications
You must be signed in to change notification settings - Fork 4
/
matching.py
99 lines (56 loc) · 1.62 KB
/
matching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#! /usr/bin/env python
import nltk
import string
import question_analysis
import preprocessing
import MicrosoftNgram
import itertools
from nltk.corpus import wordnet as wn
# Answer matching.
#
# Collects the words that appear in the second field of every tuple string
# starting with one of the question keywords, asks the Microsoft N-gram
# service for continuations of every ordering of the keywords, and prints
# the words that appear in both sets.

# NOTE: sort() works in place, so preprocessing.actual_tuples itself is
# reordered too. Kept as-is because other modules may rely on that.
list_tuples = preprocessing.actual_tuples
list_tuples.sort()
keywords = question_analysis.keywords_analysis

# Never filled by this script; kept so the module still exposes these names.
concept = []
tuples_normalized = []

# Tuple strings that begin with "<keyword>,", grouped in keyword order.
tuples_needed = []
for keyword in keywords:
    prefix = keyword + ","
    for entry in list_tuples:
        if entry.startswith(prefix):
            tuples_needed.append(entry)
#print tuples_needed

# Word tokens of the second comma-separated field of each selected tuple.
# The prefix test above guarantees at least one comma, so index 1 exists.
concept2 = []
for entry in tuples_needed:
    second_field = entry.split(",")[1]
    concept2.extend(nltk.word_tokenize(second_field))

s = MicrosoftNgram.LookupService()

# One query per ordering of the keywords (the count grows factorially with
# the number of keywords). The list is named `queries` instead of `string`
# so the imported `string` module is no longer overwritten.
queries = [" ".join(order) for order in itertools.permutations(keywords)]

# Up to `maxgen` generated results are requested for each query.
results = []
for query in queries:
    results.extend(s.Generate(query, maxgen=5))

# Only the first element of each result is used as a candidate answer
# (presumably the generated word of a (word, score) pair -- TODO confirm
# against the MicrosoftNgram module).
answers = [result[0] for result in results]

# Deduplicate, then keep the words present in both sources. The order of
# list_answers is whatever set iteration yields.
concept2 = list(set(concept2))
list_answers = list(set(concept2) & set(answers))

# Single-argument print(...) produces the same output on Python 2 and also
# parses on Python 3.
print("the most probable answers for your question")
print(list_answers)
"""
# TODO: this disabled section still needs work; it is meant to filter list_answers down to the exact answer(s).
synsets = []
for i in keywords:
synsets.append(wn.synsets(i))
wordnet_words = []
for i in range(len(synsets)):
myString = str(synsets[i])
wordnet_words.append(myString[myString.find("'")+1:myString.find("')")])
lemma_words = []
for i in wordnet_words:
w = wn.synset(i).hyponyms()
for j in w:
lemma_words.append(j.lemma_names)
print lemma_words
"""