-
Notifications
You must be signed in to change notification settings - Fork 3
/
parser.py
102 lines (85 loc) · 3.3 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/python
# coding=utf-8
# -*- encoding: utf-8 -*-
from chart import *
from grammar import *
class Parser:
GAMMA_SYMBOL = 'GAMMA'
def __init__(self, grammar, sentence, debug=False):
'''Initialize parser with grammar and sentence'''
self.grammar = grammar
self.sentence = sentence
self.debug = debug
# prepare a chart for every input word
self.charts = [Chart([]) for i in range(len(self)+1)]
self.complete_parses = []
def __len__(self):
'''Length of input sentence'''
return len(self.sentence)
def init_first_chart(self):
'''Add initial Gamma rule to first chart'''
row = ChartRow(Rule(Parser.GAMMA_SYMBOL, ['S']), 0, 0)
self.charts[0].add_row(row)
def prescan(self, chart, position):
'''Scan current word in sentence, and add appropriate
grammar categories to current chart'''
word = self.sentence[position-1]
if word:
rules = [Rule(tag, [word.word]) for tag in word.tags]
for rule in rules:
chart.add_row(ChartRow(rule, 1, position-1))
def predict(self, chart, position):
'''Predict next parse by looking up grammar rules
for pending categories in current chart'''
for row in chart.rows:
next_cat = row.next_category()
rules = self.grammar[next_cat]
if rules:
for rule in rules:
new = ChartRow(rule, 0, position)
chart.add_row(new)
def complete(self, chart, position):
'''Complete a rule that was done parsing, and
promote previously pending rules'''
for row in chart.rows:
if row.is_complete():
completed = row.rule.lhs
for r in self.charts[row.start].rows:
if completed == r.next_category():
new = ChartRow(r.rule, r.dot+1, r.start, r, row)
chart.add_row(new)
def parse(self):
'''Main Earley's Parser loop'''
self.init_first_chart()
i = 0
# we go word by word
while i < len(self.charts):
chart = self.charts[i]
self.prescan(chart, i) # scan current input
# predict & complete loop
# rinse & repeat until chart stops changing
length = len(chart)
old_length = -1
while old_length != length:
self.predict(chart, i)
self.complete(chart, i)
old_length = length
length = len(chart)
i+= 1
# finally, print charts for debuggers
if self.debug:
print "Parsing charts:"
for i in range(len(self.charts)):
print "-----------{0}-------------".format(i)
print self.charts[i]
print "-------------------------".format(i)
def is_valid_sentence(self):
'''Returns true if sentence has a complete parse tree'''
res = False
for row in self.charts[-1].rows:
if row.start == 0:
if row.rule.lhs == self.GAMMA_SYMBOL:
if row.is_complete():
self.complete_parses.append(row)
res = True
return res