forked from ShivamSarodia/ShivyC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
144 lines (106 loc) · 4.57 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""Utilities for the parser."""
import functools

from shivyc.errors import CompilerError, Range
# This is a little bit messy, but worth the repetition it saves. In the
# parser.py file, the main parse function sets this global variable to the
# list of tokens. Then, all functions in the parser can reference this
# variable rather than passing around the tokens list everywhere.
#
# Nothing in this module assigns it, so it stays None until it is set from
# outside.
tokens = None
class ParserError(CompilerError):
    """Class representing parser errors.

    amount_parsed (int) - Number of tokens successfully parsed before this
    error was encountered. This value is used by the Parser to determine which
    error corresponds to the most successful parse.

    """

    # Options for the message_type constructor field.
    #
    # AT generates a message like "expected semicolon at '}'", GOT generates a
    # message like "expected semicolon, got '}'", and AFTER generates a message
    # like "expected semicolon after '15'" (if possible).
    #
    # As a very general guide, use AT when a token should be removed, use AFTER
    # when a token should be inserted (esp. because of what came before), and
    # GOT when a token should be changed.
    AT = 1
    GOT = 2
    AFTER = 3

    def __init__(self, message, index, tokens, message_type):
        """Initialize a ParserError from the given arguments.

        message (str) - Base message to put in the error.
        index (int) - Index of the offending token. Stored unmodified as
        amount_parsed; a clamped copy is used to pick the token to report.
        tokens (List[Token]) - List of tokens.
        message_type (int) - One of self.AT, self.GOT, or self.AFTER.

        If tokens is empty, the message is reported "at beginning of source"
        without a range. If index is past the end of tokens, the AFTER form
        is always used (relative to the last token). If index is at or before
        the first token, AFTER is replaced by GOT because there is no
        preceding token to refer to.

        Raises ValueError if message_type is not one of the three options
        and none of the special cases above replaced it.

        Example:
            ParserError("unexpected semicolon", 10, [...], self.AT)
               -> CompilerError("unexpected semicolon at ';'", ..., ...)
               -> "main.c:10: unexpected semicolon at ';'"

        """
        self.amount_parsed = index

        if not tokens:
            super().__init__(f"{message} at beginning of source")
            return

        # If the index is too big, we're always using the AFTER form
        if index >= len(tokens):
            index = len(tokens)
            message_type = self.AFTER
        # If the index is too small, we should not use the AFTER form
        elif index <= 0:
            index = 0
            if message_type == self.AFTER:
                message_type = self.GOT

        if message_type == self.AT:
            token = tokens[index]
            super().__init__(f"{message} at '{token}'", token.r)
        elif message_type == self.GOT:
            token = tokens[index]
            super().__init__(f"{message}, got '{token}'", token.r)
        elif message_type == self.AFTER:
            previous = tokens[index - 1]
            # The reported range starts one past the end of the previous
            # token's range, when that token has a range at all.
            new_range = Range(previous.r.end + 1) if previous.r else None
            super().__init__(f"{message} after '{previous}'", new_range)
        else:
            # Without this branch the base-class initializer would never run
            # and the exception would be left only partially constructed.
            raise ValueError(f"invalid message_type: {message_type!r}")
def raise_error(err, index, error_type):
    """Build a ParserError against the module-level tokens and raise it.

    err (str) - Base message to put in the error.
    index (int) - Index of the offending token.
    error_type (int) - One of ParserError.AT, ParserError.GOT, or
    ParserError.AFTER.

    """
    # `tokens` is only read here, so no `global` declaration is required.
    error = ParserError(err, index, tokens, error_type)
    raise error
# Used to store the best error found in the parsing phase. Starts as None and
# is replaced by log_error() whenever a logged error has an amount_parsed at
# least as large as that of the error currently stored here.
best_error = None
def log_error(error):
    """Log the error in the parser to be used for error reporting.

    The value of error.amount_parsed is used to determine the amount
    successfully parsed before encountering the error. The logged error
    replaces the stored best error when none is stored yet, or when it parsed
    at least as many tokens as the stored one (so ties go to the most
    recently logged error).

    error (ParserError) - Error encountered.

    """
    global best_error

    # Compare against None explicitly: "no error stored yet" must not be
    # confused with a stored error object that happens to be falsy.
    if best_error is None or error.amount_parsed >= best_error.amount_parsed:
        best_error = error
def token_is(index, kind):
    """Return true iff there is a token at index and it is of the given kind.

    index (int) - Position in the module-level tokens list to inspect.
    kind - Value compared for equality against the token's kind attribute.

    Any index outside the token list yields False. That includes negative
    indices, which would otherwise wrap around and inspect a token counted
    from the end of the list.

    """
    return 0 <= index < len(tokens) and tokens[index].kind == kind
def match_token(index, kind, message_type, message=None):
    """Step past tokens[index] if it is of the expected kind.

    index (int) - Index of the token to check.
    kind - Expected token kind; its text_repr attribute is used to build the
    default message.
    message_type (int) - Passed through to ParserError on failure.
    message (str) - Optional base message for the error. When omitted or
    empty, "expected '<kind.text_repr>'" is used.

    Returns index + 1 when the token matches. Otherwise raises a ParserError
    with the message and message_type described above.

    """
    message = message or f"expected '{kind.text_repr}'"

    if not token_is(index, kind):
        raise ParserError(message, index, tokens, message_type)

    return index + 1
def add_range(parse_func):
    """Return a decorated function that tags the produced node with a range.

    Accepts a parse_* function, and returns a version of the function where
    the returned node has its range attribute set.

    parse_func - Callable whose first argument is a token index and which
    returns a (node, end_index) pair. Any further positional or keyword
    arguments given to the decorated function are forwarded unchanged.

    The node's r attribute is set to the sum of the r attributes of the
    token at the starting index and the token at end_index - 1. The
    decorated function keeps the name and docstring of parse_func.

    """
    @functools.wraps(parse_func)
    def parse_with_range(index, *args, **kwargs):
        node, end_index = parse_func(index, *args, **kwargs)
        # The module-level tokens list is looked up on every call, so the
        # decorator can be applied before the list has been set.
        node.r = tokens[index].r + tokens[end_index - 1].r
        return node, end_index

    return parse_with_range