-
Notifications
You must be signed in to change notification settings - Fork 0
/
Parse.hs
335 lines (291 loc) · 11.1 KB
/
Parse.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
-- todo inversable parser techniques could simplify this
-- todo hide Lit to enforce whitespace always consumed
-- todo use monad for parseInt, etc
-- convention is parse consumes up until next non ignorable code
module Parse(
staticIntParser,
intParser,
strParser,
chrParser,
parseDataExpr,
parseCountTuple,
cp,
tildaOp,
onlyCheckMatch,
match,
parseError,
parseLitWarning,
nextOffset,
consumeWhitespace,
litDigit,
empty,
fromByte,
toByte,
parse1Nibble,
errorUnderLine,
onlyCheckMatchIdentifier,
isIdentifierChar,
getLitParseCoordinates) where
import Expr
import Types
import Hs
import Header (at,fromBase,toBase)
import Data.Char
import Numeric (showOct,showHex)
import Data.Maybe (fromMaybe)
import State
import Text.ParserCombinators.ReadP (gather, readP_to_S)
import Text.Read.Lex as Lex (readDecP, readHexP, lex, Lexeme(String), Lexeme(Char))
import Data.List
toByte :: [Int] -> Char
toByte (a:b:_)=chr $ 16 * a + b
fromByte b=[ord b `div` 16, ord b `mod` 16]
sLit :: String -> String -> Int -> Code
sLit f s cp = consumeWhitespace $ Lit f s cp
-- use as needle in match to match int digit
litDigit = ""
tildaOp = ([0],["~"])
parseInt (Nib (0:s) cp) = case parseNibInt s 0 (cp+1) of
(6,rest) -> (-10,rest)
(9,rest) -> (-7,rest)
(n,rest) -> (-1-n,rest)
parseInt (Nib s cp) = case parseNibInt s 0 cp of
(7,rest) -> (10,rest)
(10,rest) -> (7,rest)
(n,rest) -> (n,rest)
parseInt (Lit f ('0':s) cp) = (0, sLit f s (cp+1))
parseInt (Lit f ('-':s) cp) = let (n, rest) = parseInt $ Lit f s (cp+1) in
(-n, rest)
-- Thanks Jon Purdy for the readP info!
parseInt (Lit f s cp) = case readP_to_S (gather readDecP) s of
[((used, n), rest)] -> (n, sLit f rest (cp+length used))
_ -> error $ "unparsable int: " ++ s
parseInt16 (Lit f s cp) = case readP_to_S (gather readHexP) s of
[((used, n), rest)] -> (n, sLit f rest (cp+length used))
_ -> error $ "unparsable int: " ++ s
parseNibInt [] _ _ = error "unterminated binary number" -- todo make last digit base 16, but this is non trivial due to padding odd nibble lengths into bytes
parseNibInt(f:s) a cp
| f>=8 = (c,Nib s (cp+1))
| otherwise = parseNibInt s c (cp+1)
where c=a*8+toInteger f`mod`8
intToNib :: Integer -> [Int]
intToNib 7 = [1,10] -- 7 is represented by 10
intToNib 10 = [15] -- 10 is represented by 7
intToNib (-7) = [0,1,9]
intToNib (-10) = [0,14]
intToNib n | n<0 = 0:intToNibH (-1 - n)
intToNib n = intToNibH n
intToNibH n = init digits ++ [last digits + 8]
where digits = map digitToInt $ showOct n ""
parseStr :: Code -> ([String], Code)
parseStr(Nib [] cp) = error "unterminated binary string" -- todo make this string the new default separator instead of " ", but this is also non trivial due to padding odd nibble lengths into bytes
parseStr(Nib (a:s) cp)
| a8==0 = cont '\n' 1
| a8==1 = cont ' ' 1
| c==32 = if a<8
then ([""], Nib (tail s) (cp+2))
else let (r2,code2) = parseStr $ Nib (drop 1 s) (cp+2)
in ("":r2,code2)
| c==127 = cont (toByte $ tail s) 4
| otherwise = cont (chr c) 2
where
a8=a`mod`8
c=a8*16+if null s
then error "unterminated binary string with partial character"
else head s
cont ch used = if a>=8
then ([[ch]], after)
else let (rest:rest2, rs)=parseStr after in ((ch:rest):rest2, rs)
where after = Nib (drop (used-1) s) (cp+used)
parseStr (Lit f s cp) =
case readP_to_S (gather Lex.lex) s of
[((used, Lex.String str), rest)] ->
let after = sLit f rest (cp+length used) in
case rest of
('"':_) ->
let (strRest, after2) = parseStr after
in (str:strRest, after2)
_ -> ([str], after)
_ -> error $ "unparsable string: " ++ s
strToNib :: [String] -> [Int]
strToNib [""] = [2,0]
strToNib [s] = (concatMap (\(c,last)->let oc = ord c in case c of
'\n' -> [last]
' ' -> [1+last]
c | oc > 126 || oc < 32 -> [last+7, 15, div oc 16, mod oc 16]
_ -> [last+div oc 16, mod oc 16]
) (zip s $ take (length s - 1) (repeat 0) ++ [8]))
strToNib ("":rest) = [10,0] ++ strToNib rest
strToNib (s:rest) =
let nib1 = strToNib [s++" "]
in init nib1 ++ [10,0] ++ strToNib rest
specialChars = "\n /.,`a@A0"
-- This would be good with custom ascii table
--specialChars = "\n -.,azAZ0"
specialChars127n = '\127':tail specialChars
swap127 '\n' = '\127'
swap127 '\127' = '\n'
swap127 c = c
-- 10 special chars
-- 10 first op nibbles to denote them, otherwise normal 2 nibble char
-- if you encoded a normal char that is a special char, it denotes beginning of nonprintable char (of which there are 256-96)
parseChr :: Code -> (Char, Code)
parseChr(Nib [] cp) = error "unterminated char"
parseChr(Nib (c:rest) cp)
| c < 2 = (specialChars !! c, Nib rest (cp+1))
| c > 7 = (specialChars !! (c-6), Nib rest (cp+1))
parseChr(Nib [_] cp) = error "unterminated char"
parseChr(Nib (a:b:rest) cp) =
case elemIndex c specialChars127n of
Just i -> case uncons rest of
Nothing -> error "unterminated binary char"
Just (b2,rest2) -> (swap127 $ toByte [if i < 2 then i else i+6,b2], Nib rest2 (cp+3))
Nothing -> (c, Nib rest (cp+2))
where c = toByte [a,b]
parseChr (Lit f s cp) = case readP_to_S (gather Lex.lex) s of
[((used, Lex.Char char), rest)] -> (char, sLit f rest (cp+length used))
_ -> error $ "unparsable char: " ++ s
chrToNib :: Char -> [Int]
chrToNib c =
case elemIndex c specialChars of
Just i -> [if i < 2 then i else i + 6]
Nothing -> if c >= chr 32 && c < chr 127
then fromByte c
else let [a,b] = fromByte (swap127 c)
in fromByte (specialChars127n!!(if a<2 then a else a-6))++[b]
-- Consume rest of program as an integer (efficient binary packing)
parseData :: Code -> Integer
parseData l@(Lit _ _ _) = if empty rest then n else error $ "program must be empty after storing ~ integer data. See "++webpage++"/tutorial_minutiae.html#data " where
(n,rest)=parseInt16 l
parseData (Nib b _) = fromBase 16 (map fromIntegral $ padSafeDat b)
parseCountTuple :: ParseState Int
parseCountTuple = do
matched <- match tildaOp
if matched then fmap (+1) parseCountTuple else return 0
cp (Lit _ _ cp) = cp
cp (Nib _ cp) = cp
empty (Nib [op] _) | op == uselessOp = True
empty (Nib [] _) = True
empty (Lit _ [] _) = True
empty _ = False
nextOffset (Nib (c:s) cp) = Nib s (cp+1)
nextOffset (Lit f (c:s) cp) = Lit f s (cp+1)
onlyCheckMatch :: Code -> ([Int],[String]) -> Maybe Code
onlyCheckMatch (Nib s cp) (needle, _) = if isPrefixOf needle s
then Just $ Nib (drop (length needle) s) (cp+length needle)
else Nothing
onlyCheckMatch lit@(Lit _ _ _) (_, []) = Just lit
onlyCheckMatch lit@(Lit _ _ _) (_, needle:s) = match1Lit lit needle >>= flip onlyCheckMatch (undefined, s)
match1Lit lit@(Lit f s cp) needle
| null s = Nothing
| needle == litDigit =
case s of
(c:_) | isDigit c -> Just lit
('-':c:_) | isDigit c -> Just lit
otherwise -> Nothing
| needle == "\"" && '"' == head s = Just lit
| needle == "\'" && '\'' == head s = Just lit
| isPrefixOf needle s = Just $ sLit f (drop (length needle) s) (cp+length needle)
| otherwise = Nothing
match :: ([Int],[String]) -> ParseState Bool
match (nib,lit) = do
code <- gets pdCode
case onlyCheckMatch code (nib,lit) of
Just nextCode -> do
modify $ \s -> s { pdCode = nextCode }
appendRep (nib,concat lit)
return True
Nothing -> return False
onlyCheckMatchIdentifier :: Code -> Maybe (String, Code)
onlyCheckMatchIdentifier rawCode@(Lit f code cp) =
if not (empty rawCode)
&& isIdentifierChar (head code)
&& not (isDigit $ head code) then
let (name,rest) = span isIdentifierChar code in
Just $ (name, sLit f rest (cp+length name))
else Nothing
onlyCheckMatchIdentifier _ = Nothing
-- also includes digits since this is a superset
isIdentifierChar :: Char -> Bool
isIdentifierChar c | isAlphaNum c = True
isIdentifierChar '_' = True
isIdentifierChar c = False
generateErrorMsg :: String -> ParseState String
generateErrorMsg msg = do
s <- gets pdCode
return $ msg ++ "\n" ++ case s of
Lit f s cp -> literateError f cp
Nib s cp -> "at nibble #" ++ show cp ++ "\nnext nibble is" ++ fromMaybe "" (at s 0>>=Just . show)
parseLitWarning :: String -> ParseState ()
parseLitWarning msg = generateErrorMsg msg >>= addLitWarning
parseError :: String -> ParseState a
parseError msg = do
generateErrorMsg msg >>= errorWithoutStackTrace
errorUnderLine line charno = let arrows = replicate (charno-1) ' ' in
"\n" ++ arrows ++ "v"
++ "\n" ++ line
++ "\n" ++ arrows ++ "^"
literateError s cp =
"at line: " ++ show lineno
++ ", char: " ++ show charno
++ errorUnderLine line charno
where
(lineno,charno) = getLitParseCoordinates s cp
line = lines s !! (lineno-1)
getLitParseCoordinates s cp = (lineno,charno)
where
prev = take (cp+1) s
lineno = length $ lines prev
line = lines s !! (lineno-1)
charno = 1+(fromMaybe (cp+1) $ elemIndex '\n' $ reverse prev) - 1
consumeWhitespace :: Code -> Code
consumeWhitespace n@(Nib _ _) = n
consumeWhitespace l@(Lit _ [] _) = l
consumeWhitespace (Lit f (c:s) cp)
| c=='#' = sLit f rest (cp+1+length comment)
| isSpace c = sLit f s (cp+1)
| otherwise = Lit f (c:s) cp
where (comment, rest) = break (=='\n') s
intParser :: ParseState (VT, String)
intParser = do
~(_, hs) <- staticIntParser
return (VInt, hs)
staticIntParser :: ParseState (VT, String)
staticIntParser = do
~(n, rest) <- gets $ parseInt . pdCode
modify $ \st -> st { pdCode=rest }
appendRep (intToNib n," "++show n) -- only need to prepend " " if last was digit, but that would be expensive to check with dlist data structure
return (StaticInt n, flatHs $ i n)
strParser :: ParseState (VT,String)
strParser = do
~(ss, rest) <- gets $ parseStr . pdCode
modify $ \st -> st { pdCode=rest }
appendRep (strToNib ss, (tail $ concat $ map show ss) ++ " ") -- add an extra space so that expand of two strings in a row doesn't become a list of strings
-- Set element type rather than total type so that lazy evaluation can deduce either way is not a num
let (et,val) = case ss of
[s] -> (VChr, "sToA " ++ show s)
_ -> (vstr, "map sToA " ++ show ss)
return (VList [et], val)
chrParser :: ParseState (VT,String)
chrParser = do
~(s, rest) <- gets $ parseChr . pdCode
modify $ \st -> st { pdCode=rest }
appendRep (chrToNib s,tail $ show s)
return (VChr, "myOrd " ++ show s)
parseDataExpr :: ParseState Integer
parseDataExpr = do
dat <- gets $ parseData . pdCode
let datNibs = map fromIntegral (toBase 16 dat)
progNibs <- gets pdNib
appendRep (padSafeDat datNibs," "++showHex dat "")
return dat
-- reverse (so leading digit is last) and swap useless op with 0 so that when it is padded to even nibbles the value isn't changed
padSafeDat = reverse . map (\e ->
if e == uselessOp then 0
else if e == 0 then uselessOp
else e)
parse1Nibble :: String -> [(Int, (Char, ParseState a))] -> State ParseData a
parse1Nibble name [] = parseError $ "symbol not found in " ++ name
parse1Nibble name ((nib,(lit,impl)):rest) = do
match <- match ([nib],[[lit]])
if match then impl else parse1Nibble name rest