1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
-- vim: et ts=2 sw=2 ai:
{-# LANGUAGE LambdaCase #-}
module SPL.Parse (parse)
where
import Control.Applicative
import Control.Monad
import Data.Functor
import Text.Parsec (sepBy, (<?>))
import qualified Text.Parsec as P
import SPL.Syntax hiding (TInt,TBool,TChar,TArrow,TVar)
import qualified SPL.Syntax
import SPL.Lex
type Parser t = P.Parsec [Token] () t
parse :: [Token] -> Either P.ParseError Program
parse = P.parse spl "Not a valid program" . filter (not . isCommentToken)
check :: String -> (Token -> Maybe a) -> Parser a
check e f = P.tokenPrim show (\p _ _ -> P.incSourceColumn p 1) f <?> e
-- TODO: We increment the source column to get meaningful error messages;
-- the error that is furthest in the parsing process. We need to keep positions
-- in the list of tokens so that we can update this properly.
token :: Token -> Parser Token
token t = check (show t) (\u -> if t == u then Just t else Nothing)
choice :: [Parser a] -> Parser a
choice = P.choice . map P.try
trans :: Token -> a -> Parser a
trans t x = token t $> x
spl :: Parser Program
spl = collect <$> (many toplevel <* P.eof)
where
collect :: [Either Variable Function] -> Program
collect vfs = Program [f | Right f <- vfs] [v | Left v <- vfs]
toplevel :: Parser (Either Variable Function)
toplevel = choice
[ Left <$> var
, Right <$> fun
]
comment :: Parser String
comment = check "comment" $ \case
TSingleComment s -> Just s
TBlockComment s -> Just s
_ -> Nothing
parenthesised :: Parser a -> Parser a
parenthesised = P.between (token TParenOpen) (token TParenClose)
braced :: Parser a -> Parser a
braced = P.between (token TBraceOpen) (token TBraceClose)
bracked :: Parser a -> Parser a
bracked = P.between (token TBrackOpen) (token TBrackClose)
var :: Parser Variable
var = do
t <- Just <$> plainType <|> (token TVar $> Nothing)
id <- ident
token TEquals
val <- expr
token TSemicolon
return $ Variable id t val
fun :: Parser Function
fun = do
id <- ident
args <- parenthesised (ident `sepBy` token TComma)
ftype <- optional (token TColonColon *> funType)
token TBraceOpen
vars <- many (P.try var)
stmt <- statements
token TBraceClose
return $ Function id ftype args vars stmt
ident :: Parser Name
ident = check "identifier" $ \case
TIdent id -> Just id
_ -> Nothing
funType :: Parser Type
funType = do
argtypes <- many plainType
token TArrow
rettype <- plainType
return $ SPL.Syntax.TArrow argtypes rettype
plainType :: Parser Type
plainType = choice
[ trans TIntType SPL.Syntax.TInt
, trans TBoolType SPL.Syntax.TBool
, trans TCharType SPL.Syntax.TChar
, trans TVoidType SPL.Syntax.TVoid
, TList <$> bracked plainType
, parenthesised $ liftM2 TTuple plainType (token TComma *> plainType)
, SPL.Syntax.TVar <$> ident
]
expr :: Parser Expression
expr =
rightAssoc (trans TPipePipe Or) $
rightAssoc (trans TAmpAmp And) $
rightAssoc (trans TEqEq Eq
<|> trans TExclamEq Ne
<|> trans TLtEq Le
<|> trans TGtEq Ge
<|> trans TLt Lt
<|> trans TGt Gt) $
rightAssoc (trans TColon Cons) $
leftAssoc (trans TPlus Add
<|> trans TMinus Sub) $
leftAssoc (trans TAsterisk Mul
<|> trans TSlash Div) $
leftAssoc (trans TPercent Mod) $
fields $
simpleExpr
where
rightAssoc :: Parser Op2 -> Parser Expression -> Parser Expression
rightAssoc op e = do
e1 <- e
rest <- optional (liftM2 (,) op (rightAssoc op e))
return $ case rest of
Nothing -> e1
Just (op,e2) -> Op2 e1 op e2
leftAssoc :: Parser Op2 -> Parser Expression -> Parser Expression
leftAssoc op e = do
e1 <- e
rest <- many (liftM2 (,) op e)
foldM (\e1 (op,e2) -> pure $ Op2 e1 op e2) e1 rest
fields :: Parser Expression -> Parser Expression
fields p = liftM2 (foldl Field) p (many field)
simpleExpr = choice
[ liftM2 Op1 (trans TExclam Not <|> trans TMinus Neg) simpleExpr
, literal
, tuple
, funcall
, var
, parenthesised expr
]
literal :: Parser Expression
literal = fmap Literal $ nil <|> (check "literal" $ \case
TInt i -> Just (LInt i)
TBool b -> Just (LBool b)
TChar c -> Just (LChar c)
_ -> Nothing)
where nil = pure LNil <* token TBrackOpen <* token TBrackClose
tuple = parenthesised (liftM2 Tuple expr (token TComma *> expr))
funcall = liftM2 FunCall ident (parenthesised $ expr `sepBy` token TComma)
var = Var <$> ident
statements :: Parser Statement
statements = (foldr1 Seq <$> some statement <|> pure Nop) <?> "statement"
-- NOTE: we here assume that if/while blocks need not be braced, as in the
-- example function `abs`. The alternative is using `braced statements`.
statement :: Parser Statement
statement = choice
[ liftM3 If
(token TIf *> parenthesised expr)
statement
(optional (token TElse *> statement))
, liftM2 While
(token TWhile *> parenthesised expr)
statement
, Eval <$> (expr <* token TSemicolon)
, liftM3 Assign
ident
(many field)
(token TEquals *> expr <* token TSemicolon)
, Return <$> (token TReturn *> optional expr <* token TSemicolon)
, braced statements
]
field :: Parser Field
field = token TDot *> check "field" (\case
TIdent "hd" -> Just Hd
TIdent "tl" -> Just Tl
TIdent "fst" -> Just Fst
TIdent "snd" -> Just Snd
_ -> Nothing)
|