aboutsummaryrefslogtreecommitdiff
path: root/snug-clean/src/Snug/Parse.icl
blob: 4347c34fad053524ba3a2cab53b6f9e21c5f2510 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
implementation module Snug.Parse

import StdEnv

import Control.Applicative
import Control.Monad
import Data.Either
import Data.Error
import Data.Functor
import Data.Maybe
import Data.Tuple
import qualified Text
from Text import class Text, instance Text String
import Text.Parsers.Simple.Core
import qualified Text.Parsers.Simple.Core

import Snug.Syntax

// override infix priority to use easily with <$>
(<<|>) infixr 3 :: !(Parser t a) (Parser t a) -> Parser t a
(<<|>) left right = left 'Text.Parsers.Simple.Core'. <<|> right

parseSnug :: ![Char] -> MaybeError String [Definition]
parseSnug cs = case parse (many definition`) (filterComments (lex cs)) of
	Left errors -> Error ('Text'.join "; " errors)
	Right defs -> Ok defs
where
	filterComments tks = [t \\ t <- tks | not (t=:(TComment _))]

definition` :: Parser Token Definition
definition` = parenthesized def
where
	def
		= liftM3 DataDef
			(pToken (TIdent "data") *> typeIdent)
			(fromMaybe [] <$> optional (simpleList typeVarIdent))
			(list simpleConstructorDef constructorDef)
		<<|> liftM2 TypeDef
			(pToken (TIdent "type") *> typeIdent)
			simpleOrParenthesizedType
		<<|> liftM4 FunDef
			(pToken (TIdent "fun") *> symbolIdent)
			(simpleList (parenthesized typedArgument))
			(pToken TColon *> type)
			(pToken TColon *> expression)
		<<|> liftM4 TestDef
			(pToken (TIdent "test") *> string)
			(pToken TColon *> type)
			(pToken TColon *> simpleOrParenthesizedExpression)
			string

	simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
	constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)

	typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)

complexType :: Parser Token Type
complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)

simpleType :: Parser Token Type
simpleType
	= Type <$> typeIdent
	<<|> TyVar <$> typeVarIdent

type :: Parser Token Type
type = complexType <<|> simpleType

simpleOrParenthesizedType :: Parser Token Type
simpleOrParenthesizedType = simpleType <<|> parenthesized complexType

complexExpression :: Parser Token Expression
complexExpression
	= liftM2 Case
		(pToken (TIdent "case") *> simpleOrParenthesizedExpression)
		(nonEmpty (simpleList (parenthesized caseAlternative)))
	<<|> liftM2 (foldl ExpApp)
		simpleExpression
		(some simpleOrParenthesizedExpression)
where
	caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)

	complexPattern
		= liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
	simplePattern
		= pToken TUnderscore $> Wildcard
		<<|> BasicValuePattern <$> basicValue
		<<|> IdentPattern <$> symbolIdent
		<<|> liftM2 ConstructorPattern constructorIdent (pure [])
	simpleOrParenthesizedPattern = simplePattern <<|> parenthesized complexPattern
	pattern = complexPattern <<|> simplePattern

simpleExpression :: Parser Token Expression
simpleExpression
	= BasicValue <$> basicValue
	<<|> Symbol <$> symbolIdent
	<<|> Constructor <$> constructorIdent

simpleOrParenthesizedExpression :: Parser Token Expression
simpleOrParenthesizedExpression = simpleExpression <<|> parenthesized expression

expression :: Parser Token Expression
expression = complexExpression <<|> simpleExpression

basicValue :: Parser Token BasicValue
basicValue
	= (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
	<<|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))

string :: Parser Token String
string = (\(TString s) -> s) <$> pSatisfy (\t -> t=:(TString _))

typeIdent :: Parser Token TypeIdent
typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent

constructorIdent :: Parser Token ConstructorIdent
constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent

typeVarIdent :: Parser Token TypeVarIdent
typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent

symbolIdent :: Parser Token SymbolIdent
symbolIdent
	= fromIdent <$> pSatisfy isLowerCaseIdent
	<<|> fromIdent <$> pSatisfy isFunnyIdent

fromIdent (TIdent id) :== id

isUpperCaseIdent :: !Token -> Bool
isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
isUpperCaseIdent _ = False

isLowerCaseIdent :: !Token -> Bool
isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
isLowerCaseIdent _ = False

isFunnyIdent :: !Token -> Bool
isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
isFunnyIdent _ = False

parenthesized :: !(Parser Token a) -> Parser Token a
parenthesized p = pToken TParenOpen *> p <* pToken TParenClose

list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
list simpleElem elem = simpleList (simpleElem <<|> parenthesized elem)

simpleList :: !(Parser Token a) -> Parser Token [a]
simpleList simpleElem = parenthesized (many simpleElem)

nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)

:: Token
	= TParenOpen     //* (
	| TParenClose    //* )

	| TColon         //* :
	| TUnderscore    //* _
	| TArrow         //* ->

	| TIdent !String
	| TInt !Int
	| TChar !Char
	| TString !String

	| TComment !String //* (# ... #)
	| TError !Int !Int !String

instance == Token
where
	(==) TParenOpen y = y=:TParenOpen
	(==) TParenClose y = y=:TParenClose
	(==) TColon y = y=:TColon
	(==) TUnderscore y = y=:TUnderscore
	(==) TArrow y = y=:TArrow
	(==) (TIdent x) (TIdent y) = x == y
	(==) (TIdent _) _ = False
	(==) (TInt x) (TInt y) = x == y
	(==) (TInt _) _ = False
	(==) (TChar x) (TChar y) = x == y
	(==) (TChar _) _ = False
	(==) (TString x) (TString y) = x == y
	(==) (TString _) _ = False
	(==) (TComment x) (TComment y) = x == y
	(==) (TComment _) _ = False
	(==) (TError _ _ _) _ = False

lex :: ![Char] -> [Token]
lex cs = lex` 0 0 cs

lex` :: !Int !Int ![Char] -> [Token]
lex` _ _ []
	= []
lex` line col ['(#':cs]
	= stripComment line (col+2) cs 0 []
where
	stripComment line col ['#)':cs] 0 acc
		= [TComment (toString (reverse acc)) : lex` line (col+2) cs]
	stripComment line col ['(#':cs] n acc
		= stripComment line (col+2) cs (n+1) ['#(':acc]
	stripComment line col ['\r\n':cs] n acc
		= stripComment (line+1) 0 cs n ['\n\r':acc]
	stripComment line col ['\n\r':cs] n acc
		= stripComment (line+1) 0 cs n ['\r\n':acc]
	stripComment line col [c:cs] n acc
		| c=='\n' || c=='\r'
			= stripComment (line+1) 0 cs n [c:acc]
			= stripComment line (col+1) cs n [c:acc]
	stripComment line col [] _ _
		= [TError line col "end of file while scanning comment"]
/* This alternative is for characters that can never be part of identifiers: */
lex` line col [c:cs]
	| isJust mbToken
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		'(' -> ?Just TParenOpen
		')' -> ?Just TParenClose
		_ -> ?None
/* This alternative is for characters that can be part of identifiers consisting of symbols: */
lex` line col [c:cs]
	| isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		':' -> ?Just TColon
		_ -> ?None
/* This alternative is for characters that can be part of regular identifiers: */
lex` line col [c:cs]
	| isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		'_' -> ?Just TUnderscore
		_ -> ?None
lex` line col ['-','>':cs]
	| isEmpty cs || not (isFunnySymbol (hd cs))
		= [TArrow : lex` line (col+2) cs]

/* Whitespace */
lex` line col ['\r','\n':cs]
	= lex` (line+1) 0 cs
lex` line col ['\n','\r':cs]
	= lex` (line+1) 0 cs
lex` line col ['\n':cs]
	= lex` (line+1) 0 cs
lex` line col ['\r':cs]
	= lex` (line+1) 0 cs
lex` line col [c:cs]
	| isSpace c
		= lex` line (col+1) cs

/* Basic values */
lex` line col cs=:[c:_]
	| isDigit c
		# (i,cs) = span isDigit cs
		= [TInt (toInt (toString i)) : lex` line (col + length i) cs]
lex` line col [c:cs]
	| c == '-' && not (isEmpty cs) && isDigit (hd cs)
		# (i,cs) = span isDigit cs
		= [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
	= [TChar c : lex` line (col+3) cs]
lex` line col ['"':cs] // TODO: escape sequences; correctly compute new line/col in case of newlines
	= [TString s : lex` line (col + 2 + size s) (tl rest)]
where
	(chars,rest) = span ((<>) '"') cs
	s = {c \\ c <- chars}

/* Identifiers (must come after basic values for correct lexing of negative
 * integers) */
lex` line col cs=:[c:_]
	| isAlpha c
		# (name,cs) = span isIdentChar cs
		= [TIdent (toString name) : lex` line (col + length name) cs]
	| isFunnySymbol c
		# (name,cs) = span isFunnySymbol cs
		= [TIdent (toString name) : lex` line (col + length name) cs]

/* Unparseable input */
lex` line col cs
	= [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]

isFunnySymbol :: !Char -> Bool
isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']

isIdentChar :: !Char -> Bool
isIdentChar c = isAlphanum c || c == '_' || c == '\''