Add Clean parser for snug

author: Camil Staps 2023-01-23 22:36:16 +0100
committer: Camil Staps 2023-01-23 22:36:16 +0100
commit: 301a73c63b3fe5e8306e9e8d213269a720b7a089 (patch)
tree: 0593001bc36302e0c759d85e98fcd78dfe87fcef /snug-clean/src/Snug/Parse.icl
parent: Add example of small functional language and run time system (diff)
1 files changed, 243 insertions, 0 deletions
diff --git a/snug-clean/src/Snug/Parse.icl b/snug-clean/src/Snug/Parse.icl
new file mode 100644
index 0000000..63a9fda
--- /dev/null
+++ b/snug-clean/src/Snug/Parse.icl
@@ -0,0 +1,243 @@
+implementation module Snug.Parse
+
+import StdEnv
+
+import Control.Applicative
+import Control.Monad
+import Data.Either
+import Data.Error
+import Data.Functor
+import Data.Maybe
+import Data.Tuple
+import qualified Text
+from Text import class Text, instance Text String
+import Text.Parsers.Simple.Core
+
+import Snug.Syntax
+
+parseSnug :: ![Char] -> MaybeError String [Definition]
+parseSnug cs = case parse (many definition`) (lex cs) of
+	Left errors -> Error ('Text'.join "; " errors)
+	Right defs -> Ok defs
+
+definition` :: Parser Token Definition
+definition` = parenthesized def
+where
+	def
+		= liftM3 DataDef
+			(pToken (TIdent "data") *> typeIdent)
+			(fromMaybe [] <$> optional (simpleList typeVarIdent))
+			(list simpleConstructorDef constructorDef)
+		<|> liftM2 TypeDef
+			(pToken (TIdent "type") *> typeIdent)
+			simpleOrParenthesizedType
+		<|> liftM4 FunDef
+			(pToken (TIdent "fun") *> symbolIdent)
+			(simpleList (parenthesized typedArgument))
+			(pToken TColon *> type)
+			simpleOrParenthesizedExpression
+
+	simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
+	constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)
+
+	typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)
+
+complexType :: Parser Token Type
+complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)
+
+simpleType :: Parser Token Type
+simpleType
+	= Type <$> typeIdent
+	<|> TyVar <$> typeVarIdent
+
+type :: Parser Token Type
+type = complexType <|> simpleType
+
+simpleOrParenthesizedType :: Parser Token Type
+simpleOrParenthesizedType = simpleType <|> parenthesized complexType
+
+complexExpression :: Parser Token Expression
+complexExpression
+	= liftM2 Case
+		(pToken (TIdent "case") *> simpleOrParenthesizedExpression)
+		(nonEmpty (simpleList (parenthesized caseAlternative)))
+	<|> liftM2 (foldl ExpApp)
+		simpleExpression
+		(some simpleOrParenthesizedExpression)
+where
+	caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)
+
+	complexPattern
+		= liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
+	simplePattern
+		= pToken TUnderscore $> Wildcard
+		<|> BasicValuePattern <$> basicValue
+		<|> IdentPattern <$> symbolIdent
+		<|> liftM2 ConstructorPattern constructorIdent (pure [])
+	simpleOrParenthesizedPattern = simplePattern <|> parenthesized complexPattern
+	pattern = complexPattern <|> simplePattern
+
+simpleExpression :: Parser Token Expression
+simpleExpression
+	= BasicValue <$> basicValue
+	<|> Symbol <$> symbolIdent
+	<|> Constructor <$> constructorIdent
+
+simpleOrParenthesizedExpression :: Parser Token Expression
+simpleOrParenthesizedExpression = simpleExpression <|> parenthesized expression
+
+expression :: Parser Token Expression
+expression = complexExpression <|> simpleExpression
+
+basicValue :: Parser Token BasicValue
+basicValue
+	= (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
+	<|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))
+
+typeIdent :: Parser Token TypeIdent
+typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+constructorIdent :: Parser Token ConstructorIdent
+constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+typeVarIdent :: Parser Token TypeVarIdent
+typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent
+
+symbolIdent :: Parser Token SymbolIdent
+symbolIdent
+	= fromIdent <$> pSatisfy isLowerCaseIdent
+	<|> fromIdent <$> pSatisfy isFunnyIdent
+
+fromIdent (TIdent id) :== id
+
+isUpperCaseIdent :: !Token -> Bool
+isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
+isUpperCaseIdent _ = False
+
+isLowerCaseIdent :: !Token -> Bool
+isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
+isLowerCaseIdent _ = False
+
+isFunnyIdent :: !Token -> Bool
+isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
+isFunnyIdent _ = False
+
+parenthesized :: !(Parser Token a) -> Parser Token a
+parenthesized p = pToken TParenOpen *> p <* pToken TParenClose
+
+list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
+list simpleElem elem = simpleList (simpleElem <|> parenthesized elem)
+
+simpleList :: !(Parser Token a) -> Parser Token [a]
+simpleList simpleElem = parenthesized (many simpleElem)
+
+nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
+nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)
+
+:: Token
+	= TParenOpen     //* (
+	| TParenClose    //* )
+
+	| TColon         //* :
+	| TUnderscore    //* _
+	| TArrow         //* ->
+
+	| TIdent !String
+	| TInt !Int
+	| TChar !Char
+
+	| TError !Int !Int !String
+
+instance == Token
+where
+	(==) TParenOpen y = y=:TParenOpen
+	(==) TParenClose y = y=:TParenClose
+	(==) TColon y = y=:TColon
+	(==) TUnderscore y = y=:TUnderscore
+	(==) TArrow y = y=:TArrow
+	(==) (TIdent x) (TIdent y) = x == y
+	(==) (TIdent _) _ = False
+	(==) (TInt x) (TInt y) = x == y
+	(==) (TInt _) _ = False
+	(==) (TChar x) (TChar y) = x == y
+	(==) (TChar _) _ = False
+	(==) (TError _ _ _) _ = False
+
+lex :: ![Char] -> [Token]
+lex cs = lex` 0 0 cs
+
+lex` :: !Int !Int ![Char] -> [Token]
+lex` _ _ []
+	= []
+/* This alternative is for characters that can never be part of identifiers: */
+lex` line col [c:cs]
+	| isJust mbToken
+		= [fromJust mbToken : lex` line (col+1) cs]
+where
+	mbToken = case c of
+		'(' -> ?Just TParenOpen
+		')' -> ?Just TParenClose
+		_ -> ?None
+/* This alternative is for characters that can be part of identifiers consisting of symbols: */
+lex` line col [c:cs]
+	| isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
+		= [fromJust mbToken : lex` line (col+1) cs]
+where
+	mbToken = case c of
+		':' -> ?Just TColon
+		_ -> ?None
+/* This alternative is for characters that can be part of regular identifiers: */
+lex` line col [c:cs]
+	| isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
+		= [fromJust mbToken : lex` line (col+1) cs]
+where
+	mbToken = case c of
+		'_' -> ?Just TUnderscore
+		_ -> ?None
+lex` line col ['-','>':cs]
+	| isEmpty cs || not (isFunnySymbol (hd cs))
+		= [TArrow : lex` line (col+2) cs]
+
+/* Whitespace */
+lex` line col ['\r','\n':cs]
+	= lex` (line+1) 0 cs
+lex` line col ['\n','\r':cs]
+	= lex` (line+1) 0 cs
+lex` line col ['\n':cs]
+	= lex` (line+1) 0 cs
+lex` line col ['\r':cs]
+	= lex` (line+1) 0 cs
+lex` line col [c:cs]
+	| isSpace c
+		= lex` line (col+1) cs
+
+/* Identifiers*/
+lex` line col cs=:[c:_]
+	| isAlpha c
+		# (name,cs) = span isIdentChar cs
+		= [TIdent (toString name) : lex` line (col + length name) cs]
+	| isFunnySymbol c
+		# (name,cs) = span isFunnySymbol cs
+		= [TIdent (toString name) : lex` line (col + length name) cs]
+
+/* Basic values */
+lex` line col cs=:[c:_]
+	| isDigit c
+		# (i,cs) = span isDigit cs
+		= [TInt (toInt (toString i)) : lex` line (col + length i) cs]
+lex` line col [c:cs]
+	| c == '-' && not (isEmpty cs) && isDigit (hd cs)
+		# (i,cs) = span isDigit cs
+		= [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
+lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
+	= [TChar c : lex` line (col+3) cs]
+
+/* Unparseable input */
+lex` line col cs
+	= [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]
+
+isFunnySymbol :: !Char -> Bool
+isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']
+
+isIdentChar :: !Char -> Bool
+isIdentChar c = isAlphanum c || c == '_' || c == '\''
author	Camil Staps	2023-01-23 22:36:16 +0100
committer	Camil Staps	2023-01-23 22:36:16 +0100
commit	301a73c63b3fe5e8306e9e8d213269a720b7a089 (patch)
tree	0593001bc36302e0c759d85e98fcd78dfe87fcef /snug-clean/src/Snug/Parse.icl
parent	Add example of small functional language and run time system (diff)