aboutsummaryrefslogtreecommitdiff
path: root/snug-clean/src/Snug/Parse.icl
diff options
context:
space:
mode:
authorCamil Staps2023-01-23 22:36:16 +0100
committerCamil Staps2023-01-23 22:36:16 +0100
commit301a73c63b3fe5e8306e9e8d213269a720b7a089 (patch)
tree0593001bc36302e0c759d85e98fcd78dfe87fcef /snug-clean/src/Snug/Parse.icl
parentAdd example of small functional language and run time system (diff)
Add Clean parser for snug
Diffstat (limited to 'snug-clean/src/Snug/Parse.icl')
-rw-r--r--snug-clean/src/Snug/Parse.icl243
1 files changed, 243 insertions, 0 deletions
diff --git a/snug-clean/src/Snug/Parse.icl b/snug-clean/src/Snug/Parse.icl
new file mode 100644
index 0000000..63a9fda
--- /dev/null
+++ b/snug-clean/src/Snug/Parse.icl
@@ -0,0 +1,243 @@
+implementation module Snug.Parse
+
+import StdEnv
+
+import Control.Applicative
+import Control.Monad
+import Data.Either
+import Data.Error
+import Data.Functor
+import Data.Maybe
+import Data.Tuple
+import qualified Text
+from Text import class Text, instance Text String
+import Text.Parsers.Simple.Core
+
+import Snug.Syntax
+
+parseSnug :: ![Char] -> MaybeError String [Definition]
+parseSnug cs = case parse (many definition`) (lex cs) of
+ Left errors -> Error ('Text'.join "; " errors)
+ Right defs -> Ok defs
+
+definition` :: Parser Token Definition
+definition` = parenthesized def
+where
+ def
+ = liftM3 DataDef
+ (pToken (TIdent "data") *> typeIdent)
+ (fromMaybe [] <$> optional (simpleList typeVarIdent))
+ (list simpleConstructorDef constructorDef)
+ <|> liftM2 TypeDef
+ (pToken (TIdent "type") *> typeIdent)
+ simpleOrParenthesizedType
+ <|> liftM4 FunDef
+ (pToken (TIdent "fun") *> symbolIdent)
+ (simpleList (parenthesized typedArgument))
+ (pToken TColon *> type)
+ simpleOrParenthesizedExpression
+
+ simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
+ constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)
+
+ typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)
+
+complexType :: Parser Token Type
+complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)
+
+simpleType :: Parser Token Type
+simpleType
+ = Type <$> typeIdent
+ <|> TyVar <$> typeVarIdent
+
+type :: Parser Token Type
+type = complexType <|> simpleType
+
+simpleOrParenthesizedType :: Parser Token Type
+simpleOrParenthesizedType = simpleType <|> parenthesized complexType
+
+complexExpression :: Parser Token Expression
+complexExpression
+ = liftM2 Case
+ (pToken (TIdent "case") *> simpleOrParenthesizedExpression)
+ (nonEmpty (simpleList (parenthesized caseAlternative)))
+ <|> liftM2 (foldl ExpApp)
+ simpleExpression
+ (some simpleOrParenthesizedExpression)
+where
+ caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)
+
+ complexPattern
+ = liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
+ simplePattern
+ = pToken TUnderscore $> Wildcard
+ <|> BasicValuePattern <$> basicValue
+ <|> IdentPattern <$> symbolIdent
+ <|> liftM2 ConstructorPattern constructorIdent (pure [])
+ simpleOrParenthesizedPattern = simplePattern <|> parenthesized complexPattern
+ pattern = complexPattern <|> simplePattern
+
+simpleExpression :: Parser Token Expression
+simpleExpression
+ = BasicValue <$> basicValue
+ <|> Symbol <$> symbolIdent
+ <|> Constructor <$> constructorIdent
+
+simpleOrParenthesizedExpression :: Parser Token Expression
+simpleOrParenthesizedExpression = simpleExpression <|> parenthesized expression
+
+expression :: Parser Token Expression
+expression = complexExpression <|> simpleExpression
+
+basicValue :: Parser Token BasicValue
+basicValue
+ = (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
+ <|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))
+
+typeIdent :: Parser Token TypeIdent
+typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+constructorIdent :: Parser Token ConstructorIdent
+constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+typeVarIdent :: Parser Token TypeVarIdent
+typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent
+
+symbolIdent :: Parser Token SymbolIdent
+symbolIdent
+ = fromIdent <$> pSatisfy isLowerCaseIdent
+ <|> fromIdent <$> pSatisfy isFunnyIdent
+
+fromIdent (TIdent id) :== id
+
+isUpperCaseIdent :: !Token -> Bool
+isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
+isUpperCaseIdent _ = False
+
+isLowerCaseIdent :: !Token -> Bool
+isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
+isLowerCaseIdent _ = False
+
+isFunnyIdent :: !Token -> Bool
+isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
+isFunnyIdent _ = False
+
+parenthesized :: !(Parser Token a) -> Parser Token a
+parenthesized p = pToken TParenOpen *> p <* pToken TParenClose
+
+list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
+list simpleElem elem = simpleList (simpleElem <|> parenthesized elem)
+
+simpleList :: !(Parser Token a) -> Parser Token [a]
+simpleList simpleElem = parenthesized (many simpleElem)
+
+nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
+nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)
+
+:: Token
+ = TParenOpen //* (
+ | TParenClose //* )
+
+ | TColon //* :
+ | TUnderscore //* _
+ | TArrow //* ->
+
+ | TIdent !String
+ | TInt !Int
+ | TChar !Char
+
+ | TError !Int !Int !String
+
+instance == Token
+where
+ (==) TParenOpen y = y=:TParenOpen
+ (==) TParenClose y = y=:TParenClose
+ (==) TColon y = y=:TColon
+ (==) TUnderscore y = y=:TUnderscore
+ (==) TArrow y = y=:TArrow
+ (==) (TIdent x) (TIdent y) = x == y
+ (==) (TIdent _) _ = False
+ (==) (TInt x) (TInt y) = x == y
+ (==) (TInt _) _ = False
+ (==) (TChar x) (TChar y) = x == y
+ (==) (TChar _) _ = False
+ (==) (TError _ _ _) _ = False
+
+lex :: ![Char] -> [Token]
+lex cs = lex` 0 0 cs
+
+lex` :: !Int !Int ![Char] -> [Token]
+lex` _ _ []
+ = []
+/* This alternative is for characters that can never be part of identifiers: */
+lex` line col [c:cs]
+ | isJust mbToken
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ '(' -> ?Just TParenOpen
+ ')' -> ?Just TParenClose
+ _ -> ?None
+/* This alternative is for characters that can be part of identifiers consisting of symbols: */
+lex` line col [c:cs]
+ | isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ ':' -> ?Just TColon
+ _ -> ?None
+/* This alternative is for characters that can be part of regular identifiers: */
+lex` line col [c:cs]
+ | isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ '_' -> ?Just TUnderscore
+ _ -> ?None
+lex` line col ['-','>':cs]
+ | isEmpty cs || not (isFunnySymbol (hd cs))
+ = [TArrow : lex` line (col+2) cs]
+
+/* Whitespace */
+lex` line col ['\r','\n':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\n','\r':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\n':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\r':cs]
+ = lex` (line+1) 0 cs
+lex` line col [c:cs]
+ | isSpace c
+ = lex` line (col+1) cs
+
+/* Identifiers*/
+lex` line col cs=:[c:_]
+ | isAlpha c
+ # (name,cs) = span isIdentChar cs
+ = [TIdent (toString name) : lex` line (col + length name) cs]
+ | isFunnySymbol c
+ # (name,cs) = span isFunnySymbol cs
+ = [TIdent (toString name) : lex` line (col + length name) cs]
+
+/* Basic values */
+lex` line col cs=:[c:_]
+ | isDigit c
+ # (i,cs) = span isDigit cs
+ = [TInt (toInt (toString i)) : lex` line (col + length i) cs]
+lex` line col [c:cs]
+ | c == '-' && not (isEmpty cs) && isDigit (hd cs)
+ # (i,cs) = span isDigit cs
+ = [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
+lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
+ = [TChar c : lex` line (col+3) cs]
+
+/* Unparseable input */
+lex` line col cs
+ = [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]
+
+isFunnySymbol :: !Char -> Bool
+isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']
+
+isIdentChar :: !Char -> Bool
+isIdentChar c = isAlphanum c || c == '_' || c == '\''