diff options
Diffstat (limited to 'snug-clean/src/Snug/Parse.icl')
-rw-r--r-- | snug-clean/src/Snug/Parse.icl | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/snug-clean/src/Snug/Parse.icl b/snug-clean/src/Snug/Parse.icl new file mode 100644 index 0000000..63a9fda --- /dev/null +++ b/snug-clean/src/Snug/Parse.icl @@ -0,0 +1,243 @@ +implementation module Snug.Parse + +import StdEnv + +import Control.Applicative +import Control.Monad +import Data.Either +import Data.Error +import Data.Functor +import Data.Maybe +import Data.Tuple +import qualified Text +from Text import class Text, instance Text String +import Text.Parsers.Simple.Core + +import Snug.Syntax + +parseSnug :: ![Char] -> MaybeError String [Definition] +parseSnug cs = case parse (many definition`) (lex cs) of + Left errors -> Error ('Text'.join "; " errors) + Right defs -> Ok defs + +definition` :: Parser Token Definition +definition` = parenthesized def +where + def + = liftM3 DataDef + (pToken (TIdent "data") *> typeIdent) + (fromMaybe [] <$> optional (simpleList typeVarIdent)) + (list simpleConstructorDef constructorDef) + <|> liftM2 TypeDef + (pToken (TIdent "type") *> typeIdent) + simpleOrParenthesizedType + <|> liftM4 FunDef + (pToken (TIdent "fun") *> symbolIdent) + (simpleList (parenthesized typedArgument)) + (pToken TColon *> type) + simpleOrParenthesizedExpression + + simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure []) + constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType) + + typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type) + +complexType :: Parser Token Type +complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType) + +simpleType :: Parser Token Type +simpleType + = Type <$> typeIdent + <|> TyVar <$> typeVarIdent + +type :: Parser Token Type +type = complexType <|> simpleType + +simpleOrParenthesizedType :: Parser Token Type +simpleOrParenthesizedType = simpleType <|> parenthesized complexType + +complexExpression :: Parser Token Expression +complexExpression + = liftM2 Case + (pToken (TIdent "case") *> simpleOrParenthesizedExpression) + (nonEmpty (simpleList (parenthesized caseAlternative))) + <|> liftM2 (foldl ExpApp) + simpleExpression + (some simpleOrParenthesizedExpression) +where + caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression) + + complexPattern + = liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern) + simplePattern + = pToken TUnderscore $> Wildcard + <|> BasicValuePattern <$> basicValue + <|> IdentPattern <$> symbolIdent + <|> liftM2 ConstructorPattern constructorIdent (pure []) + simpleOrParenthesizedPattern = simplePattern <|> parenthesized complexPattern + pattern = complexPattern <|> simplePattern + +simpleExpression :: Parser Token Expression +simpleExpression + = BasicValue <$> basicValue + <|> Symbol <$> symbolIdent + <|> Constructor <$> constructorIdent + +simpleOrParenthesizedExpression :: Parser Token Expression +simpleOrParenthesizedExpression = simpleExpression <|> parenthesized expression + +expression :: Parser Token Expression +expression = complexExpression <|> simpleExpression + +basicValue :: Parser Token BasicValue +basicValue + = (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _)) + <|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _)) + +typeIdent :: Parser Token TypeIdent +typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent + +constructorIdent :: Parser Token ConstructorIdent +constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent + +typeVarIdent :: Parser Token TypeVarIdent +typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent + +symbolIdent :: Parser Token SymbolIdent +symbolIdent + = fromIdent <$> pSatisfy isLowerCaseIdent + <|> fromIdent <$> pSatisfy isFunnyIdent + +fromIdent (TIdent id) :== id + +isUpperCaseIdent :: !Token -> Bool +isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0] +isUpperCaseIdent _ = False + +isLowerCaseIdent :: !Token -> Bool +isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0] +isLowerCaseIdent _ = False + +isFunnyIdent :: !Token -> Bool +isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0] +isFunnyIdent _ = False + +parenthesized :: !(Parser Token a) -> Parser Token a +parenthesized p = pToken TParenOpen *> p <* pToken TParenClose + +list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a] +list simpleElem elem = simpleList (simpleElem <|> parenthesized elem) + +simpleList :: !(Parser Token a) -> Parser Token [a] +simpleList simpleElem = parenthesized (many simpleElem) + +nonEmpty :: !(Parser Token [a]) -> Parser Token [a] +nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs) + +:: Token + = TParenOpen //* ( + | TParenClose //* ) + + | TColon //* : + | TUnderscore //* _ + | TArrow //* -> + + | TIdent !String + | TInt !Int + | TChar !Char + + | TError !Int !Int !String + +instance == Token +where + (==) TParenOpen y = y=:TParenOpen + (==) TParenClose y = y=:TParenClose + (==) TColon y = y=:TColon + (==) TUnderscore y = y=:TUnderscore + (==) TArrow y = y=:TArrow + (==) (TIdent x) (TIdent y) = x == y + (==) (TIdent _) _ = False + (==) (TInt x) (TInt y) = x == y + (==) (TInt _) _ = False + (==) (TChar x) (TChar y) = x == y + (==) (TChar _) _ = False + (==) (TError _ _ _) _ = False + +lex :: ![Char] -> [Token] +lex cs = lex` 0 0 cs + +lex` :: !Int !Int ![Char] -> [Token] +lex` _ _ [] + = [] +/* This alternative is for characters that can never be part of identifiers: */ +lex` line col [c:cs] + | isJust mbToken + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + '(' -> ?Just TParenOpen + ')' -> ?Just TParenClose + _ -> ?None +/* This alternative is for characters that can be part of identifiers consisting of symbols: */ +lex` line col [c:cs] + | isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs))) + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + ':' -> ?Just TColon + _ -> ?None +/* This alternative is for characters that can be part of regular identifiers: */ +lex` line col [c:cs] + | isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs))) + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + '_' -> ?Just TUnderscore + _ -> ?None +lex` line col ['-','>':cs] + | isEmpty cs || not (isFunnySymbol (hd cs)) + = [TArrow : lex` line (col+2) cs] + +/* Whitespace */ +lex` line col ['\r','\n':cs] + = lex` (line+1) 0 cs +lex` line col ['\n','\r':cs] + = lex` (line+1) 0 cs +lex` line col ['\n':cs] + = lex` (line+1) 0 cs +lex` line col ['\r':cs] + = lex` (line+1) 0 cs +lex` line col [c:cs] + | isSpace c + = lex` line (col+1) cs + +/* Identifiers*/ +lex` line col cs=:[c:_] + | isAlpha c + # (name,cs) = span isIdentChar cs + = [TIdent (toString name) : lex` line (col + length name) cs] + | isFunnySymbol c + # (name,cs) = span isFunnySymbol cs + = [TIdent (toString name) : lex` line (col + length name) cs] + +/* Basic values */ +lex` line col cs=:[c:_] + | isDigit c + # (i,cs) = span isDigit cs + = [TInt (toInt (toString i)) : lex` line (col + length i) cs] +lex` line col [c:cs] + | c == '-' && not (isEmpty cs) && isDigit (hd cs) + # (i,cs) = span isDigit cs + = [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs] +lex` line col ['\'',c,'\'':cs] // TODO: escape sequences + = [TChar c : lex` line (col+3) cs] + +/* Unparseable input */ +lex` line col cs + = [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")] + +isFunnySymbol :: !Char -> Bool +isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~'] + +isIdentChar :: !Char -> Bool +isIdentChar c = isAlphanum c || c == '_' || c == '\'' |