implementation module Snug.Parse import StdEnv import Control.Applicative import Control.Monad import Data.Either import Data.Error import Data.Functor import Data.Maybe import Data.Tuple import qualified Text from Text import class Text, instance Text String import Text.Parsers.Simple.Core import qualified Text.Parsers.Simple.Core import Snug.Syntax // override infix priority to use easily with <$> (<<|>) infixr 3 :: !(Parser t a) (Parser t a) -> Parser t a (<<|>) left right = left 'Text.Parsers.Simple.Core'. <<|> right parseSnug :: ![Char] -> MaybeError String [Definition] parseSnug cs = case parse (many definition`) (filterComments (lex cs)) of Left errors -> Error ('Text'.join "; " errors) Right defs -> Ok defs where filterComments tks = [t \\ t <- tks | not (t=:(TComment _))] definition` :: Parser Token Definition definition` = parenthesized def where def = liftM3 DataDef (pToken (TIdent "data") *> typeIdent) (fromMaybe [] <$> optional (simpleList typeVarIdent)) (list simpleConstructorDef constructorDef) <<|> liftM2 TypeDef (pToken (TIdent "type") *> typeIdent) simpleOrParenthesizedType <<|> liftM4 FunDef (pToken (TIdent "fun") *> symbolIdent) (simpleList (parenthesized typedArgument)) (pToken TColon *> type) simpleOrParenthesizedExpression simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure []) constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType) typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type) complexType :: Parser Token Type complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType) simpleType :: Parser Token Type simpleType = Type <$> typeIdent <<|> TyVar <$> typeVarIdent type :: Parser Token Type type = complexType <<|> simpleType simpleOrParenthesizedType :: Parser Token Type simpleOrParenthesizedType = simpleType <<|> parenthesized complexType complexExpression :: Parser Token Expression complexExpression = liftM2 Case (pToken (TIdent "case") *> simpleOrParenthesizedExpression) (nonEmpty (simpleList (parenthesized caseAlternative))) <<|> liftM2 (foldl ExpApp) simpleExpression (some simpleOrParenthesizedExpression) where caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression) complexPattern = liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern) simplePattern = pToken TUnderscore $> Wildcard <<|> BasicValuePattern <$> basicValue <<|> IdentPattern <$> symbolIdent <<|> liftM2 ConstructorPattern constructorIdent (pure []) simpleOrParenthesizedPattern = simplePattern <<|> parenthesized complexPattern pattern = complexPattern <<|> simplePattern simpleExpression :: Parser Token Expression simpleExpression = BasicValue <$> basicValue <<|> Symbol <$> symbolIdent <<|> Constructor <$> constructorIdent simpleOrParenthesizedExpression :: Parser Token Expression simpleOrParenthesizedExpression = simpleExpression <<|> parenthesized expression expression :: Parser Token Expression expression = complexExpression <<|> simpleExpression basicValue :: Parser Token BasicValue basicValue = (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _)) <<|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _)) typeIdent :: Parser Token TypeIdent typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent constructorIdent :: Parser Token ConstructorIdent constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent typeVarIdent :: Parser Token TypeVarIdent typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent symbolIdent :: Parser Token SymbolIdent symbolIdent = fromIdent <$> pSatisfy isLowerCaseIdent <<|> fromIdent <$> pSatisfy isFunnyIdent fromIdent (TIdent id) :== id isUpperCaseIdent :: !Token -> Bool isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0] isUpperCaseIdent _ = False isLowerCaseIdent :: !Token -> Bool isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0] isLowerCaseIdent _ = False isFunnyIdent :: !Token -> Bool isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0] isFunnyIdent _ = False parenthesized :: !(Parser Token a) -> Parser Token a parenthesized p = pToken TParenOpen *> p <* pToken TParenClose list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a] list simpleElem elem = simpleList (simpleElem <<|> parenthesized elem) simpleList :: !(Parser Token a) -> Parser Token [a] simpleList simpleElem = parenthesized (many simpleElem) nonEmpty :: !(Parser Token [a]) -> Parser Token [a] nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs) :: Token = TParenOpen //* ( | TParenClose //* ) | TColon //* : | TUnderscore //* _ | TArrow //* -> | TIdent !String | TInt !Int | TChar !Char | TComment !String //* (# ... #) | TError !Int !Int !String instance == Token where (==) TParenOpen y = y=:TParenOpen (==) TParenClose y = y=:TParenClose (==) TColon y = y=:TColon (==) TUnderscore y = y=:TUnderscore (==) TArrow y = y=:TArrow (==) (TIdent x) (TIdent y) = x == y (==) (TIdent _) _ = False (==) (TInt x) (TInt y) = x == y (==) (TInt _) _ = False (==) (TChar x) (TChar y) = x == y (==) (TChar _) _ = False (==) (TComment x) (TComment y) = x == y (==) (TComment _) _ = False (==) (TError _ _ _) _ = False lex :: ![Char] -> [Token] lex cs = lex` 0 0 cs lex` :: !Int !Int ![Char] -> [Token] lex` _ _ [] = [] lex` line col ['(#':cs] = stripComment line (col+2) cs 0 [] where stripComment line col ['#)':cs] 0 acc = [TComment (toString (reverse acc)) : lex` line (col+2) cs] stripComment line col ['(#':cs] n acc = stripComment line (col+2) cs (n+1) ['#(':acc] stripComment line col ['\r\n':cs] n acc = stripComment (line+1) 0 cs n ['\n\r':acc] stripComment line col ['\n\r':cs] n acc = stripComment (line+1) 0 cs n ['\r\n':acc] stripComment line col [c:cs] n acc | c=='\n' || c=='\r' = stripComment (line+1) 0 cs n [c:acc] = stripComment line (col+1) cs n [c:acc] stripComment line col [] _ _ = [TError line col "end of file while scanning comment"] /* This alternative is for characters that can never be part of identifiers: */ lex` line col [c:cs] | isJust mbToken = [fromJust mbToken : lex` line (col+1) cs] where mbToken = case c of '(' -> ?Just TParenOpen ')' -> ?Just TParenClose _ -> ?None /* This alternative is for characters that can be part of identifiers consisting of symbols: */ lex` line col [c:cs] | isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs))) = [fromJust mbToken : lex` line (col+1) cs] where mbToken = case c of ':' -> ?Just TColon _ -> ?None /* This alternative is for characters that can be part of regular identifiers: */ lex` line col [c:cs] | isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs))) = [fromJust mbToken : lex` line (col+1) cs] where mbToken = case c of '_' -> ?Just TUnderscore _ -> ?None lex` line col ['-','>':cs] | isEmpty cs || not (isFunnySymbol (hd cs)) = [TArrow : lex` line (col+2) cs] /* Whitespace */ lex` line col ['\r','\n':cs] = lex` (line+1) 0 cs lex` line col ['\n','\r':cs] = lex` (line+1) 0 cs lex` line col ['\n':cs] = lex` (line+1) 0 cs lex` line col ['\r':cs] = lex` (line+1) 0 cs lex` line col [c:cs] | isSpace c = lex` line (col+1) cs /* Identifiers*/ lex` line col cs=:[c:_] | isAlpha c # (name,cs) = span isIdentChar cs = [TIdent (toString name) : lex` line (col + length name) cs] | isFunnySymbol c # (name,cs) = span isFunnySymbol cs = [TIdent (toString name) : lex` line (col + length name) cs] /* Basic values */ lex` line col cs=:[c:_] | isDigit c # (i,cs) = span isDigit cs = [TInt (toInt (toString i)) : lex` line (col + length i) cs] lex` line col [c:cs] | c == '-' && not (isEmpty cs) && isDigit (hd cs) # (i,cs) = span isDigit cs = [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs] lex` line col ['\'',c,'\'':cs] // TODO: escape sequences = [TChar c : lex` line (col+3) cs] /* Unparseable input */ lex` line col cs = [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")] isFunnySymbol :: !Char -> Bool isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~'] isIdentChar :: !Char -> Bool isIdentChar c = isAlphanum c || c == '_' || c == '\''