-- vim: et ts=2 sw=2 ai: {-# LANGUAGE LambdaCase #-} module SPL.Parse (parse) where import Control.Applicative import Control.Monad import Data.Functor import Text.Parsec (sepBy, ()) import qualified Text.Parsec as P import SPL.Syntax hiding (TInt,TBool,TChar,TArrow,TVar) import qualified SPL.Syntax import SPL.Lex type Parser t = P.Parsec [Token] () t parse :: [Token] -> Either P.ParseError Program parse = P.parse spl "Not a valid program" . filter (not . isCommentToken) check :: String -> (Token -> Maybe a) -> Parser a check e f = P.tokenPrim show (\p _ _ -> P.incSourceColumn p 1) f e -- TODO: We increment the source column to get meaningful error messages; -- the error that is furthest in the parsing process. We need to keep positions -- in the list of tokens so that we can update this properly. token :: Token -> Parser Token token t = check (show t) (\u -> if t == u then Just t else Nothing) choice :: [Parser a] -> Parser a choice = P.choice . map P.try trans :: Token -> a -> Parser a trans t x = token t $> x spl :: Parser Program spl = collect <$> (many toplevel <* P.eof) where collect :: [Either Variable Function] -> Program collect vfs = Program [f | Right f <- vfs] [v | Left v <- vfs] toplevel :: Parser (Either Variable Function) toplevel = choice [ Left <$> var , Right <$> fun ] comment :: Parser String comment = check "comment" $ \case TSingleComment s -> Just s TBlockComment s -> Just s _ -> Nothing parenthesised :: Parser a -> Parser a parenthesised = P.between (token TParenOpen) (token TParenClose) braced :: Parser a -> Parser a braced = P.between (token TBraceOpen) (token TBraceClose) bracked :: Parser a -> Parser a bracked = P.between (token TBrackOpen) (token TBrackClose) var :: Parser Variable var = do t <- Just <$> plainType <|> (token TVar $> Nothing) id <- ident token TEquals val <- expr token TSemicolon return $ Variable id t val fun :: Parser Function fun = do id <- ident args <- parenthesised (ident `sepBy` token TComma) ftype <- optional (token TColonColon *> funType) token TBraceOpen vars <- many (P.try var) stmt <- statements token TBraceClose return $ Function id ftype args vars stmt ident :: Parser Name ident = check "identifier" $ \case TIdent id -> Just id _ -> Nothing funType :: Parser Type funType = do argtypes <- many plainType token TArrow rettype <- plainType return $ SPL.Syntax.TArrow argtypes rettype plainType :: Parser Type plainType = choice [ trans TIntType SPL.Syntax.TInt , trans TBoolType SPL.Syntax.TBool , trans TCharType SPL.Syntax.TChar , trans TVoidType SPL.Syntax.TVoid , TList <$> bracked plainType , parenthesised $ liftM2 TTuple plainType (token TComma *> plainType) , SPL.Syntax.TVar <$> ident ] expr :: Parser Expression expr = rightAssoc (trans TPipePipe Or) $ rightAssoc (trans TAmpAmp And) $ rightAssoc (trans TEqEq Eq <|> trans TExclamEq Ne <|> trans TLtEq Le <|> trans TGtEq Ge <|> trans TLt Lt <|> trans TGt Gt) $ rightAssoc (trans TColon Cons) $ leftAssoc (trans TPlus Add <|> trans TMinus Sub) $ leftAssoc (trans TAsterisk Mul <|> trans TSlash Div) $ leftAssoc (trans TPercent Mod) $ fields $ simpleExpr where rightAssoc :: Parser Op2 -> Parser Expression -> Parser Expression rightAssoc op e = do e1 <- e rest <- optional (liftM2 (,) op (rightAssoc op e)) return $ case rest of Nothing -> e1 Just (op,e2) -> Op2 e1 op e2 leftAssoc :: Parser Op2 -> Parser Expression -> Parser Expression leftAssoc op e = do e1 <- e rest <- many (liftM2 (,) op e) foldM (\e1 (op,e2) -> pure $ Op2 e1 op e2) e1 rest fields :: Parser Expression -> Parser Expression fields p = liftM2 (foldl Field) p (many field) simpleExpr = choice [ liftM2 Op1 (trans TExclam Not <|> trans TMinus Neg) simpleExpr , literal , tuple , funcall , var , parenthesised expr ] literal :: Parser Expression literal = fmap Literal $ nil <|> (check "literal" $ \case TInt i -> Just (LInt i) TBool b -> Just (LBool b) TChar c -> Just (LChar c) _ -> Nothing) where nil = pure LNil <* token TBrackOpen <* token TBrackClose tuple = parenthesised (liftM2 Tuple expr (token TComma *> expr)) funcall = liftM2 FunCall ident (parenthesised $ expr `sepBy` token TComma) var = Var <$> ident statements :: Parser Statement statements = (foldr1 Seq <$> some statement <|> pure Nop) "statement" -- NOTE: we here assume that if/while blocks need not be braced, as in the -- example function `abs`. The alternative is using `braced statements`. statement :: Parser Statement statement = choice [ liftM3 If (token TIf *> parenthesised expr) statement (optional (token TElse *> statement)) , liftM2 While (token TWhile *> parenthesised expr) statement , Eval <$> (expr <* token TSemicolon) , liftM3 Assign ident (many field) (token TEquals *> expr <* token TSemicolon) , Return <$> (token TReturn *> optional expr <* token TSemicolon) , braced statements ] field :: Parser Field field = token TDot *> check "field" (\case TIdent "hd" -> Just Hd TIdent "tl" -> Just Tl TIdent "fst" -> Just Fst TIdent "snd" -> Just Snd _ -> Nothing)