implementation module Sil.Parse import StdBool import StdChar from StdFunc import flip, o import StdInt import StdList import StdString import StdTuple import Control.Applicative import Control.Monad import Data.Error from Data.Func import $ import Data.Functor import Data.List import Data.Maybe import qualified Text as T from Text import <+, class Text, instance Text String import GenEq import Sil.Syntax import Sil.Util.Parser import Sil.Util.Printer derive gEq Token, Literal instance == Token where == a b = gEq{|*|} a b instance toString Token where toString TParenOpen = "(" toString TParenClose = ")" toString TBraceOpen = "{" toString TBraceClose = "}" toString TComma = "," toString TSemicolon = ";" toString TAssign = ":=" toString TTilde = "~" toString TPlus = "+" toString TMinus = "-" toString TStar = "*" toString TSlash = "/" toString TPercent = "%" toString (TLit l) = toString l toString TIf = "if" toString TWhile = "while" toString TReturn = "return" toString (TMachineCode s) = "|~ " +++ s toString (TName s) = s instance name Token where name (TLit _) = "literal" name (TName _) = "name" name (TMachineCode _) = "machine code" name t = toString t instance toString ParseError where toString (Invalid loc sym) = "Invalid token '" <+ sym <+ "' while parsing a " <+ loc <+ "." toString (Expected s) = "Expected " <+ s <+ "." toString UnknownError = "Unknown error." tokenise :: [Char] -> MaybeError ParseError [Token] tokenise cs = reverse <$> tks cs [] where tks :: [Char] [Token] -> MaybeError ParseError [Token] tks [] t = pure t tks [':':'=':r] t = tks r [TAssign :t] tks ['=':'=':r] t = tks r [TDoubleEquals :t] tks ['|':'|':r] t = tks r [TDoubleBar :t] tks ['&':'&':r] t = tks r [TDoubleAmpersand:t] tks ['(':r] t = tks r [TParenOpen :t] tks [')':r] t = tks r [TParenClose :t] tks ['{':r] t = tks r [TBraceOpen :t] tks ['}':r] t = tks r [TBraceClose :t] tks [',':r] t = tks r [TComma :t] tks [';':r] t = tks r [TSemicolon :t] tks ['!':r] t = tks r [TExclamation:t] tks ['~':r] t = tks r [TTilde :t] tks ['+':r] t = tks r [TPlus :t] tks ['-':r] t = tks r [TMinus :t] tks ['*':r] t = tks r [TStar :t] tks ['/':r] t = tks r [TSlash :t] tks ['%':r] t = tks r [TPercent :t] tks ['i':'f' :s:r] t | isSpace s = tks r [TIf :t] tks ['e':'l':'s':'e' :s:r] t | isSpace s = tks r [TElse :t] tks ['w':'h':'i':'l':'e' :s:r] t | isSpace s = tks r [TWhile :t] tks ['r':'e':'t':'u':'r':'n':s:r] t | isSpace s = tks r [TReturn:t] tks ['T':'r':'u':'e' :s:r] t | isSpace s = tks r [TLit $ BLit True :t] tks ['F':'a':'l':'s':'e' :s:r] t | isSpace s = tks r [TLit $ BLit False:t] tks ['|':'~':r] t = tks r` [TMachineCode $ toString c:t] where (c,r`) = span (not o flip isMember ['\r\n']) r tks cs=:[h:_] t | isSpace h = tks (dropWhile isSpace cs) t | isDigit h = tks numrest [TLit $ ILit $ toInt $ toString num:t] | not (isNameChar h) = Error $ Invalid "name" h | otherwise = tks namerest [TName $ toString name:t] where (name,namerest) = span isNameChar cs (num,numrest) = span isDigit cs isNameChar :: Char -> Bool isNameChar c = isAlpha c || isMember c ['_\''] parse :: ([Token] -> MaybeError ParseError Program) parse = fst o runParser program program :: Parser Token Program program = (\fs -> {p_funs=fs}) <$> some function <* eof function :: Parser Token Function function = type >>= \t -> name >>= \n -> item TParenOpen *> seplist TComma arg >>= \args -> item TParenClose *> item TBraceOpen *> codeblock >>= \cb -> item TBraceClose *> pure { f_type = t , f_name = n , f_args = args , f_code = cb } codeblock :: Parser Token CodeBlock codeblock = many initialisation >>= \is -> many statement >>= \s -> pure {cb_init=flatten is, cb_content=s} initialisation :: Parser Token [Initialisation] initialisation = type >>= \t -> seplist TComma name >>= \ns -> item TSemicolon >>= \_ -> pure [{init_type=t, init_name=n} \\ n <- ns] statement :: Parser Token Statement statement = declaration <|> liftM Application (expression <* item TSemicolon) <|> return <|> if` <|> while <|> machinecode where declaration :: Parser Token Statement declaration = liftM2 Declaration name (item TAssign *> expression <* item TSemicolon) return :: Parser Token Statement return = liftM Return (item TReturn *> optional expression <* item TSemicolon) machinecode :: Parser Token Statement machinecode = (\(TMachineCode s) -> MachineStm s) <$> satisfy isMachineCode where isMachineCode (TMachineCode _) = True; isMachineCode _ = False if` :: Parser Token Statement if` = item TIf *> parenthised expression >>= \cond -> braced codeblock >>= \iftrue -> many elseif >>= \elseifs -> optional (item TElse *> braced codeblock) >>= \iffalse -> pure $ If [(cond,iftrue):elseifs] iffalse where elseif = list [TElse, TIf] *> parenthised expression >>= \cond -> braced codeblock >>= \block -> pure (cond, block) while :: Parser Token Statement while = item TWhile *> parenthised expression >>= \cond -> braced codeblock >>= \do -> pure $ While cond do expression :: Parser Token Expression expression = rightAssoc (op TDoubleBar LogOr) $ rightAssoc (op TDoubleAmpersand LogAnd) $ rightAssoc (op TDoubleEquals Equals) $ leftAssoc ( op TPlus Add <|> op TMinus Sub ) $ leftAssoc ( op TStar Mul <|> op TSlash Div <|> op TPercent Rem ) $ noInfix where op :: Token Op2 -> Parser Token Op2 op token operator = item token *> pure operator rightAssoc :: (Parser Token Op2) (Parser Token Expression) -> Parser Token Expression rightAssoc opp appp = appp >>= \e1 -> optional (opp >>= \op -> rightAssoc opp appp >>= \e -> pure (op,e)) >>= pure o maybe e1 (\(op,e2) -> BuiltinApp2 e1 op e2) leftAssoc :: (Parser Token Op2) (Parser Token Expression) -> Parser Token Expression leftAssoc opp appp = appp >>= \e1 -> many (opp >>= \op -> appp >>= \e -> pure (op,e)) >>= foldM (\e (op,e2) -> pure $ BuiltinApp2 e op e2) e1 noInfix :: Parser Token Expression noInfix = liftM2 App name (item TParenOpen *> seplist TComma expression <* item TParenClose) <|> op TTilde Neg <|> op TExclamation Not <|> liftM Literal literal <|> liftM Name name <|> parenthised expression where op :: Token Op1 -> Parser Token Expression op token operator = liftM (BuiltinApp operator) (item token *> noInfix) name :: Parser Token Name name = liftM (\(TName s) -> s) $ satisfy isName Expected "name" where isName (TName _) = True isName _ = False arg :: Parser Token Arg arg = (type >>= \type -> name >>= \name -> pure {arg_type=type, arg_name=name}) Expected "argument" type :: Parser Token Type type = type "Bool" TBool <|> type "Int" TInt <|> type "Void" TVoid Expected "type" where type s t = item (TName s) *> pure t literal :: Parser Token Literal literal = satisfy isLit >>= \(TLit lit) -> pure lit where isLit :: Token -> Bool isLit (TLit _) = True isLit _ = False parenthised :: (Parser Token a) -> Parser Token a parenthised p = item TParenOpen *> p <* item TParenClose braced :: (Parser Token a) -> Parser Token a braced p = item TBraceOpen *> p <* item TBraceClose