implementation module Sil.Parse import StdBool import StdChar from StdFunc import flip, o import StdInt import StdList import StdString import StdTuple import Control.Applicative import Control.Monad import Data.Error from Data.Func import $ import Data.Functor import Data.List import Data.Maybe import qualified Text as T from Text import <+, class Text, instance Text String import Sil.Error import Sil.Syntax import Sil.Types import Sil.Util.Parser import Sil.Util.Printer derive gEq Token, Literal instance == Token where == a b = gEq{|*|} a b instance toString Token where toString TParenOpen = "(" toString TParenClose = ")" toString TBrackOpen = "[" toString TBrackClose = "]" toString TBraceOpen = "{" toString TBraceClose = "}" toString TComma = "," toString TColon = ":" toString TSemicolon = ";" toString (TField f) = "." +++ f toString TAssign = ":=" toString TTilde = "~" toString TExclamation = "!" toString TPlus = "+" toString TMinus = "-" toString TStar = "*" toString TSlash = "/" toString TPercent = "%" toString TEquals = "==" toString TUnequals = "<>" toString TLe = "<=" toString TGe = ">=" toString TLt = "<" toString TGt = ">" toString TDoubleBar = "||" toString TDoubleAmpersand = "&&" toString (TLit l) = toString l toString TIf = "if" toString TElse = "else" toString TWhile = "while" toString TReturn = "return" toString (TMachineCode s) = "|~ " +++ s toString (TName s) = s toString t = "???" instance name Token where name (TField _) = "field" name (TLit _) = "literal" name (TName _) = "name" name (TMachineCode _) = "machine code" name t = toString t tokenise :: [Char] -> MaybeError Error [ParseInput Token] tokenise cs = reverse <$> tks cs [] where tks :: [Char] [ParseInput Token] -> MaybeError Error [ParseInput Token] tks [] t = pure t tks ['/':'/':r] t = tks (dropWhile ((<>) '\n') r) t tks ['/':'*':r] t = tks (skipUntilEndOfComment r) t where skipUntilEndOfComment [] = [] skipUntilEndOfComment ['*':'/':r] = r skipUntilEndOfComment [_:r] = skipUntilEndOfComment r tks ['.':r=:[c:_]] t | isNameChar c = tks r` [PI_Token $ TField $ toString f:t] where (f,r`) = span isNameChar r tks [':':'=':r] t = tks r [PI_Token TAssign :t] tks ['=':'=':r] t = tks r [PI_Token TEquals :t] tks ['<':'>':r] t = tks r [PI_Token TUnequals :t] tks ['<':'=':r] t = tks r [PI_Token TLe :t] tks ['>':'=':r] t = tks r [PI_Token TGe :t] tks ['<' :r] t = tks r [PI_Token TLt :t] tks ['>' :r] t = tks r [PI_Token TGt :t] tks ['|':'|':r] t = tks r [PI_Token TDoubleBar :t] tks ['&':'&':r] t = tks r [PI_Token TDoubleAmpersand:t] tks ['(' :r] t = tks r [PI_Token TParenOpen :t] tks [')' :r] t = tks r [PI_Token TParenClose :t] tks ['[' :r] t = tks r [PI_Token TBrackOpen :t] tks [']' :r] t = tks r [PI_Token TBrackClose :t] tks ['{' :r] t = tks r [PI_Token TBraceOpen :t] tks ['}' :r] t = tks r [PI_Token TBraceClose :t] tks [',' :r] t = tks r [PI_Token TComma :t] tks [':' :r] t = tks r [PI_Token TColon :t] tks [';' :r] t = tks r [PI_Token TSemicolon :t] tks ['!' :r] t = tks r [PI_Token TExclamation :t] tks ['~' :r] t = tks r [PI_Token TTilde :t] tks ['+' :r] t = tks r [PI_Token TPlus :t] tks ['-' :r] t = tks r [PI_Token TMinus :t] tks ['*' :r] t = tks r [PI_Token TStar :t] tks ['/' :r] t = tks r [PI_Token TSlash :t] tks ['%' :r] t = tks r [PI_Token TPercent :t] tks ['i':'f' :r=:[n:_]] t | isNotNameChar n = tks r [PI_Token TIf :t] tks ['e':'l':'s':'e' :r=:[n:_]] t | isNotNameChar n = tks r [PI_Token TElse :t] tks ['w':'h':'i':'l':'e' :r=:[n:_]] t | isNotNameChar n = tks r [PI_Token TWhile :t] tks ['r':'e':'t':'u':'r':'n':r=:[n:_]] t | isNotNameChar n = tks r [PI_Token TReturn:t] tks ['T':'r':'u':'e' :r=:[n:_]] t | isNotNameChar n = tks r [PI_Token $ TLit $ BLit True :t] tks ['F':'a':'l':'s':'e' :r=:[n:_]] t | isNotNameChar n = tks r [PI_Token $ TLit $ BLit False:t] tks ['|':'~':r] t = tks r` [PI_Token $ TMachineCode $ toString c:t] where (c,r`) = span (not o flip isMember ['\r\n']) r tks cs=:[h:r] t | h == '\n' = tks r [PI_NewLine:t] | isSpace h = tks (dropWhile isSpace cs) t | isDigit h = tks numrest [PI_Token $ TLit $ ILit $ toInt $ toString num:t] | not (isNameChar h) = Error $ P_Invalid "name" h | otherwise = tks namerest [PI_Token $ TName $ toString name:t] where (name,namerest) = span isNameChar cs (num,numrest) = span isDigit cs isNameChar :: Char -> Bool isNameChar c = isAlphanum c || isMember c ['_\''] isNotNameChar = not o isNameChar parse :: ([ParseInput Token] -> MaybeError Error Program) parse = fst o runParser program o makeParseState program :: Parser Token Program program = many initialisation >>= \globss -> function until eof >>= \fs -> pure {p_globals=flatten globss, p_funs=reverse fs} function :: Parser Token Function function = type >>= \t -> getPosition >>= \pos -> name >>= \n -> item TParenOpen *> seplistUntil TParenClose TComma arg >>= \args -> codeblock >>= \cb -> pure { f_type = t , f_name = n , f_args = args , f_code = cb , f_pos = pos } codeblock :: Parser Token CodeBlock codeblock = item TBraceOpen *> many initialisation >>= \is -> statement until (item TBraceClose) >>= \s -> pure {cb_init=flatten is, cb_content=s} initialisation :: Parser Token [Initialisation] initialisation = type >>= \t -> seplist TComma (init t) <* item TSemicolon where init t = getPosition >>= \pos -> name >>= \n -> optional (item TAssign *> expression) >>= \v -> pure {init_type=t, init_name=n, init_value=v, init_pos=pos} statement :: Parser Token Statement statement = getPosition >>= \pos -> ( declaration pos <|> liftM (Application pos) (expression <* item TSemicolon) <|> return pos <|> if` pos <|> while pos <|> machinecode pos ) <#> "statement" where declaration :: ParsePosition -> Parser Token Statement declaration p = liftM2 (Declaration p) name (item TAssign *> expression <* item TSemicolon) return :: ParsePosition -> Parser Token Statement return p = liftM (Return p) (item TReturn *> optional expression <* item TSemicolon) machinecode :: ParsePosition -> Parser Token Statement machinecode p = (\(TMachineCode s) -> MachineStm p s) <$> satisfy isMachineCode where isMachineCode (TMachineCode _) = True; isMachineCode _ = False if` :: ParsePosition -> Parser Token Statement if` p = item TIf *> parenthised expression >>= \cond -> codeblock >>= \iftrue -> many elseif >>= \elseifs -> optional (item TElse *> codeblock) >>= \iffalse -> pure $ If p [(cond,iftrue):elseifs] iffalse where elseif = list [TElse, TIf] *> parenthised expression >>= \cond -> codeblock >>= \block -> pure (cond, block) while :: ParsePosition -> Parser Token Statement while p = item TWhile *> parenthised expression >>= \cond -> codeblock >>= \do -> pure $ While p cond do expression :: Parser Token Expression expression = rightAssoc (op TDoubleBar LogOr) $ rightAssoc (op TDoubleAmpersand LogAnd) $ rightAssoc (op TEquals Equals <|> op TUnequals Unequals <|> op TLe CmpLe <|> op TGe CmpGe <|> op TLt CmpLt <|> op TGt CmpGt) $ rightAssoc (op TColon Cons) $ leftAssoc (op TPlus Add <|> op TMinus Sub) $ leftAssoc (op TStar Mul <|> op TSlash Div) $ leftAssoc (op TPercent Rem) $ noInfix where op :: Token Op2 -> Parser Token Op2 op token operator = item token $> operator rightAssoc :: (Parser Token Op2) (Parser Token Expression) -> Parser Token Expression rightAssoc opp appp = appp >>= \e1 -> optional (opp >>= \op -> getPosition >>= \pos -> rightAssoc opp appp >>= \e -> pure (pos,op,e)) >>= pure o maybe e1 (\(pos,op,e2) -> BuiltinApp2 pos e1 op e2) leftAssoc :: (Parser Token Op2) (Parser Token Expression) -> Parser Token Expression leftAssoc opp appp = appp >>= \e1 -> many (opp >>= \op -> getPosition >>= \pos -> appp >>= \e -> pure (pos,op,e)) >>= foldM (\e (pos,op,e2) -> pure $ BuiltinApp2 pos e op e2) e1 noInfix :: Parser Token Expression noInfix = liftM3 App getPosition name (item TParenOpen *> seplistUntil TParenClose TComma expression) <|> op TTilde Neg <|> op TExclamation Not <|> (simpleExpr >>= \e -> foldl (flip $ uncurry Field) e <$> many field) where op :: Token Op1 -> Parser Token Expression op token operator = liftM3 BuiltinApp getPosition (pure operator) (item token *> noInfix) field :: Parser Token (ParsePosition, Name) field = satisfy (\t -> t =: TField _) >>= \(TField f) -> getPosition >>= \p -> pure (p,f) simpleExpr :: Parser Token Expression simpleExpr = liftM2 Literal getPosition literal <|> liftM2 Name getPosition name <|> liftM3 List getPosition (pure <$> bracked type) (pure []) <|> liftM3 List getPosition (pure Nothing) (bracked $ seplist TComma expression) <|> (item TParenOpen *> getPosition >>= \pos -> seplistUntil TParenClose TComma expression >>= \es -> pure $ case es of [x] -> x; _ -> Tuple pos (length es) es) name :: Parser Token Name name = (\(TName n) -> n) <$> satisfy isName <#> "name" where isName (TName _) = True isName _ = False arg :: Parser Token Arg arg = (type >>= \type -> name >>= \name -> pure {arg_type=type, arg_name=name}) <#> "argument" type :: Parser Token Type type = simpletype "Bool" TBool <|> simpletype "Int" TInt <|> simpletype "Void" TVoid <|> (parenthised (min1seplist TComma type) >>= \ts -> pure $ TTuple (length ts) ts) <|> TList <$> bracked type <#> "type" where simpletype s t = item (TName s) $> t literal :: Parser Token Literal literal = satisfy isLit >>= \(TLit lit) -> pure lit where isLit :: Token -> Bool isLit (TLit _) = True isLit _ = False parenthised :: (Parser Token a) -> Parser Token a parenthised p = item TParenOpen *> p <* item TParenClose bracked :: (Parser Token a) -> Parser Token a bracked p = item TBrackOpen *> p <* item TBrackClose min1seplist :: a (Parser a b) -> Parser a [b] | ==, name, toString a min1seplist sep val = val >>= \v1 -> item sep *> seplist sep val >>= \vs -> pure [v1:vs]