-- vim: et ts=2 sw=2 ai: {-# LANGUAGE LambdaCase #-} module SPL.Parse where import Prelude hiding(lex) import Control.Applicative import Control.Monad import Data.Char import Data.List.Utils data Token = TIdent String | TInt Int | TChar Char | TBool Bool | TParenOpen | TParenClose | TBrackOpen | TBrackClose | TBraceOpen | TBraceClose | TDoubleColon | TEquals | TSemicolon | TDot | TIf | TWhile | TReturn | TVar | TVoidType | TCharType | TBoolType | TIntType | TPlus | TMinus | TAsterisk | TSlash | TPercent | TEqEq | TLt | TGt | TLtEq | TGtEq | TExclamEq | TAmpAmp | TPipePipe | TColon | TExclam | TSingleComment String | TBlockComment String deriving (Show) lex :: (Monad m, Alternative m) => String -> m [Token] lex [] = pure [] lex s = (item s <|> ident s <|> int s <|> char s <|> bool s <|> comment s) >>= \(t,s') -> lex s' >>= \ts -> pure (t:ts) where ident :: (Alternative m) => String -> m (Token, String) ident (c:s) | isAlpha c = pure (TIdent (c:cs), s') | otherwise = empty where (cs,s') = span isIdentChar s int :: (Alternative m) => String -> m (Token, String) int (c:s) | isDigit c = pure (TInt $ read (c:cs), s') | otherwise = empty where (cs,s') = span isDigit s char :: (Alternative m) => String -> m (Token, String) char ('\'':c:'\'':s) = pure (TChar c, s) char _ = empty bool :: (Alternative m) => String -> m (Token, String) bool ('F':'a':'l':'s':'e':s) = noIdentifier (TBool False) s bool ('T':'r':'u':'e':s) = noIdentifier (TBool True) s bool _ = empty comment :: (Alternative m) => String -> m (Token, String) comment ('/':'/':s) = pure (TSingleComment cs, s') where (cs, s') = span (/= '\n') s comment ('/':'*':s) = pure (TBlockComment cs, s') where (cs, s') = spanList (\case ('*':'/':s) -> True _ -> False) s comment _ = empty item :: (Alternative m) => String -> m (Token, String) item ('i':'f':s) = noIdentifier TIf s item ('w':'h':'i':'l':'e':s) = noIdentifier TWhile s item ('r':'e':'t':'u':'r':'n':s) = noIdentifier TReturn s item ('v':'a':'r':s) = noIdentifier TVar s item ('V':'o':'i':'d':s) = noIdentifier TVoidType s item ('C':'h':'a':'r':s) = noIdentifier TCharType s item ('B':'o':'o':'l':s) = noIdentifier TBoolType s item ('I':'n':'t':s) = noIdentifier TIntType s item ('=':'=':s) = pure (TEqEq, s) item ('<':'=':s) = pure (TLtEq, s) item ('>':'=':s) = pure (TGtEq, s) item ('!':'=':s) = pure (TExclamEq, s) item ('&':'&':s) = pure (TAmpAmp, s) item ('|':'|':s) = pure (TPipePipe, s) item ('(':s) = pure (TParenOpen, s) item (')':s) = pure (TParenClose, s) item ('[':s) = pure (TBrackOpen, s) item (']':s) = pure (TBrackClose, s) item ('{':s) = pure (TBraceOpen, s) item ('}':s) = pure (TBraceClose, s) item ('=':s) = pure (TEquals, s) item (';':s) = pure (TSemicolon, s) item ('.':s) = pure (TDot, s) item ('+':s) = pure (TPlus, s) item ('-':s) = pure (TMinus, s) item ('*':s) = pure (TAsterisk, s) item ('/':s) = pure (TSlash, s) item ('%':s) = pure (TPercent, s) item ('<':s) = pure (TLt, s) item ('>':s) = pure (TGt, s) item (':':s) = pure (TColon, s) item ('!':s) = pure (TExclam, s) item _ = empty noIdentifier :: (Alternative m) => Token -> String -> m (Token, String) noIdentifier t [] = pure (t, []) noIdentifier t s@(c:_) | isIdentChar c = empty | otherwise = pure (t, s) isIdentChar :: Char -> Bool isIdentChar = liftM2 (||) isAlphaNum (== '_')