diff options
author | Camil Staps | 2018-02-06 11:56:59 +0100 |
---|---|---|
committer | Camil Staps | 2018-02-06 11:56:59 +0100 |
commit | 5a0971b84e5af2a1be707fa7292ac7b19ba1790a (patch) | |
tree | caea90560b690746d0fa02655f4abd149a13bed2 /src/SPL/Parse.hs |
Initial commit
Diffstat (limited to 'src/SPL/Parse.hs')
-rw-r--r-- | src/SPL/Parse.hs | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/src/SPL/Parse.hs b/src/SPL/Parse.hs new file mode 100644 index 0000000..2616d8c --- /dev/null +++ b/src/SPL/Parse.hs @@ -0,0 +1,138 @@ +-- vim: et ts=2 sw=2 ai: +{-# LANGUAGE LambdaCase #-} +module SPL.Parse +where + +import Prelude hiding(lex) + +import Control.Applicative +import Control.Monad +import Data.Char +import Data.List.Utils + +data Token + = TIdent String + | TInt Int + | TChar Char + | TBool Bool + + | TParenOpen + | TParenClose + | TBrackOpen + | TBrackClose + | TBraceOpen + | TBraceClose + | TDoubleColon + | TEquals + | TSemicolon + | TDot + + | TIf + | TWhile + | TReturn + | TVar + | TVoidType + | TCharType + | TBoolType + | TIntType + + | TPlus + | TMinus + | TAsterisk + | TSlash + | TPercent + | TEqEq + | TLt + | TGt + | TLtEq + | TGtEq + | TExclamEq + | TAmpAmp + | TPipePipe + | TColon + | TExclam + + | TSingleComment String + | TBlockComment String + deriving (Show) + +lex :: (Monad m, Alternative m) => String -> m [Token] +lex [] = pure [] +lex s = (item s <|> ident s <|> int s <|> char s <|> bool s <|> comment s) >>= + \(t,s') -> lex s' >>= \ts -> pure (t:ts) + where + ident :: (Alternative m) => String -> m (Token, String) + ident (c:s) + | isAlpha c = pure (TIdent (c:cs), s') + | otherwise = empty + where (cs,s') = span isIdentChar s + + int :: (Alternative m) => String -> m (Token, String) + int (c:s) + | isDigit c = pure (TInt $ read (c:cs), s') + | otherwise = empty + where (cs,s') = span isDigit s + + char :: (Alternative m) => String -> m (Token, String) + char ('\'':c:'\'':s) = pure (TChar c, s) + char _ = empty + + bool :: (Alternative m) => String -> m (Token, String) + bool ('F':'a':'l':'s':'e':s) = noIdentifier (TBool False) s + bool ('T':'r':'u':'e':s) = noIdentifier (TBool True) s + bool _ = empty + + comment :: (Alternative m) => String -> m (Token, String) + comment ('/':'/':s) = pure (TSingleComment cs, s') + where + (cs, s') = span (/= '\n') s + comment ('/':'*':s) = pure (TBlockComment cs, s') + where + (cs, s') = spanList (\case + ('*':'/':s) -> True + _ -> False) s + comment _ = empty + + item :: (Alternative m) => String -> m (Token, String) + item ('i':'f':s) = noIdentifier TIf s + item ('w':'h':'i':'l':'e':s) = noIdentifier TWhile s + item ('r':'e':'t':'u':'r':'n':s) = noIdentifier TReturn s + item ('v':'a':'r':s) = noIdentifier TVar s + item ('V':'o':'i':'d':s) = noIdentifier TVoidType s + item ('C':'h':'a':'r':s) = noIdentifier TCharType s + item ('B':'o':'o':'l':s) = noIdentifier TBoolType s + item ('I':'n':'t':s) = noIdentifier TIntType s + item ('=':'=':s) = pure (TEqEq, s) + item ('<':'=':s) = pure (TLtEq, s) + item ('>':'=':s) = pure (TGtEq, s) + item ('!':'=':s) = pure (TExclamEq, s) + item ('&':'&':s) = pure (TAmpAmp, s) + item ('|':'|':s) = pure (TPipePipe, s) + item ('(':s) = pure (TParenOpen, s) + item (')':s) = pure (TParenClose, s) + item ('[':s) = pure (TBrackOpen, s) + item (']':s) = pure (TBrackClose, s) + item ('{':s) = pure (TBraceOpen, s) + item ('}':s) = pure (TBraceClose, s) + item ('=':s) = pure (TEquals, s) + item (';':s) = pure (TSemicolon, s) + item ('.':s) = pure (TDot, s) + item ('+':s) = pure (TPlus, s) + item ('-':s) = pure (TMinus, s) + item ('*':s) = pure (TAsterisk, s) + item ('/':s) = pure (TSlash, s) + item ('%':s) = pure (TPercent, s) + item ('<':s) = pure (TLt, s) + item ('>':s) = pure (TGt, s) + item (':':s) = pure (TColon, s) + item ('!':s) = pure (TExclam, s) + item _ = empty + + noIdentifier :: (Alternative m) => Token -> String -> m (Token, String) + noIdentifier t [] = pure (t, []) + noIdentifier t s@(c:_) + | isIdentChar c = empty + | otherwise = pure (t, s) + + isIdentChar :: Char -> Bool + isIdentChar = liftM2 (||) isAlphaNum (== '_') |