summaryrefslogtreecommitdiff
path: root/src/SPL/Parse.hs
diff options
context:
space:
mode:
authorCamil Staps2018-02-06 11:56:59 +0100
committerCamil Staps2018-02-06 11:56:59 +0100
commit5a0971b84e5af2a1be707fa7292ac7b19ba1790a (patch)
treecaea90560b690746d0fa02655f4abd149a13bed2 /src/SPL/Parse.hs
Initial commit
Diffstat (limited to 'src/SPL/Parse.hs')
-rw-r--r--src/SPL/Parse.hs138
1 files changed, 138 insertions, 0 deletions
diff --git a/src/SPL/Parse.hs b/src/SPL/Parse.hs
new file mode 100644
index 0000000..2616d8c
--- /dev/null
+++ b/src/SPL/Parse.hs
@@ -0,0 +1,138 @@
+-- vim: et ts=2 sw=2 ai:
+{-# LANGUAGE LambdaCase #-}
+module SPL.Parse
+where
+
+import Prelude hiding(lex)
+
+import Control.Applicative
+import Control.Monad
+import Data.Char
+import Data.List.Utils
+
+data Token
+ = TIdent String
+ | TInt Int
+ | TChar Char
+ | TBool Bool
+
+ | TParenOpen
+ | TParenClose
+ | TBrackOpen
+ | TBrackClose
+ | TBraceOpen
+ | TBraceClose
+ | TDoubleColon
+ | TEquals
+ | TSemicolon
+ | TDot
+
+ | TIf
+ | TWhile
+ | TReturn
+ | TVar
+ | TVoidType
+ | TCharType
+ | TBoolType
+ | TIntType
+
+ | TPlus
+ | TMinus
+ | TAsterisk
+ | TSlash
+ | TPercent
+ | TEqEq
+ | TLt
+ | TGt
+ | TLtEq
+ | TGtEq
+ | TExclamEq
+ | TAmpAmp
+ | TPipePipe
+ | TColon
+ | TExclam
+
+ | TSingleComment String
+ | TBlockComment String
+ deriving (Show)
+
+lex :: (Monad m, Alternative m) => String -> m [Token]
+lex [] = pure []
+lex s = (item s <|> ident s <|> int s <|> char s <|> bool s <|> comment s) >>=
+ \(t,s') -> lex s' >>= \ts -> pure (t:ts)
+ where
+ ident :: (Alternative m) => String -> m (Token, String)
+ ident (c:s)
+ | isAlpha c = pure (TIdent (c:cs), s')
+ | otherwise = empty
+ where (cs,s') = span isIdentChar s
+
+ int :: (Alternative m) => String -> m (Token, String)
+ int (c:s)
+ | isDigit c = pure (TInt $ read (c:cs), s')
+ | otherwise = empty
+ where (cs,s') = span isDigit s
+
+ char :: (Alternative m) => String -> m (Token, String)
+ char ('\'':c:'\'':s) = pure (TChar c, s)
+ char _ = empty
+
+ bool :: (Alternative m) => String -> m (Token, String)
+ bool ('F':'a':'l':'s':'e':s) = noIdentifier (TBool False) s
+ bool ('T':'r':'u':'e':s) = noIdentifier (TBool True) s
+ bool _ = empty
+
+ comment :: (Alternative m) => String -> m (Token, String)
+ comment ('/':'/':s) = pure (TSingleComment cs, s')
+ where
+ (cs, s') = span (/= '\n') s
+ comment ('/':'*':s) = pure (TBlockComment cs, s')
+ where
+ (cs, s') = spanList (\case
+ ('*':'/':s) -> True
+ _ -> False) s
+ comment _ = empty
+
+ item :: (Alternative m) => String -> m (Token, String)
+ item ('i':'f':s) = noIdentifier TIf s
+ item ('w':'h':'i':'l':'e':s) = noIdentifier TWhile s
+ item ('r':'e':'t':'u':'r':'n':s) = noIdentifier TReturn s
+ item ('v':'a':'r':s) = noIdentifier TVar s
+ item ('V':'o':'i':'d':s) = noIdentifier TVoidType s
+ item ('C':'h':'a':'r':s) = noIdentifier TCharType s
+ item ('B':'o':'o':'l':s) = noIdentifier TBoolType s
+ item ('I':'n':'t':s) = noIdentifier TIntType s
+ item ('=':'=':s) = pure (TEqEq, s)
+ item ('<':'=':s) = pure (TLtEq, s)
+ item ('>':'=':s) = pure (TGtEq, s)
+ item ('!':'=':s) = pure (TExclamEq, s)
+ item ('&':'&':s) = pure (TAmpAmp, s)
+ item ('|':'|':s) = pure (TPipePipe, s)
+ item ('(':s) = pure (TParenOpen, s)
+ item (')':s) = pure (TParenClose, s)
+ item ('[':s) = pure (TBrackOpen, s)
+ item (']':s) = pure (TBrackClose, s)
+ item ('{':s) = pure (TBraceOpen, s)
+ item ('}':s) = pure (TBraceClose, s)
+ item ('=':s) = pure (TEquals, s)
+ item (';':s) = pure (TSemicolon, s)
+ item ('.':s) = pure (TDot, s)
+ item ('+':s) = pure (TPlus, s)
+ item ('-':s) = pure (TMinus, s)
+ item ('*':s) = pure (TAsterisk, s)
+ item ('/':s) = pure (TSlash, s)
+ item ('%':s) = pure (TPercent, s)
+ item ('<':s) = pure (TLt, s)
+ item ('>':s) = pure (TGt, s)
+ item (':':s) = pure (TColon, s)
+ item ('!':s) = pure (TExclam, s)
+ item _ = empty
+
+ noIdentifier :: (Alternative m) => Token -> String -> m (Token, String)
+ noIdentifier t [] = pure (t, [])
+ noIdentifier t s@(c:_)
+ | isIdentChar c = empty
+ | otherwise = pure (t, s)
+
+ isIdentChar :: Char -> Bool
+ isIdentChar = liftM2 (||) isAlphaNum (== '_')