blob: 4347c34fad053524ba3a2cab53b6f9e21c5f2510 (
plain) (
tree)
|
|
implementation module Snug.Parse
import StdEnv
import Control.Applicative
import Control.Monad
import Data.Either
import Data.Error
import Data.Functor
import Data.Maybe
import Data.Tuple
import qualified Text
from Text import class Text, instance Text String
import Text.Parsers.Simple.Core
import qualified Text.Parsers.Simple.Core
import Snug.Syntax
// override infix priority to use easily with <$>
(<<|>) infixr 3 :: !(Parser t a) (Parser t a) -> Parser t a
(<<|>) left right = left 'Text.Parsers.Simple.Core'. <<|> right
parseSnug :: ![Char] -> MaybeError String [Definition]
parseSnug cs = case parse (many definition`) (filterComments (lex cs)) of
Left errors -> Error ('Text'.join "; " errors)
Right defs -> Ok defs
where
filterComments tks = [t \\ t <- tks | not (t=:(TComment _))]
definition` :: Parser Token Definition
definition` = parenthesized def
where
def
= liftM3 DataDef
(pToken (TIdent "data") *> typeIdent)
(fromMaybe [] <$> optional (simpleList typeVarIdent))
(list simpleConstructorDef constructorDef)
<<|> liftM2 TypeDef
(pToken (TIdent "type") *> typeIdent)
simpleOrParenthesizedType
<<|> liftM4 FunDef
(pToken (TIdent "fun") *> symbolIdent)
(simpleList (parenthesized typedArgument))
(pToken TColon *> type)
(pToken TColon *> expression)
<<|> liftM4 TestDef
(pToken (TIdent "test") *> string)
(pToken TColon *> type)
(pToken TColon *> simpleOrParenthesizedExpression)
string
simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)
typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)
complexType :: Parser Token Type
complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)
simpleType :: Parser Token Type
simpleType
= Type <$> typeIdent
<<|> TyVar <$> typeVarIdent
type :: Parser Token Type
type = complexType <<|> simpleType
simpleOrParenthesizedType :: Parser Token Type
simpleOrParenthesizedType = simpleType <<|> parenthesized complexType
complexExpression :: Parser Token Expression
complexExpression
= liftM2 Case
(pToken (TIdent "case") *> simpleOrParenthesizedExpression)
(nonEmpty (simpleList (parenthesized caseAlternative)))
<<|> liftM2 (foldl ExpApp)
simpleExpression
(some simpleOrParenthesizedExpression)
where
caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)
complexPattern
= liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
simplePattern
= pToken TUnderscore $> Wildcard
<<|> BasicValuePattern <$> basicValue
<<|> IdentPattern <$> symbolIdent
<<|> liftM2 ConstructorPattern constructorIdent (pure [])
simpleOrParenthesizedPattern = simplePattern <<|> parenthesized complexPattern
pattern = complexPattern <<|> simplePattern
simpleExpression :: Parser Token Expression
simpleExpression
= BasicValue <$> basicValue
<<|> Symbol <$> symbolIdent
<<|> Constructor <$> constructorIdent
simpleOrParenthesizedExpression :: Parser Token Expression
simpleOrParenthesizedExpression = simpleExpression <<|> parenthesized expression
expression :: Parser Token Expression
expression = complexExpression <<|> simpleExpression
basicValue :: Parser Token BasicValue
basicValue
= (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
<<|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))
string :: Parser Token String
string = (\(TString s) -> s) <$> pSatisfy (\t -> t=:(TString _))
typeIdent :: Parser Token TypeIdent
typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent
constructorIdent :: Parser Token ConstructorIdent
constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent
typeVarIdent :: Parser Token TypeVarIdent
typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent
symbolIdent :: Parser Token SymbolIdent
symbolIdent
= fromIdent <$> pSatisfy isLowerCaseIdent
<<|> fromIdent <$> pSatisfy isFunnyIdent
fromIdent (TIdent id) :== id
isUpperCaseIdent :: !Token -> Bool
isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
isUpperCaseIdent _ = False
isLowerCaseIdent :: !Token -> Bool
isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
isLowerCaseIdent _ = False
isFunnyIdent :: !Token -> Bool
isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
isFunnyIdent _ = False
parenthesized :: !(Parser Token a) -> Parser Token a
parenthesized p = pToken TParenOpen *> p <* pToken TParenClose
list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
list simpleElem elem = simpleList (simpleElem <<|> parenthesized elem)
simpleList :: !(Parser Token a) -> Parser Token [a]
simpleList simpleElem = parenthesized (many simpleElem)
nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)
:: Token
= TParenOpen //* (
| TParenClose //* )
| TColon //* :
| TUnderscore //* _
| TArrow //* ->
| TIdent !String
| TInt !Int
| TChar !Char
| TString !String
| TComment !String //* (# ... #)
| TError !Int !Int !String
instance == Token
where
(==) TParenOpen y = y=:TParenOpen
(==) TParenClose y = y=:TParenClose
(==) TColon y = y=:TColon
(==) TUnderscore y = y=:TUnderscore
(==) TArrow y = y=:TArrow
(==) (TIdent x) (TIdent y) = x == y
(==) (TIdent _) _ = False
(==) (TInt x) (TInt y) = x == y
(==) (TInt _) _ = False
(==) (TChar x) (TChar y) = x == y
(==) (TChar _) _ = False
(==) (TString x) (TString y) = x == y
(==) (TString _) _ = False
(==) (TComment x) (TComment y) = x == y
(==) (TComment _) _ = False
(==) (TError _ _ _) _ = False
lex :: ![Char] -> [Token]
lex cs = lex` 0 0 cs
lex` :: !Int !Int ![Char] -> [Token]
lex` _ _ []
= []
lex` line col ['(#':cs]
= stripComment line (col+2) cs 0 []
where
stripComment line col ['#)':cs] 0 acc
= [TComment (toString (reverse acc)) : lex` line (col+2) cs]
stripComment line col ['(#':cs] n acc
= stripComment line (col+2) cs (n+1) ['#(':acc]
stripComment line col ['\r\n':cs] n acc
= stripComment (line+1) 0 cs n ['\n\r':acc]
stripComment line col ['\n\r':cs] n acc
= stripComment (line+1) 0 cs n ['\r\n':acc]
stripComment line col [c:cs] n acc
| c=='\n' || c=='\r'
= stripComment (line+1) 0 cs n [c:acc]
= stripComment line (col+1) cs n [c:acc]
stripComment line col [] _ _
= [TError line col "end of file while scanning comment"]
/* This alternative is for characters that can never be part of identifiers: */
lex` line col [c:cs]
| isJust mbToken
= [fromJust mbToken : lex` line (col+1) cs]
where
mbToken = case c of
'(' -> ?Just TParenOpen
')' -> ?Just TParenClose
_ -> ?None
/* This alternative is for characters that can be part of identifiers consisting of symbols: */
lex` line col [c:cs]
| isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
= [fromJust mbToken : lex` line (col+1) cs]
where
mbToken = case c of
':' -> ?Just TColon
_ -> ?None
/* This alternative is for characters that can be part of regular identifiers: */
lex` line col [c:cs]
| isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
= [fromJust mbToken : lex` line (col+1) cs]
where
mbToken = case c of
'_' -> ?Just TUnderscore
_ -> ?None
lex` line col ['-','>':cs]
| isEmpty cs || not (isFunnySymbol (hd cs))
= [TArrow : lex` line (col+2) cs]
/* Whitespace */
lex` line col ['\r','\n':cs]
= lex` (line+1) 0 cs
lex` line col ['\n','\r':cs]
= lex` (line+1) 0 cs
lex` line col ['\n':cs]
= lex` (line+1) 0 cs
lex` line col ['\r':cs]
= lex` (line+1) 0 cs
lex` line col [c:cs]
| isSpace c
= lex` line (col+1) cs
/* Basic values */
lex` line col cs=:[c:_]
| isDigit c
# (i,cs) = span isDigit cs
= [TInt (toInt (toString i)) : lex` line (col + length i) cs]
lex` line col [c:cs]
| c == '-' && not (isEmpty cs) && isDigit (hd cs)
# (i,cs) = span isDigit cs
= [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
= [TChar c : lex` line (col+3) cs]
lex` line col ['"':cs] // TODO: escape sequences; correctly compute new line/col in case of newlines
= [TString s : lex` line (col + 2 + size s) (tl rest)]
where
(chars,rest) = span ((<>) '"') cs
s = {c \\ c <- chars}
/* Identifiers (must come after basic values for correct lexing of negative
* integers) */
lex` line col cs=:[c:_]
| isAlpha c
# (name,cs) = span isIdentChar cs
= [TIdent (toString name) : lex` line (col + length name) cs]
| isFunnySymbol c
# (name,cs) = span isFunnySymbol cs
= [TIdent (toString name) : lex` line (col + length name) cs]
/* Unparseable input */
lex` line col cs
= [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]
isFunnySymbol :: !Char -> Bool
isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']
isIdentChar :: !Char -> Bool
isIdentChar c = isAlphanum c || c == '_' || c == '\''
|