implementation module Snug.Parse

import StdEnv

import Control.Applicative
import Control.Monad
import Data.Either
import Data.Error
import Data.Functor
import Data.Maybe
import Data.Tuple
import qualified Text
from Text import class Text, instance Text String
import Text.Parsers.Simple.Core
import qualified Text.Parsers.Simple.Core

import Snug.Syntax

// override infix priority to use easily with <$>
(<<|>) infixr 3 :: !(Parser t a) (Parser t a) -> Parser t a
(<<|>) left right = left 'Text.Parsers.Simple.Core'. <<|> right

parseSnug :: ![Char] -> MaybeError String [Definition]
parseSnug cs = case parse (many definition`) (filterComments (lex cs)) of
	Left errors -> Error ('Text'.join "; " errors)
	Right defs -> Ok defs
where
	filterComments tks = [t \\ t <- tks | not (t=:(TComment _))]

definition` :: Parser Token Definition
definition` = parenthesized def
where
	def
		= liftM3 DataDef
			(pToken (TIdent "data") *> typeIdent)
			(fromMaybe [] <$> optional (simpleList typeVarIdent))
			(list simpleConstructorDef constructorDef)
		<<|> liftM2 TypeDef
			(pToken (TIdent "type") *> typeIdent)
			simpleOrParenthesizedType
		<<|> liftM4 FunDef
			(pToken (TIdent "fun") *> symbolIdent)
			(simpleList (parenthesized typedArgument))
			(pToken TColon *> type)
			simpleOrParenthesizedExpression

	simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
	constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)

	typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)

complexType :: Parser Token Type
complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)

simpleType :: Parser Token Type
simpleType
	= Type <$> typeIdent
	<<|> TyVar <$> typeVarIdent

type :: Parser Token Type
type = complexType <<|> simpleType

simpleOrParenthesizedType :: Parser Token Type
simpleOrParenthesizedType = simpleType <<|> parenthesized complexType

complexExpression :: Parser Token Expression
complexExpression
	= liftM2 Case
		(pToken (TIdent "case") *> simpleOrParenthesizedExpression)
		(nonEmpty (simpleList (parenthesized caseAlternative)))
	<<|> liftM2 (foldl ExpApp)
		simpleExpression
		(some simpleOrParenthesizedExpression)
where
	caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)

	complexPattern
		= liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
	simplePattern
		= pToken TUnderscore $> Wildcard
		<<|> BasicValuePattern <$> basicValue
		<<|> IdentPattern <$> symbolIdent
		<<|> liftM2 ConstructorPattern constructorIdent (pure [])
	simpleOrParenthesizedPattern = simplePattern <<|> parenthesized complexPattern
	pattern = complexPattern <<|> simplePattern

simpleExpression :: Parser Token Expression
simpleExpression
	= BasicValue <$> basicValue
	<<|> Symbol <$> symbolIdent
	<<|> Constructor <$> constructorIdent

simpleOrParenthesizedExpression :: Parser Token Expression
simpleOrParenthesizedExpression = simpleExpression <<|> parenthesized expression

expression :: Parser Token Expression
expression = complexExpression <<|> simpleExpression

basicValue :: Parser Token BasicValue
basicValue
	= (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
	<<|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))

typeIdent :: Parser Token TypeIdent
typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent

constructorIdent :: Parser Token ConstructorIdent
constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent

typeVarIdent :: Parser Token TypeVarIdent
typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent

symbolIdent :: Parser Token SymbolIdent
symbolIdent
	= fromIdent <$> pSatisfy isLowerCaseIdent
	<<|> fromIdent <$> pSatisfy isFunnyIdent

fromIdent (TIdent id) :== id

isUpperCaseIdent :: !Token -> Bool
isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
isUpperCaseIdent _ = False

isLowerCaseIdent :: !Token -> Bool
isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
isLowerCaseIdent _ = False

isFunnyIdent :: !Token -> Bool
isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
isFunnyIdent _ = False

parenthesized :: !(Parser Token a) -> Parser Token a
parenthesized p = pToken TParenOpen *> p <* pToken TParenClose

list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
list simpleElem elem = simpleList (simpleElem <<|> parenthesized elem)

simpleList :: !(Parser Token a) -> Parser Token [a]
simpleList simpleElem = parenthesized (many simpleElem)

nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)

:: Token
	= TParenOpen     //* (
	| TParenClose    //* )

	| TColon         //* :
	| TUnderscore    //* _
	| TArrow         //* ->

	| TIdent !String
	| TInt !Int
	| TChar !Char

	| TComment !String //* (# ... #)
	| TError !Int !Int !String

instance == Token
where
	(==) TParenOpen y = y=:TParenOpen
	(==) TParenClose y = y=:TParenClose
	(==) TColon y = y=:TColon
	(==) TUnderscore y = y=:TUnderscore
	(==) TArrow y = y=:TArrow
	(==) (TIdent x) (TIdent y) = x == y
	(==) (TIdent _) _ = False
	(==) (TInt x) (TInt y) = x == y
	(==) (TInt _) _ = False
	(==) (TChar x) (TChar y) = x == y
	(==) (TChar _) _ = False
	(==) (TComment x) (TComment y) = x == y
	(==) (TComment _) _ = False
	(==) (TError _ _ _) _ = False

lex :: ![Char] -> [Token]
lex cs = lex` 0 0 cs

lex` :: !Int !Int ![Char] -> [Token]
lex` _ _ []
	= []
lex` line col ['(#':cs]
	= stripComment line (col+2) cs 0 []
where
	stripComment line col ['#)':cs] 0 acc
		= [TComment (toString (reverse acc)) : lex` line (col+2) cs]
	stripComment line col ['(#':cs] n acc
		= stripComment line (col+2) cs (n+1) ['#(':acc]
	stripComment line col ['\r\n':cs] n acc
		= stripComment (line+1) 0 cs n ['\n\r':acc]
	stripComment line col ['\n\r':cs] n acc
		= stripComment (line+1) 0 cs n ['\r\n':acc]
	stripComment line col [c:cs] n acc
		| c=='\n' || c=='\r'
			= stripComment (line+1) 0 cs n [c:acc]
			= stripComment line (col+1) cs n [c:acc]
	stripComment line col [] _ _
		= [TError line col "end of file while scanning comment"]
/* This alternative is for characters that can never be part of identifiers: */
lex` line col [c:cs]
	| isJust mbToken
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		'(' -> ?Just TParenOpen
		')' -> ?Just TParenClose
		_ -> ?None
/* This alternative is for characters that can be part of identifiers consisting of symbols: */
lex` line col [c:cs]
	| isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		':' -> ?Just TColon
		_ -> ?None
/* This alternative is for characters that can be part of regular identifiers: */
lex` line col [c:cs]
	| isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
		= [fromJust mbToken : lex` line (col+1) cs]
where
	mbToken = case c of
		'_' -> ?Just TUnderscore
		_ -> ?None
lex` line col ['-','>':cs]
	| isEmpty cs || not (isFunnySymbol (hd cs))
		= [TArrow : lex` line (col+2) cs]

/* Whitespace */
lex` line col ['\r','\n':cs]
	= lex` (line+1) 0 cs
lex` line col ['\n','\r':cs]
	= lex` (line+1) 0 cs
lex` line col ['\n':cs]
	= lex` (line+1) 0 cs
lex` line col ['\r':cs]
	= lex` (line+1) 0 cs
lex` line col [c:cs]
	| isSpace c
		= lex` line (col+1) cs

/* Identifiers*/
lex` line col cs=:[c:_]
	| isAlpha c
		# (name,cs) = span isIdentChar cs
		= [TIdent (toString name) : lex` line (col + length name) cs]
	| isFunnySymbol c
		# (name,cs) = span isFunnySymbol cs
		= [TIdent (toString name) : lex` line (col + length name) cs]

/* Basic values */
lex` line col cs=:[c:_]
	| isDigit c
		# (i,cs) = span isDigit cs
		= [TInt (toInt (toString i)) : lex` line (col + length i) cs]
lex` line col [c:cs]
	| c == '-' && not (isEmpty cs) && isDigit (hd cs)
		# (i,cs) = span isDigit cs
		= [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
	= [TChar c : lex` line (col+3) cs]

/* Unparseable input */
lex` line col cs
	= [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]

isFunnySymbol :: !Char -> Bool
isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']

isIdentChar :: !Char -> Bool
isIdentChar c = isAlphanum c || c == '_' || c == '\''