diff options
author | Camil Staps | 2023-01-23 22:36:16 +0100 |
---|---|---|
committer | Camil Staps | 2023-01-23 22:36:16 +0100 |
commit | 301a73c63b3fe5e8306e9e8d213269a720b7a089 (patch) | |
tree | 0593001bc36302e0c759d85e98fcd78dfe87fcef /snug-clean | |
parent | Add example of small functional language and run time system (diff) |
Add Clean parser for snug
Diffstat (limited to 'snug-clean')
-rw-r--r-- | snug-clean/.gitignore | 5 | ||||
-rw-r--r-- | snug-clean/nitrile-lock.json | 20 | ||||
-rw-r--r-- | snug-clean/nitrile.yml | 22 | ||||
-rw-r--r-- | snug-clean/src/Snug/Parse.dcl | 7 | ||||
-rw-r--r-- | snug-clean/src/Snug/Parse.icl | 243 | ||||
-rw-r--r-- | snug-clean/src/Snug/Syntax.dcl | 39 | ||||
-rw-r--r-- | snug-clean/src/Snug/Syntax.icl | 1 | ||||
-rw-r--r-- | snug-clean/src/snug.icl | 31 |
8 files changed, 368 insertions, 0 deletions
diff --git a/snug-clean/.gitignore b/snug-clean/.gitignore new file mode 100644 index 0000000..fbde75a --- /dev/null +++ b/snug-clean/.gitignore @@ -0,0 +1,5 @@ +*.abc +*.o +nitrile-packages/ + +snug diff --git a/snug-clean/nitrile-lock.json b/snug-clean/nitrile-lock.json new file mode 100644 index 0000000..e1524c4 --- /dev/null +++ b/snug-clean/nitrile-lock.json @@ -0,0 +1,20 @@ +{"packages":{"linux-x64":[{"name":"argenv" + ,"version":"1.0.1"} + ,{"name":"base" + ,"version":"1.0.1"} + ,{"name":"base-clm" + ,"version":"1.4.2"} + ,{"name":"base-code-generator" + ,"version":"2.2.0"} + ,{"name":"base-compiler" + ,"version":"2.0.1"} + ,{"name":"base-rts" + ,"version":"2.1.1"} + ,{"name":"base-stdenv" + ,"version":"2.1.0"} + ,{"name":"clean-platform" + ,"version":"0.3.21"} + ,{"name":"lib-compiler" + ,"version":"3.0.2"} + ,{"name":"tcpip" + ,"version":"2.1.1"}]}} diff --git a/snug-clean/nitrile.yml b/snug-clean/nitrile.yml new file mode 100644 index 0000000..6f05a65 --- /dev/null +++ b/snug-clean/nitrile.yml @@ -0,0 +1,22 @@ +format_version: 0.4.5 +name: snug-clean +type: Application +version: 0.1.0 +maintainer: Camil Staps +contact_email: clean@camilstaps.nl + +dependencies: + base: ^1.0 + clean-platform: ^0.3 + +src: + - src + +build: + compile: + script: + - clm: + main: snug + target: snug + #print_result: false + print_time: false diff --git a/snug-clean/src/Snug/Parse.dcl b/snug-clean/src/Snug/Parse.dcl new file mode 100644 index 0000000..edea809 --- /dev/null +++ b/snug-clean/src/Snug/Parse.dcl @@ -0,0 +1,7 @@ +definition module Snug.Parse + +from Data.Error import :: MaybeError + +from Snug.Syntax import :: Definition + +parseSnug :: ![Char] -> MaybeError String [Definition] diff --git a/snug-clean/src/Snug/Parse.icl b/snug-clean/src/Snug/Parse.icl new file mode 100644 index 0000000..63a9fda --- /dev/null +++ b/snug-clean/src/Snug/Parse.icl @@ -0,0 +1,243 @@ +implementation module Snug.Parse + +import StdEnv + +import Control.Applicative +import Control.Monad +import Data.Either +import Data.Error +import Data.Functor +import Data.Maybe +import Data.Tuple +import qualified Text +from Text import class Text, instance Text String +import Text.Parsers.Simple.Core + +import Snug.Syntax + +parseSnug :: ![Char] -> MaybeError String [Definition] +parseSnug cs = case parse (many definition`) (lex cs) of + Left errors -> Error ('Text'.join "; " errors) + Right defs -> Ok defs + +definition` :: Parser Token Definition +definition` = parenthesized def +where + def + = liftM3 DataDef + (pToken (TIdent "data") *> typeIdent) + (fromMaybe [] <$> optional (simpleList typeVarIdent)) + (list simpleConstructorDef constructorDef) + <|> liftM2 TypeDef + (pToken (TIdent "type") *> typeIdent) + simpleOrParenthesizedType + <|> liftM4 FunDef + (pToken (TIdent "fun") *> symbolIdent) + (simpleList (parenthesized typedArgument)) + (pToken TColon *> type) + simpleOrParenthesizedExpression + + simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure []) + constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType) + + typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type) + +complexType :: Parser Token Type +complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType) + +simpleType :: Parser Token Type +simpleType + = Type <$> typeIdent + <|> TyVar <$> typeVarIdent + +type :: Parser Token Type +type = complexType <|> simpleType + +simpleOrParenthesizedType :: Parser Token Type +simpleOrParenthesizedType = simpleType <|> parenthesized complexType + +complexExpression :: Parser Token Expression +complexExpression + = liftM2 Case + (pToken (TIdent "case") *> simpleOrParenthesizedExpression) + (nonEmpty (simpleList (parenthesized caseAlternative))) + <|> liftM2 (foldl ExpApp) + simpleExpression + (some simpleOrParenthesizedExpression) +where + caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression) + + complexPattern + = liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern) + simplePattern + = pToken TUnderscore $> Wildcard + <|> BasicValuePattern <$> basicValue + <|> IdentPattern <$> symbolIdent + <|> liftM2 ConstructorPattern constructorIdent (pure []) + simpleOrParenthesizedPattern = simplePattern <|> parenthesized complexPattern + pattern = complexPattern <|> simplePattern + +simpleExpression :: Parser Token Expression +simpleExpression + = BasicValue <$> basicValue + <|> Symbol <$> symbolIdent + <|> Constructor <$> constructorIdent + +simpleOrParenthesizedExpression :: Parser Token Expression +simpleOrParenthesizedExpression = simpleExpression <|> parenthesized expression + +expression :: Parser Token Expression +expression = complexExpression <|> simpleExpression + +basicValue :: Parser Token BasicValue +basicValue + = (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _)) + <|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _)) + +typeIdent :: Parser Token TypeIdent +typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent + +constructorIdent :: Parser Token ConstructorIdent +constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent + +typeVarIdent :: Parser Token TypeVarIdent +typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent + +symbolIdent :: Parser Token SymbolIdent +symbolIdent + = fromIdent <$> pSatisfy isLowerCaseIdent + <|> fromIdent <$> pSatisfy isFunnyIdent + +fromIdent (TIdent id) :== id + +isUpperCaseIdent :: !Token -> Bool +isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0] +isUpperCaseIdent _ = False + +isLowerCaseIdent :: !Token -> Bool +isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0] +isLowerCaseIdent _ = False + +isFunnyIdent :: !Token -> Bool +isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0] +isFunnyIdent _ = False + +parenthesized :: !(Parser Token a) -> Parser Token a +parenthesized p = pToken TParenOpen *> p <* pToken TParenClose + +list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a] +list simpleElem elem = simpleList (simpleElem <|> parenthesized elem) + +simpleList :: !(Parser Token a) -> Parser Token [a] +simpleList simpleElem = parenthesized (many simpleElem) + +nonEmpty :: !(Parser Token [a]) -> Parser Token [a] +nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs) + +:: Token + = TParenOpen //* ( + | TParenClose //* ) + + | TColon //* : + | TUnderscore //* _ + | TArrow //* -> + + | TIdent !String + | TInt !Int + | TChar !Char + + | TError !Int !Int !String + +instance == Token +where + (==) TParenOpen y = y=:TParenOpen + (==) TParenClose y = y=:TParenClose + (==) TColon y = y=:TColon + (==) TUnderscore y = y=:TUnderscore + (==) TArrow y = y=:TArrow + (==) (TIdent x) (TIdent y) = x == y + (==) (TIdent _) _ = False + (==) (TInt x) (TInt y) = x == y + (==) (TInt _) _ = False + (==) (TChar x) (TChar y) = x == y + (==) (TChar _) _ = False + (==) (TError _ _ _) _ = False + +lex :: ![Char] -> [Token] +lex cs = lex` 0 0 cs + +lex` :: !Int !Int ![Char] -> [Token] +lex` _ _ [] + = [] +/* This alternative is for characters that can never be part of identifiers: */ +lex` line col [c:cs] + | isJust mbToken + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + '(' -> ?Just TParenOpen + ')' -> ?Just TParenClose + _ -> ?None +/* This alternative is for characters that can be part of identifiers consisting of symbols: */ +lex` line col [c:cs] + | isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs))) + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + ':' -> ?Just TColon + _ -> ?None +/* This alternative is for characters that can be part of regular identifiers: */ +lex` line col [c:cs] + | isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs))) + = [fromJust mbToken : lex` line (col+1) cs] +where + mbToken = case c of + '_' -> ?Just TUnderscore + _ -> ?None +lex` line col ['-','>':cs] + | isEmpty cs || not (isFunnySymbol (hd cs)) + = [TArrow : lex` line (col+2) cs] + +/* Whitespace */ +lex` line col ['\r','\n':cs] + = lex` (line+1) 0 cs +lex` line col ['\n','\r':cs] + = lex` (line+1) 0 cs +lex` line col ['\n':cs] + = lex` (line+1) 0 cs +lex` line col ['\r':cs] + = lex` (line+1) 0 cs +lex` line col [c:cs] + | isSpace c + = lex` line (col+1) cs + +/* Identifiers*/ +lex` line col cs=:[c:_] + | isAlpha c + # (name,cs) = span isIdentChar cs + = [TIdent (toString name) : lex` line (col + length name) cs] + | isFunnySymbol c + # (name,cs) = span isFunnySymbol cs + = [TIdent (toString name) : lex` line (col + length name) cs] + +/* Basic values */ +lex` line col cs=:[c:_] + | isDigit c + # (i,cs) = span isDigit cs + = [TInt (toInt (toString i)) : lex` line (col + length i) cs] +lex` line col [c:cs] + | c == '-' && not (isEmpty cs) && isDigit (hd cs) + # (i,cs) = span isDigit cs + = [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs] +lex` line col ['\'',c,'\'':cs] // TODO: escape sequences + = [TChar c : lex` line (col+3) cs] + +/* Unparseable input */ +lex` line col cs + = [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")] + +isFunnySymbol :: !Char -> Bool +isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~'] + +isIdentChar :: !Char -> Bool +isIdentChar c = isAlphanum c || c == '_' || c == '\'' diff --git a/snug-clean/src/Snug/Syntax.dcl b/snug-clean/src/Snug/Syntax.dcl new file mode 100644 index 0000000..d030df6 --- /dev/null +++ b/snug-clean/src/Snug/Syntax.dcl @@ -0,0 +1,39 @@ +definition module Snug.Syntax + +:: TypeIdent :== String +:: TypeVarIdent :== String +:: ConstructorIdent :== String +:: SymbolIdent :== String + +:: Type + = Type !TypeIdent + | TyVar !TypeVarIdent + | TyApp !Type !Type + +:: ConstructorDef + = ConstructorDef !ConstructorIdent ![Type] + +:: BasicValue + = BVInt !Int + | BVChar !Char + +:: Pattern + = Wildcard + | BasicValuePattern !BasicValue + | IdentPattern !SymbolIdent + | ConstructorPattern !ConstructorIdent ![Pattern] + +:: CaseAlternative + = CaseAlternative !Pattern !Expression + +:: Expression + = BasicValue !BasicValue + | Symbol !SymbolIdent + | Constructor !ConstructorIdent + | Case !Expression ![CaseAlternative] + | ExpApp !Expression !Expression + +:: Definition + = DataDef !TypeIdent ![TypeVarIdent] ![ConstructorDef] + | TypeDef !TypeIdent !Type + | FunDef !SymbolIdent ![(SymbolIdent, Type)] !Type !Expression diff --git a/snug-clean/src/Snug/Syntax.icl b/snug-clean/src/Snug/Syntax.icl new file mode 100644 index 0000000..f886750 --- /dev/null +++ b/snug-clean/src/Snug/Syntax.icl @@ -0,0 +1 @@ +implementation module Snug.Syntax diff --git a/snug-clean/src/snug.icl b/snug-clean/src/snug.icl new file mode 100644 index 0000000..1cdfcf6 --- /dev/null +++ b/snug-clean/src/snug.icl @@ -0,0 +1,31 @@ +module snug + +import StdEnv +import StdMaybe + +import System.CommandLine + +import Snug.Parse + +Start w + # ([prog:args],w) = getCommandLine w + | length args <> 1 = abort ("Usage: " +++ prog +++ " INPUT\n") + # input = hd args + # (mbInput,w) = readFile input w + input = fromJust mbInput + | isNone mbInput = abort "Failed to read input\n" + | otherwise = parseSnug input + +readFile :: !String !*World -> (!?[Char], !*World) +readFile path w + # (ok,f,w) = fopen path FReadData w + | not ok = (?None, w) + # (contents,f) = read [] f + # (_,w) = fclose f w + = (?Just contents, w) +where + read :: ![Char] !*File -> (![Char], !*File) + read acc f + # (ok,c,f) = freadc f + | not ok = (reverse acc, f) + | otherwise = read [c:acc] f |