aboutsummaryrefslogtreecommitdiff
path: root/snug-clean
diff options
context:
space:
mode:
authorCamil Staps2023-01-23 22:36:16 +0100
committerCamil Staps2023-01-23 22:36:16 +0100
commit301a73c63b3fe5e8306e9e8d213269a720b7a089 (patch)
tree0593001bc36302e0c759d85e98fcd78dfe87fcef /snug-clean
parentAdd example of small functional language and run time system (diff)
Add Clean parser for snug
Diffstat (limited to 'snug-clean')
-rw-r--r--snug-clean/.gitignore5
-rw-r--r--snug-clean/nitrile-lock.json20
-rw-r--r--snug-clean/nitrile.yml22
-rw-r--r--snug-clean/src/Snug/Parse.dcl7
-rw-r--r--snug-clean/src/Snug/Parse.icl243
-rw-r--r--snug-clean/src/Snug/Syntax.dcl39
-rw-r--r--snug-clean/src/Snug/Syntax.icl1
-rw-r--r--snug-clean/src/snug.icl31
8 files changed, 368 insertions, 0 deletions
diff --git a/snug-clean/.gitignore b/snug-clean/.gitignore
new file mode 100644
index 0000000..fbde75a
--- /dev/null
+++ b/snug-clean/.gitignore
@@ -0,0 +1,5 @@
+*.abc
+*.o
+nitrile-packages/
+
+snug
diff --git a/snug-clean/nitrile-lock.json b/snug-clean/nitrile-lock.json
new file mode 100644
index 0000000..e1524c4
--- /dev/null
+++ b/snug-clean/nitrile-lock.json
@@ -0,0 +1,20 @@
+{"packages":{"linux-x64":[{"name":"argenv"
+ ,"version":"1.0.1"}
+ ,{"name":"base"
+ ,"version":"1.0.1"}
+ ,{"name":"base-clm"
+ ,"version":"1.4.2"}
+ ,{"name":"base-code-generator"
+ ,"version":"2.2.0"}
+ ,{"name":"base-compiler"
+ ,"version":"2.0.1"}
+ ,{"name":"base-rts"
+ ,"version":"2.1.1"}
+ ,{"name":"base-stdenv"
+ ,"version":"2.1.0"}
+ ,{"name":"clean-platform"
+ ,"version":"0.3.21"}
+ ,{"name":"lib-compiler"
+ ,"version":"3.0.2"}
+ ,{"name":"tcpip"
+ ,"version":"2.1.1"}]}}
diff --git a/snug-clean/nitrile.yml b/snug-clean/nitrile.yml
new file mode 100644
index 0000000..6f05a65
--- /dev/null
+++ b/snug-clean/nitrile.yml
@@ -0,0 +1,22 @@
+format_version: 0.4.5
+name: snug-clean
+type: Application
+version: 0.1.0
+maintainer: Camil Staps
+contact_email: clean@camilstaps.nl
+
+dependencies:
+ base: ^1.0
+ clean-platform: ^0.3
+
+src:
+ - src
+
+build:
+ compile:
+ script:
+ - clm:
+ main: snug
+ target: snug
+ #print_result: false
+ print_time: false
diff --git a/snug-clean/src/Snug/Parse.dcl b/snug-clean/src/Snug/Parse.dcl
new file mode 100644
index 0000000..edea809
--- /dev/null
+++ b/snug-clean/src/Snug/Parse.dcl
@@ -0,0 +1,7 @@
+definition module Snug.Parse
+
+from Data.Error import :: MaybeError
+
+from Snug.Syntax import :: Definition
+
+parseSnug :: ![Char] -> MaybeError String [Definition]
diff --git a/snug-clean/src/Snug/Parse.icl b/snug-clean/src/Snug/Parse.icl
new file mode 100644
index 0000000..63a9fda
--- /dev/null
+++ b/snug-clean/src/Snug/Parse.icl
@@ -0,0 +1,243 @@
+implementation module Snug.Parse
+
+import StdEnv
+
+import Control.Applicative
+import Control.Monad
+import Data.Either
+import Data.Error
+import Data.Functor
+import Data.Maybe
+import Data.Tuple
+import qualified Text
+from Text import class Text, instance Text String
+import Text.Parsers.Simple.Core
+
+import Snug.Syntax
+
+parseSnug :: ![Char] -> MaybeError String [Definition]
+parseSnug cs = case parse (many definition`) (lex cs) of
+ Left errors -> Error ('Text'.join "; " errors)
+ Right defs -> Ok defs
+
+definition` :: Parser Token Definition
+definition` = parenthesized def
+where
+ def
+ = liftM3 DataDef
+ (pToken (TIdent "data") *> typeIdent)
+ (fromMaybe [] <$> optional (simpleList typeVarIdent))
+ (list simpleConstructorDef constructorDef)
+ <|> liftM2 TypeDef
+ (pToken (TIdent "type") *> typeIdent)
+ simpleOrParenthesizedType
+ <|> liftM4 FunDef
+ (pToken (TIdent "fun") *> symbolIdent)
+ (simpleList (parenthesized typedArgument))
+ (pToken TColon *> type)
+ simpleOrParenthesizedExpression
+
+ simpleConstructorDef = liftM2 ConstructorDef constructorIdent (pure [])
+ constructorDef = liftM2 ConstructorDef constructorIdent (many simpleOrParenthesizedType)
+
+ typedArgument = liftM2 tuple symbolIdent (pToken TColon *> type)
+
+complexType :: Parser Token Type
+complexType = liftM2 (foldl TyApp) simpleType (some simpleOrParenthesizedType)
+
+simpleType :: Parser Token Type
+simpleType
+ = Type <$> typeIdent
+ <|> TyVar <$> typeVarIdent
+
+type :: Parser Token Type
+type = complexType <|> simpleType
+
+simpleOrParenthesizedType :: Parser Token Type
+simpleOrParenthesizedType = simpleType <|> parenthesized complexType
+
+complexExpression :: Parser Token Expression
+complexExpression
+ = liftM2 Case
+ (pToken (TIdent "case") *> simpleOrParenthesizedExpression)
+ (nonEmpty (simpleList (parenthesized caseAlternative)))
+ <|> liftM2 (foldl ExpApp)
+ simpleExpression
+ (some simpleOrParenthesizedExpression)
+where
+ caseAlternative = liftM2 CaseAlternative pattern (pToken TArrow *> expression)
+
+ complexPattern
+ = liftM2 ConstructorPattern constructorIdent (many simpleOrParenthesizedPattern)
+ simplePattern
+ = pToken TUnderscore $> Wildcard
+ <|> BasicValuePattern <$> basicValue
+ <|> IdentPattern <$> symbolIdent
+ <|> liftM2 ConstructorPattern constructorIdent (pure [])
+ simpleOrParenthesizedPattern = simplePattern <|> parenthesized complexPattern
+ pattern = complexPattern <|> simplePattern
+
+simpleExpression :: Parser Token Expression
+simpleExpression
+ = BasicValue <$> basicValue
+ <|> Symbol <$> symbolIdent
+ <|> Constructor <$> constructorIdent
+
+simpleOrParenthesizedExpression :: Parser Token Expression
+simpleOrParenthesizedExpression = simpleExpression <|> parenthesized expression
+
+expression :: Parser Token Expression
+expression = complexExpression <|> simpleExpression
+
+basicValue :: Parser Token BasicValue
+basicValue
+ = (\(TInt i) -> BVInt i) <$> pSatisfy (\t -> t=:(TInt _))
+ <|> (\(TChar c) -> BVChar c) <$> pSatisfy (\t -> t=:(TChar _))
+
+typeIdent :: Parser Token TypeIdent
+typeIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+constructorIdent :: Parser Token ConstructorIdent
+constructorIdent = fromIdent <$> pSatisfy isUpperCaseIdent
+
+typeVarIdent :: Parser Token TypeVarIdent
+typeVarIdent = fromIdent <$> pSatisfy isLowerCaseIdent
+
+symbolIdent :: Parser Token SymbolIdent
+symbolIdent
+ = fromIdent <$> pSatisfy isLowerCaseIdent
+ <|> fromIdent <$> pSatisfy isFunnyIdent
+
+fromIdent (TIdent id) :== id
+
+isUpperCaseIdent :: !Token -> Bool
+isUpperCaseIdent (TIdent s) = size s > 0 && isUpper s.[0]
+isUpperCaseIdent _ = False
+
+isLowerCaseIdent :: !Token -> Bool
+isLowerCaseIdent (TIdent s) = size s > 0 && isLower s.[0]
+isLowerCaseIdent _ = False
+
+isFunnyIdent :: !Token -> Bool
+isFunnyIdent (TIdent s) = size s > 0 && isFunnySymbol s.[0]
+isFunnyIdent _ = False
+
+parenthesized :: !(Parser Token a) -> Parser Token a
+parenthesized p = pToken TParenOpen *> p <* pToken TParenClose
+
+list :: !(Parser Token a) !(Parser Token a) -> Parser Token [a]
+list simpleElem elem = simpleList (simpleElem <|> parenthesized elem)
+
+simpleList :: !(Parser Token a) -> Parser Token [a]
+simpleList simpleElem = parenthesized (many simpleElem)
+
+nonEmpty :: !(Parser Token [a]) -> Parser Token [a]
+nonEmpty p = p >>= \xs -> if (isEmpty xs) pFail (pure xs)
+
+:: Token
+ = TParenOpen //* (
+ | TParenClose //* )
+
+ | TColon //* :
+ | TUnderscore //* _
+ | TArrow //* ->
+
+ | TIdent !String
+ | TInt !Int
+ | TChar !Char
+
+ | TError !Int !Int !String
+
+instance == Token
+where
+ (==) TParenOpen y = y=:TParenOpen
+ (==) TParenClose y = y=:TParenClose
+ (==) TColon y = y=:TColon
+ (==) TUnderscore y = y=:TUnderscore
+ (==) TArrow y = y=:TArrow
+ (==) (TIdent x) (TIdent y) = x == y
+ (==) (TIdent _) _ = False
+ (==) (TInt x) (TInt y) = x == y
+ (==) (TInt _) _ = False
+ (==) (TChar x) (TChar y) = x == y
+ (==) (TChar _) _ = False
+ (==) (TError _ _ _) _ = False
+
+lex :: ![Char] -> [Token]
+lex cs = lex` 0 0 cs
+
+lex` :: !Int !Int ![Char] -> [Token]
+lex` _ _ []
+ = []
+/* This alternative is for characters that can never be part of identifiers: */
+lex` line col [c:cs]
+ | isJust mbToken
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ '(' -> ?Just TParenOpen
+ ')' -> ?Just TParenClose
+ _ -> ?None
+/* This alternative is for characters that can be part of identifiers consisting of symbols: */
+lex` line col [c:cs]
+ | isJust mbToken && (isEmpty cs || not (isFunnySymbol (hd cs)))
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ ':' -> ?Just TColon
+ _ -> ?None
+/* This alternative is for characters that can be part of regular identifiers: */
+lex` line col [c:cs]
+ | isJust mbToken && (isEmpty cs || not (isIdentChar (hd cs)))
+ = [fromJust mbToken : lex` line (col+1) cs]
+where
+ mbToken = case c of
+ '_' -> ?Just TUnderscore
+ _ -> ?None
+lex` line col ['-','>':cs]
+ | isEmpty cs || not (isFunnySymbol (hd cs))
+ = [TArrow : lex` line (col+2) cs]
+
+/* Whitespace */
+lex` line col ['\r','\n':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\n','\r':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\n':cs]
+ = lex` (line+1) 0 cs
+lex` line col ['\r':cs]
+ = lex` (line+1) 0 cs
+lex` line col [c:cs]
+ | isSpace c
+ = lex` line (col+1) cs
+
+/* Identifiers*/
+lex` line col cs=:[c:_]
+ | isAlpha c
+ # (name,cs) = span isIdentChar cs
+ = [TIdent (toString name) : lex` line (col + length name) cs]
+ | isFunnySymbol c
+ # (name,cs) = span isFunnySymbol cs
+ = [TIdent (toString name) : lex` line (col + length name) cs]
+
+/* Basic values */
+lex` line col cs=:[c:_]
+ | isDigit c
+ # (i,cs) = span isDigit cs
+ = [TInt (toInt (toString i)) : lex` line (col + length i) cs]
+lex` line col [c:cs]
+ | c == '-' && not (isEmpty cs) && isDigit (hd cs)
+ # (i,cs) = span isDigit cs
+ = [TInt (0 - toInt (toString i)) : lex` line (col + 1 + length i) cs]
+lex` line col ['\'',c,'\'':cs] // TODO: escape sequences
+ = [TChar c : lex` line (col+3) cs]
+
+/* Unparseable input */
+lex` line col cs
+ = [TError line col ("failed to lex '" +++ toString (takeWhile (not o isControl) (take 10 cs)) +++ "...'")]
+
+isFunnySymbol :: !Char -> Bool
+isFunnySymbol c = isMember c ['!#$%&*+-/:;<=>?@\\^|~']
+
+isIdentChar :: !Char -> Bool
+isIdentChar c = isAlphanum c || c == '_' || c == '\''
diff --git a/snug-clean/src/Snug/Syntax.dcl b/snug-clean/src/Snug/Syntax.dcl
new file mode 100644
index 0000000..d030df6
--- /dev/null
+++ b/snug-clean/src/Snug/Syntax.dcl
@@ -0,0 +1,39 @@
+definition module Snug.Syntax
+
+:: TypeIdent :== String
+:: TypeVarIdent :== String
+:: ConstructorIdent :== String
+:: SymbolIdent :== String
+
+:: Type
+ = Type !TypeIdent
+ | TyVar !TypeVarIdent
+ | TyApp !Type !Type
+
+:: ConstructorDef
+ = ConstructorDef !ConstructorIdent ![Type]
+
+:: BasicValue
+ = BVInt !Int
+ | BVChar !Char
+
+:: Pattern
+ = Wildcard
+ | BasicValuePattern !BasicValue
+ | IdentPattern !SymbolIdent
+ | ConstructorPattern !ConstructorIdent ![Pattern]
+
+:: CaseAlternative
+ = CaseAlternative !Pattern !Expression
+
+:: Expression
+ = BasicValue !BasicValue
+ | Symbol !SymbolIdent
+ | Constructor !ConstructorIdent
+ | Case !Expression ![CaseAlternative]
+ | ExpApp !Expression !Expression
+
+:: Definition
+ = DataDef !TypeIdent ![TypeVarIdent] ![ConstructorDef]
+ | TypeDef !TypeIdent !Type
+ | FunDef !SymbolIdent ![(SymbolIdent, Type)] !Type !Expression
diff --git a/snug-clean/src/Snug/Syntax.icl b/snug-clean/src/Snug/Syntax.icl
new file mode 100644
index 0000000..f886750
--- /dev/null
+++ b/snug-clean/src/Snug/Syntax.icl
@@ -0,0 +1 @@
+implementation module Snug.Syntax
diff --git a/snug-clean/src/snug.icl b/snug-clean/src/snug.icl
new file mode 100644
index 0000000..1cdfcf6
--- /dev/null
+++ b/snug-clean/src/snug.icl
@@ -0,0 +1,31 @@
+module snug
+
+import StdEnv
+import StdMaybe
+
+import System.CommandLine
+
+import Snug.Parse
+
+Start w
+ # ([prog:args],w) = getCommandLine w
+ | length args <> 1 = abort ("Usage: " +++ prog +++ " INPUT\n")
+ # input = hd args
+ # (mbInput,w) = readFile input w
+ input = fromJust mbInput
+ | isNone mbInput = abort "Failed to read input\n"
+ | otherwise = parseSnug input
+
+readFile :: !String !*World -> (!?[Char], !*World)
+readFile path w
+ # (ok,f,w) = fopen path FReadData w
+ | not ok = (?None, w)
+ # (contents,f) = read [] f
+ # (_,w) = fclose f w
+ = (?Just contents, w)
+where
+ read :: ![Char] !*File -> (![Char], !*File)
+ read acc f
+ # (ok,c,f) = freadc f
+ | not ok = (reverse acc, f)
+ | otherwise = read [c:acc] f