diff options
Diffstat (limited to 'frontend/scanner.icl')
-rw-r--r-- | frontend/scanner.icl | 1518 |
1 files changed, 1518 insertions, 0 deletions
diff --git a/frontend/scanner.icl b/frontend/scanner.icl new file mode 100644 index 0000000..89e34da --- /dev/null +++ b/frontend/scanner.icl @@ -0,0 +1,1518 @@ +implementation module scanner + +import StdEnv, compare_constructor, StdCompare, general + +from utilities import revCharListToString, isSpecialChar + +/* +Known bug: +functions names starting with '->' require a ';' after the type. Solutions: +1) Make '->' an ordinary token. This implies that we have to write 'a-> .b' instead + of 'a->.b'. +2) re-scan token in new context. Requires substantial changes. +3) Determine offsides before token is generated. Tricky since we do not know the + actual context of the new token or/and have to take care of generating the right + amount of offsides. +*/ +:: SearchPaths :== [String] + +:: * ScanState = + { ss_input :: ScanInput + , ss_offsides :: ! [(Int, Bool) ] // (column, defines newDefinition) + , ss_useLayout :: ! Bool + , ss_tokenBuffer :: ! Buffer LongToken + } + +:: * ScanInput + = Input Input + | PushedToken LongToken ScanInput + +:: * Input = + { inp_stream :: ! InputStream + , inp_filename :: String + , inp_pos :: ! FilePosition + , inp_tabsize :: ! Int + , inp_charBuffer :: ! Buffer (Char,FilePosition) + //, inp_curToken :: ! [ Char ] + } + +:: * InputStream + = InFile ! * File + | InLOC ! [Char] + | OldChar ! Char ! FilePosition ! InputStream + | OldChars ! [Char] ! InputStream + | OldToken ! LongToken ! InputStream + +:: FilePosition = + { fp_line :: ! Int + , fp_col :: ! Int + } + +:: LongToken = + { lt_position :: ! FilePosition // Start position of this token + , lt_token :: ! Token // The token itself +// , lt_chars :: ! [Char] // The chars in this token +// , lt_context :: ! Context // The context of the scanning of this token + } + +:: Buffer x + = Buffer0 + | Buffer1 x + | Buffer2 x x + | Buffer3 x x x // buffer size is 3. + +:: Token + = IdentToken ! .String // an identifier + | IntToken !.String // an integer + | RealToken !.String // a real + | StringToken !.String // a string + | CharToken !.String // a character + | CharListToken !.String // a character list '{char}*' + | BoolToken !Bool // a boolean + | OpenToken // ( + | CloseToken // ) + | CurlyOpenToken // { + | CurlyCloseToken // } + | SquareOpenToken // [ + | SquareCloseToken // ] + + | DotToken // . + | SemicolonToken // ; + | ColonToken // : + | DoubleColonToken // :: + | CommaToken // , + | ExclamationToken // ! + | BarToken // | + | ArrowToken // -> + | DoubleArrowToken // => + | EqualToken // = + | DefinesColonToken // =: + | ColonDefinesToken // :== + | WildCardToken // _ + | BackSlashToken // \ + | DoubleBackSlashToken // \\ + | LeftArrowToken // <- + | LeftArrowColonToken // <-: + | DotDotToken // .. + | AndToken // & + | HashToken // # + | AsteriskToken // * + | LessThanOrEqualToken // <= + + | ModuleToken // module + | ImpModuleToken // implementation + | DefModuleToken // definition + | SysModuleToken // system + + | ImportToken // import + | FromToken // from + | SpecialToken // special + + | IntTypeToken // Int + | CharTypeToken // Char + | RealTypeToken // Real + | BoolTypeToken // Bool + | StringTypeToken // String + | FileTypeToken // File + | WorldTypeToken // World + | VoidTypeToken // Void + | LeftAssocToken // left + | RightAssocToken // right + | ClassToken // class + | InstanceToken // instance + | OtherwiseToken // otherwise + + | IfToken // if + | WhereToken // where + | WithToken // with + | CaseToken // case + | OfToken // of + | LetToken Bool // let!, let + | SeqLetToken Bool // #!, # + | InToken // in + + | DynamicToken // dynamic + | DynamicTypeToken // Dynamic + + | PriorityToken Priority // infixX N + + | CodeToken // code + | InlineToken // inline + | CodeBlockToken [String] // {...} + + | NewDefinitionToken // generated automatically, OffsideToken. + | EndGroupToken // generated automatically + | EndOfFileToken // end of file + | ErrorToken String // an error has occured + +:: Context + = GeneralContext + | TypeContext + | FunctionContext + | CodeContext + +instance == Context +where + (==) co1 co2 = equal_constructor co1 co2 + +:: Assoc + = LeftAssoc + | RightAssoc + | NoAssoc + +:: Priority + = Prio Assoc Int + | NoPrio + +// +// Macros for error messages +// +ScanErrIllegal :== "illegal char in input" +ScanErrCharErr :== "wrong character denotation" +ScanErrNLString :== "new line in string denotation" +ScanErrWild :== "ident should not start with _" + +class getFilename state :: !*state -> (!String,!*state) + +instance getFilename ScanInput +where + getFilename (Input input) + # (filename,input) = input!inp_filename + = (filename,Input input) + getFilename (PushedToken tok input) + # (filename,input) = getFilename input + = (filename,PushedToken tok input) + +instance getFilename ScanState +where + getFilename scanState=:{ss_input} + # (filename,ss_input) = getFilename ss_input + = (filename,{scanState & ss_input = ss_input }) + +class getPosition state :: !*state -> (!FilePosition,!*state) // Position of current Token (or Char) + +instance getPosition ScanState +where + getPosition scanState=:{ss_tokenBuffer} + | isEmptyBuffer ss_tokenBuffer + = getCharPosition scanState + # (ltok,_) = get ss_tokenBuffer + = (ltok.lt_position, scanState) + +instance getPosition Input +where + getPosition input=:{inp_pos} = (inp_pos, input) + +class getCharPosition state :: !*state -> (FilePosition,!*state) + +instance getCharPosition ScanState +where + getCharPosition scanState=:{ss_input=Input input} + # (pos,input) = getPosition input + = (pos,{ scanState & ss_input = Input input }) + getCharPosition scanState=:{ss_input=PushedToken longToken _} + = (longToken.lt_position,scanState) + +instance getCharPosition Input +where getCharPosition input=:{inp_pos} = (inp_pos, input) + +class nextToken state :: !Context !*state -> (!Token, !*state) + +instance nextToken ScanState +where + nextToken newContext scanState=:{ss_input=input=:PushedToken token=:{lt_position,lt_token/*,lt_context*/} rest,ss_tokenBuffer} +// | lt_context == newContext || ~ (contextDependent lt_token) || isGeneratedToken lt_token +// | True + = ( lt_token + , { scanState + & ss_input = rest + , ss_tokenBuffer = store token ss_tokenBuffer + } + ) //-->> ("nextToken: pushed token", lt_token) +/* = nextToken newContext { scanState & ss_input = pushTokensBack input} + where + pushTokensBack input=:(Input _) = input + pushTokensBack (PushedToken token input) + # (Input input=:{inp_stream}) = pushTokensBack input + = Input + { input + & inp_stream = OldToken token inp_stream + } //-->> ("pushTokensBack",token) +*/ nextToken context scanState=:{ss_input=Input inp,ss_tokenBuffer,ss_offsides,ss_useLayout} + # (error, c, inp) = SkipWhites inp + (pos, inp) = inp!inp_pos + scanState = {scanState & ss_input = Input inp } + = case error of + Yes string -> ( ErrorToken string + , { scanState + & ss_tokenBuffer = store + { lt_position = pos + , lt_token = ErrorToken string + // , lt_chars = [] + // , lt_context = context + } + scanState.ss_tokenBuffer + } + ) -->> ("Error token generated",string) + no -> determineToken c scanState + where + determineToken c scanState=:{ss_input=Input inp} + # (eof, inp) = EndOfInput inp + (pos, inp) = inp!inp_pos + | eof && c == NewLineChar + # newToken = EndOfFileToken + = checkOffside pos newToken + { scanState + & ss_tokenBuffer = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = [] + // , lt_context = context + } + scanState.ss_tokenBuffer + , ss_input = Input inp + } // -->> (EndOfFileToken,pos) + // otherwise // ~ (eof && c == NewLineChar) + # (token, inp) = Scan c inp /* {inp & inp_curToken = [c]}*/ context + // # (chars, inp) = inp!inp_curToken + = checkOffside pos token + { scanState + & ss_input = Input inp + , ss_tokenBuffer = store + { lt_position = pos + , lt_token = token + // , lt_chars = reverse chars + // , lt_context = context + } + ss_tokenBuffer + } // -->> (token,pos) + nextToken _ _ = abort "Scanner: Error in nextToken" + +class tokenBack state :: !*state -> !*state + +instance tokenBack ScanState +where + tokenBack scanState=:{ss_tokenBuffer, ss_input} + | isEmptyBuffer ss_tokenBuffer = abort "tokenBack with empty token buffer" + # (tok, buf) = get ss_tokenBuffer + = { scanState + & ss_tokenBuffer = buf + , ss_input = PushedToken tok ss_input + + } // -->> ("tokenBack", tok, buf) + +class currentToken state :: !*state -> (!Token, !*state) + +instance currentToken ScanState +where currentToken scanState=:{ss_tokenBuffer} + | isEmptyBuffer ss_tokenBuffer + = (ErrorToken "dummy", scanState) + = ((head ss_tokenBuffer).lt_token, scanState) + +class insertToken state :: !Token !Context !*state -> *state + +instance insertToken ScanState +where + insertToken t c scanState +/* # chars = if (isGeneratedToken t) + [] + (fromString (toString t)) +*/ # (pos, scanState=:{ss_input}) = getPosition scanState + = { scanState + & ss_input = PushedToken + { lt_position = pos + , lt_token = t + // , lt_chars = chars + // , lt_context = c + } + ss_input + } +/* +isGeneratedToken :: !Token -> Bool +isGeneratedToken NewDefinitionToken = True +isGeneratedToken EndGroupToken = True +isGeneratedToken (CodeBlockToken _) = True +isGeneratedToken _ = False +*/ + +class replaceToken state :: !Token !*state -> *state + +instance replaceToken ScanState +where + replaceToken tok scanState=:{ss_tokenBuffer} + # (longToken,buffer) = get ss_tokenBuffer + = { scanState + & ss_tokenBuffer = store { longToken & lt_token = tok } buffer + } + +SkipWhites :: !Input -> (!Optional String, !Char, !Input) +SkipWhites input + # (eof, c, input) = ReadChar input + | eof = (No, NewLineChar, input) // -->> "EOF in SkipWhites" + | IsWhiteSpace c = SkipWhites input + = TryScanComment c input + +TryScanComment :: !Char !Input -> (!Optional String, !Char, !Input) +TryScanComment c1=:'/' input + # (eof,c2, input) = ReadChar input + | eof = (No, c1, input) + = case c2 of + '/' -> SkipWhites (SkipToEndOfLine input) + '*' -> case ScanComment input of + (No,input) -> SkipWhites input + (er,input) -> (er, c1, input) + _ -> (No, c1, charBack input) +TryScanComment c input + = (No, c, input) + +ScanComment :: !Input -> (!Optional String, !Input) +ScanComment input + # (eof1, c1, input) = ReadChar input + | eof1 = (Yes "end of file encountered inside comment", input) + | c1 == '/' + # (eof2, c2, input) = ReadChar input + | eof2 = (Yes "end of file encountered inside comment", input) + = case c2 of + '/' -> ScanComment (SkipToEndOfLine input) + '*' -> case ScanComment input of + (No, input) -> ScanComment input + error -> error + _ -> ScanComment input + | c1 == '*' + # (eof2, c2, input) = ReadChar input + | eof2 = (Yes "end of file encountered inside comment", input) + | c2 == '/' = (No, input) + = ScanComment input + | otherwise = ScanComment input + +SkipToEndOfLine :: !Input -> !Input +SkipToEndOfLine input + # (eof, c, input) = ReadChar input + | eof = input + | c==NewLineChar = input + = SkipToEndOfLine input +/* +SkipToChar :: ! Char ! Input -> Input +SkipToChar c input + # (eof, c1, input) = ReadChar input + | eof || c1 == c = input + = SkipToChar c input +*/ +Scan :: !Char !Input !Context -> (!Token, !Input) +Scan '(' input co = (OpenToken, input) +Scan ')' input co = (CloseToken, input) +Scan '{' input CodeContext = ScanCodeBlock input +Scan '{' input co = (CurlyOpenToken, input) +Scan '}' input co = (CurlyCloseToken, input) +Scan '[' input co = (SquareOpenToken, input) +Scan ']' input co = (SquareCloseToken, input) +Scan c0=:'|' input co + # (eof, c1, input) = ReadChar input + | eof = (BarToken, input) + | isSpecialChar c1 = ScanOperator 1 input [c1, c0] co + = (BarToken, charBack input) +Scan ',' input co = (CommaToken, input) +Scan ';' input co = (SemicolonToken, input) +Scan '#' input TypeContext = (HashToken, input) +Scan c0=:'#' input co + # (strict, input) = determineStrictness input + | strict + = (SeqLetToken strict, input) + # (eof,c1, input) = ReadChar input + | eof + = (SeqLetToken False, input) + | isSpecialChar c1 + = ScanOperator 1 input [c1, c0] co + // otherwise + = (SeqLetToken strict, charBack input) +Scan '*' input TypeContext = (AsteriskToken, input) +Scan c0=:'&' input co + # (eof, c1, input) = ReadChar input + | eof = (AndToken, input) + | isSpecialChar c1 = ScanOperator 1 input [c1, c0] co + = (AndToken, charBack input) +Scan c0=:'.' input co + = case co of + TypeContext + + -> (DotToken, input) + _ # (eof, c1, input) = ReadChar input + | eof -> (DotToken, input) + | c1 <> '.' -> (DotToken, charBack input) + # (eof, c2, input) = ReadChar input + | eof -> (DotDotToken, input) + | isSpecialChar c2 + -> ScanOperator 2 input [c2, c1, c0] co + -> (DotDotToken, charBack input) +Scan '!' input TypeContext = (ExclamationToken, input) +Scan '\\' input co + # (eof, c, input) = ReadChar input + | eof = (BackSlashToken, input) + | c == '\\' = (DoubleBackSlashToken, input) + = (BackSlashToken, charBack input) +Scan c0=:'_' input co + # (eof, c1, input) = ReadChar input + | eof = (WildCardToken, input) + | IsIdentChar c1 co = ScanIdent 1 input [c1, c0] co +// | isSpecialChar c1 = ScanOperator 1 input [c1, c0] co + = (WildCardToken, charBack input) +Scan c0=:'<' input TypeContext + # (eof, c1, input) = ReadChar input + | eof = (ErrorToken "< just before end of file in TypeContext", input) + | c1 == '=' = (LessThanOrEqualToken, input) + = ScanOperator 0 (charBack input) [c0] TypeContext +Scan c0=:'<' input co + # (eof, c1, input) = ReadChar input + | eof = (IdentToken "<", input) + | c1 <> '-' = ScanOperator 0 (charBack input) [c0] co + # (eof, c2, input) = ReadChar input + | eof = (LeftArrowToken, input) + | c2 == ':' + # (eof, c3, input) = ReadChar input + | eof = (LeftArrowColonToken, input) + | isSpecialChar c3 = ScanOperator 3 input [c3, c2, c1, c0] co + = (LeftArrowColonToken, charBack input) + | isSpecialChar c2 = ScanOperator 2 input [c2, c1, c0] co + = (LeftArrowToken, charBack input) +Scan c0=:'-' input co + # (eof, c1, input) = ReadChar input + | eof = (IdentToken "-", input) + # new = newExp input.inp_charBuffer + | IsDigit c1 && new = ScanNumeral 1 input [c1,c0] + | c1 <> '>' = ScanOperator 0 (charBack input) [c0] co + | co == TypeContext = (ArrowToken, input) // -> is a reserved symbol in a type context + // Can cause an error when token (like ->.) is read in wrong context + # (eof, c2, input) = ReadChar input + | eof = (ArrowToken, input) + | isSpecialChar c2 = ScanOperator 2 input [c2, c1, c0] co + = (ArrowToken, charBack input) +Scan c0=:'+' input co + # (eof, c1, input) = ReadChar input + | eof = (IdentToken "+", input) + # new = newExp input.inp_charBuffer + | IsDigit c1 && new = ScanNumeral 1 input [c1,c0] + = ScanOperator 0 (charBack input) [c0] co +Scan c0=:'=' input co + # (eof, c, input) = ReadChar input + | eof = (EqualToken, input) + | c == ':' = (DefinesColonToken, input) + | c == '>' = (DoubleArrowToken, input) + | isSpecialChar c = ScanOperator 1 input [c, c0] co + = (EqualToken, charBack input) +Scan c0=:':' input co + # (eof,c1, input) = ReadChar input + | eof = (ColonToken, input) + | c1 == ':' = (DoubleColonToken, input) + | c1 <> '=' + | isSpecialChar c1 = ScanOperator 1 input [c1, c0] co + = (ColonToken, charBack input) + # (eof, c2, input) = ReadChar input + | eof = ScanOperator 1 input [c1, c0] co + | c2 == '=' = (ColonDefinesToken, input) + = ScanOperator 1 (charBack input) [c1, c0] co +Scan c0=:'\'' input co = ScanChar input [c0] +Scan c0=:'\"' input co = ScanString 0 input [c0] +Scan c input co + | IsDigit c = ScanNumeral 0 input [c] + | IsIdentChar c co = ScanIdent 0 input [c] co + | isSpecialChar c = ScanOperator 0 input [c] co + = (ErrorToken ScanErrIllegal, input) + +newExp :: !(Buffer (Char,FilePosition)) -> Bool +newExp buffer + # (c, _) = case buffer of + Buffer3 _ _ cp -> cp + _ -> (' ',{fp_line=0,fp_col=0}) + = new_exp_char c +where + new_exp_char ',' = True + new_exp_char '[' = True + new_exp_char '(' = True + new_exp_char '/' = True // to handle end of comment symbol: */ + new_exp_char c = isSpace c + +ScanIdent :: !Int !Input ![Char] !Context -> (!Token, !Input) +ScanIdent n input token co + # (eof, c, input) = ReadChar input + | eof = CheckReserved co (revCharListToString n token) input + | IsIdentChar c co = ScanIdent (n + 1) input [c:token] co + = CheckReserved co (revCharListToString n token) (charBack input) + + +ScanOperator :: !Int !Input ![Char] !Context -> (!Token, !Input) +ScanOperator n input token co + # (eof, c, input) = ReadChar input + | eof = (IdentToken (revCharListToString n token), input) + | isSpecialChar c = ScanOperator (n + 1) input [c:token] co + = CheckReserved co (revCharListToString n token) (charBack input) + +CheckReserved :: !Context !String !Input -> (!Token, !Input) +CheckReserved GeneralContext s i = CheckGeneralContext s i +CheckReserved TypeContext s i = CheckTypeContext s i +CheckReserved FunctionContext s i = CheckFunctContext s i +CheckReserved CodeContext s i = CheckCodeContext s i + +CheckGeneralContext :: !String !Input -> (!Token, !Input) +CheckGeneralContext s input + = case s of + "module" -> (ModuleToken , input) + "definition" -> (DefModuleToken , input) + "implementation" -> (ImpModuleToken , input) + "system" -> (SysModuleToken , input) + "import" -> (ImportToken , input) + "from" -> (FromToken , input) + "in" -> (InToken , input) + s -> CheckEveryContext s input + +CheckEveryContext :: !String !Input -> (!Token, !Input) +CheckEveryContext s input + = case s of + "where" -> (WhereToken , input) + "with" -> (WithToken , input) + "class" -> (ClassToken , input) + "instance" -> (InstanceToken , input) + "otherwise" -> (OtherwiseToken , input) +// "!" -> (ExclamationToken , input) +// "::" -> (DoubleColonToken , input) + "*/" -> (ErrorToken "Unexpected end of comment, */", input) + "infixr" # (error, n, input) = GetPrio input + -> case error of + Yes err -> (ErrorToken err , input) //-->> ("Error token generated: "+err) + No -> (PriorityToken (Prio RightAssoc n) , input) + "infixl" # (error, n, input) = GetPrio input + -> case error of + Yes err -> (ErrorToken err , input) //-->> ("Error token generated: "+err) + No -> (PriorityToken (Prio LeftAssoc n) , input) + "infix" # (error, n, input) = GetPrio input + -> case error of + Yes err -> (ErrorToken err , input) //-->> ("Error token generated: "+err) + No -> (PriorityToken (Prio NoAssoc n) , input) + s -> (IdentToken s , input) + +CheckTypeContext :: !String !Input -> (!Token, !Input) +CheckTypeContext s input + = case s of + "Int" -> (IntTypeToken , input) + "Char" -> (CharTypeToken , input) + "Real" -> (RealTypeToken , input) + "Bool" -> (BoolTypeToken , input) + "String" -> (StringTypeToken , input) + "File" -> (FileTypeToken , input) + "World" -> (WorldTypeToken , input) + "Dynamic" -> (DynamicTypeToken , input) + "special" -> (SpecialToken , input) + "from" -> (FromToken , input) + s -> CheckEveryContext s input + +CheckFunctContext :: !String !Input -> (!Token, !Input) +CheckFunctContext s input + = case s of + "if" -> (IfToken , input) + "True" -> (BoolToken True , input) + "False" -> (BoolToken False , input) + "case" -> (CaseToken , input) + "of" -> (OfToken , input) + "system" -> (SysModuleToken , input) + "import" -> (ImportToken , input) + "from" -> (FromToken , input) + "let" # (strict, input) = determineStrictness input + -> (LetToken strict, input) +// "Let" # (strict, input) = determineStrictness input +// -> (SeqLetToken strict , input) + "in" -> (InToken , input) + "dynamic" -> (DynamicToken , input) + "code" -> (CodeToken , input) + s -> CheckEveryContext s input + +CheckCodeContext :: !String !Input -> (!Token, !Input) +CheckCodeContext s input + = case s of + "inline" -> (InlineToken , input) + s -> CheckEveryContext s input + +GetPrio :: !Input -> (!Optional String, !Int, !Input) +GetPrio input + # (error, c, input) = SkipWhites input + | IsDigit c + = (error, digitToInt c, input) + = (error, defaultPrio , charBack input) +where defaultPrio = 0 + +determineStrictness :: !Input -> (!Bool, !Input) +determineStrictness input + # (eof, c, input) = ReadChar input + | eof = (False, input) + | c == '!' = (True, input) + = (False, charBack input) + +ScanCodeBlock :: !Input -> (!Token, !Input) +ScanCodeBlock input + = scan_code_block [] input +where + scan_code_block :: ![String] !Input -> (!Token,!Input) + scan_code_block acc input + # (eof, c, input) = ReadChar input + | c == '}' + = (CodeBlockToken (reverse acc), input) + | isNewLine c + | eof + = (ErrorToken "eof in code block", input) + = scan_code_block acc input + | IsWhiteSpace c + = scan_code_block acc input + # (line, input) = ReadLine input + = scan_code_block [toString c+stripNewline line:acc] input + +stripNewline :: !String -> String +stripNewline string + # size = size string + = case size of + 0 -> string + 1 | isNewLine string.[0] + -> "" + -> string + _ | isNewLine string.[size-1] + | isNewLine string.[size-2] + -> string%(0,size-3) + -> string%(0,size-2) + -> string + + +ScanNumeral :: !Int !Input [Char] -> (!Token, !Input) +ScanNumeral n input chars=:['0':r] + | isEmpty r || r == ['+'] + # (eof, c, input) = ReadChar input + | eof = (IntToken (revCharListToString n chars), input) + | c == 'x' + # (eof, c1, input) = ReadChar input + | eof = (IntToken "0", charBack input) + | isHexDigit c1 = ScanHexNumeral (hexDigitToInt c1) input + = (IntToken "0", charBack (charBack input)) + | isOctDigit c = ScanOctNumeral (digitToInt c) input + | c == '.' = TestFraction n input chars + = (IntToken "0", charBack input) + | r == ['-'] + # (eof, c, input) = ReadChar input + | eof = (IntToken (revCharListToString n chars), input) + | c == 'x' + # (eof, c1, input) = ReadChar input + | eof = (IntToken "0", charBack input) + | isHexDigit c1 = ScanHexNumeral (~ (hexDigitToInt c1)) input + = (IntToken "0", charBack (charBack input)) + | isOctDigit c = ScanOctNumeral (~ (digitToInt c)) input + | c == '.' = TestFraction n input chars + = (IntToken "0", charBack input) +ScanNumeral n input chars + # (eof, c, input) = ReadChar input + | eof = (IntToken (revCharListToString n chars), input) + | IsDigit c = ScanNumeral (n + 1) input [c:chars] + | c == 'E' = ScanExponentSign (n + 1) input [c:chars] + | c == '.' = TestFraction n input chars + = (IntToken (revCharListToString n chars), charBack input) + +TestFraction :: !Int !Input ![Char] -> (!Token, !Input) +TestFraction n input chars + # (eof, c, input) = ReadChar input + | eof = (ErrorToken ("Incorrect Real at end of file: "+(revCharListToString (n+1) ['.':chars])), input) + | IsDigit c = ScanFraction (n + 2) input [c,'.':chars] + = (IntToken (revCharListToString n chars), charBack (charBack input)) + +ScanFraction :: !Int !Input ![Char] -> (!Token, !Input) +ScanFraction n input chars + # (eof, c, input) = ReadChar input + | eof = (RealToken (revCharListToString n chars), input) + | c == 'E' = case chars of + [c:_] | IsDigit c -> ScanExponentSign (n + 1) input [c:chars] + _ -> ScanExponentSign (n + 2) input [c,'0':chars] + | IsDigit c = ScanFraction (n + 1) input [c:chars] + = case chars of + [c:_] | IsDigit c -> (RealToken (revCharListToString n chars), charBack input) + _ -> (RealToken (revCharListToString (n+1) ['0':chars]), charBack input) + +ScanExponentSign :: !Int !Input ![Char] -> (!Token, !Input) +ScanExponentSign n input chars + # (eof, c, input) = ReadChar input + | eof = (RealToken (revCharListToString n chars), input) + | c == '+' = ScanExponent n input chars + | c == '-' || IsDigit c = ScanExponent (n+1) input [c:chars] + | otherwise = (ErrorToken ("Digit or sign expected after "+revCharListToString n chars), charBack input) + +ScanExponent :: !Int !Input ![Char] -> (!Token, !Input) +ScanExponent n input chars + # (eof, c, input) = ReadChar input + | eof = (RealToken (revCharListToString n chars), input) + | IsDigit c = ScanExponent (n + 1) input [c:chars] + = case chars of + [c:_] | IsDigit c -> (RealToken (revCharListToString n chars), charBack input) + _ -> (ErrorToken ("Digit expected after "+revCharListToString n chars), charBack input) + +ScanHexNumeral :: !Int !Input -> (!Token, !Input) +ScanHexNumeral n input + # (eof, c, input) = ReadChar input + | eof = (IntToken (toString n), input) + | isHexDigit c = ScanHexNumeral (n*16+hexDigitToInt c) input + = (IntToken (toString n), charBack input) + +ScanOctNumeral :: !Int !Input -> (!Token, !Input) +ScanOctNumeral n input + # (eof, c, input) = ReadChar input + | eof = (IntToken (toString n), input) + | isOctDigit c = ScanOctNumeral (n*8+digitToInt c) input + = (IntToken (toString n), charBack input) + +ScanChar :: !Input ![Char] -> (!Token, !Input) +ScanChar input chars + # (eof, c, input) = ReadChar input + | eof = (ErrorToken "End of file inside Char denotation", input) + | '\\' <> c = ScanEndOfChar 1 [c: chars] input + # (chars, n, input) = ScanBSChar 0 chars input + = ScanEndOfChar n chars input + +ScanBSChar :: !Int ![Char] !Input -> (![Char], !Int, !Input) +ScanBSChar n chars input + # (eof, c, input) = ReadChar input + | eof = (chars, n, input) + = case c of + 'n' -> (['n','\\':chars], n + 2, input) + 'r' -> (['r','\\':chars], n + 2, input) + 'f' -> (['f','\\':chars], n + 2, input) + 'b' -> (['b','\\':chars], n + 2, input) + 't' -> (['t','\\':chars], n + 2, input) + 'v' -> (['v','\\':chars], n + 2, input) + '\\' -> (['\\','\\':chars], n + 2, input) + '"' -> (['"' ,'\\':chars], n + 2, input) + '\'' -> (['\'','\\':chars], n + 2, input) + 'x' # (cc,input) = ScanNumChar Hex isHexDigit 2 0 input // max 2 characters + -> to_chars cc n input + 'd' # (cc,input) = ScanNumChar Dec isDigit 3 0 input // max 3 characters + -> to_chars cc n input + c | IsOct c + # (cc,input) = ScanNumChar Oct IsOct 2 (digitToInt c) input // max 3 characters, including current + -> to_chars cc n input + -> ([c:chars], n + 1, input) +where + ScanNumChar base valid 0 acc input + = (acc, input) + ScanNumChar base valid n acc input + # (eof, c, input) = ReadChar input + | eof = (acc, input) + | valid c = ScanNumChar base valid (n-1) (base*acc+digitToInt c) input + = (acc, charBack input) + Hex = 16 + Oct = 8 + Dec = 10 + + to_chars cc n input + = case toChar cc of + '\n' -> (['n','\\':chars], n + 2, input) + '\r' -> (['r','\\':chars], n + 2, input) + '\f' -> (['f','\\':chars], n + 2, input) + '\b' -> (['b','\\':chars], n + 2, input) + '\t' -> (['t','\\':chars], n + 2, input) + '\v' -> (['v','\\':chars], n + 2, input) + '\\' -> (['\\','\\':chars], n + 2, input) +// '"' -> (['"' ,'\\':chars], n + 2, input) + '\'' -> (['\'','\\':chars], n + 2, input) + c -> ([c:chars], n + 1, input) + +ScanEndOfChar :: !Int ![Char] !Input -> (!Token, !Input) +ScanEndOfChar n chars input + # (eof, c, input) = ReadChar input + | eof = (ErrorToken "End of file inside char denotation", input) + | '\'' == c = (CharToken (revCharListToString (n + 1) [c:chars]), input) + = ScanCharList (n+1) [c:chars] input +// = (ErrorToken ScanErrCharErr, input) + +ScanCharList :: !Int ![Char] !Input -> (!Token, !Input) +ScanCharList n chars input + # (eof, c, input) = ReadChar input + | eof = (ErrorToken "End of file inside char list denotation", input) + = case c of + '\'' # charList = revCharListToString n chars % (1,n) // without '\'' + -> (CharListToken charList, input) + '\\' # (chars, n, input) = ScanBSChar n chars input + -> ScanCharList n chars input + NewLineChar -> (ErrorToken "newline in char list", input) + _ -> ScanCharList (n+1) [c:chars] input + +ScanString :: !Int !Input ![Char] -> (!Token, !Input) +ScanString n input chars + # (eof, c, input) = ReadChar input + | eof = (ErrorToken "End of file inside String denotation", input) + = case c of + '\\' # (chars, n, input) = ScanBSChar n chars input + -> ScanString n input chars + '\"' -> (StringToken (revCharListToString (n + 1) [c:chars]), input) + NewLineChar -> (ErrorToken ScanErrNLString, input) + _ -> ScanString (n + 1) input [c:chars] + +/* + some predicates on tokens +*/ + +isLhsStartToken :: ! Token -> Bool +isLhsStartToken OpenToken = True +isLhsStartToken SquareOpenToken = True +isLhsStartToken CurlyOpenToken = True +isLhsStartToken (IdentToken id) = True +isLhsStartToken token = False + +isOffsideToken :: ! Token -> Bool +isOffsideToken NewDefinitionToken = True +isOffsideToken EndGroupToken = True +isOffsideToken EndOfFileToken = True +isOffsideToken token = False + +isEndGroupToken :: ! Token -> Bool +isEndGroupToken EndGroupToken = True +isEndGroupToken CurlyCloseToken = True +isEndGroupToken token = False +/* +contextDependent :: !Token -> Bool +contextDependent HashToken = True +//contextDependent (SeqLetToken _) = True // Do not do this XXXXXX +contextDependent _ = False +*/ +/* + character functions +*/ + +//IsWhiteSpace :: Char -> Bool +IsWhiteSpace c :== isSpace c + +//IsDigit :: Char -> Bool +IsDigit c :== isDigit c + +IsOct c :== '0' <= c && c <= '7' + +//IsHex c :== isDigit c || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f') +/* +isHexDigit :: !Char -> Bool // Defined in StdChar +isHexDigit c + | isDigit c + = True + | c < 'g' + = c >= 'a' + | c < 'G' + = c >= 'A' + = False +*/ +//IsIdentChar :: !Char !Context -> Bool +//IsIdentChar c co +// :== isAlphanum c || c == '_' || c == '`' || (c == '^' && co == TypeContext) + +hexDigitToInt :: !Char -> Int +hexDigitToInt 'a' = 10 +hexDigitToInt 'A' = 10 +hexDigitToInt 'b' = 11 +hexDigitToInt 'B' = 11 +hexDigitToInt 'c' = 12 +hexDigitToInt 'C' = 12 +hexDigitToInt 'd' = 13 +hexDigitToInt 'D' = 13 +hexDigitToInt 'e' = 14 +hexDigitToInt 'E' = 14 +hexDigitToInt 'f' = 15 +hexDigitToInt 'F' = 15 +hexDigitToInt c = digitToInt c + +IsIdentChar :: !Char !Context -> Bool +IsIdentChar c _ | isAlphanum c = True +IsIdentChar '_' _ = True +IsIdentChar '`' _ = True +IsIdentChar '^' TypeContext = True +IsIdentChar _ _ = False + +/* + Input functions +*/ + +EndOfInput :: !Input -> (!Bool, !Input) +EndOfInput input=:{inp_stream = InFile file} + # (endoffile, file) = fend file + = (endoffile, { input & inp_stream = InFile file }) +EndOfInput f=:{inp_stream = InLOC []} = (True, f) +EndOfInput input = (False, input) + +ReadChar :: !Input -> (!Bool, !Char, !Input) // Bool indicates end of file, we read always newlines in an empty file +ReadChar input=:{inp_stream = InFile file, inp_pos, inp_tabsize, inp_charBuffer /*, inp_curToken*/} + #! (s, file) = freadline file + eof = s == "" + | eof + # c = NewLineChar + pos = NextPos c inp_pos inp_tabsize + = ( eof + , c + , { input + & inp_stream = InFile file + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) // -->> ("EOF in " + input.inp_filename + " found in ReadChar") + // otherwise // s <> "" + # chars = fromString s + = ReadChar { input & inp_stream = OldChars chars (InFile file) } +/* #! (eof, file) = fend file // old, too slow + | eof + # c = NewLineChar + pos = NextPos c inp_pos inp_tabsize + = ( eof + , c + , { input + & inp_stream = InFile file + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) // -->> ("EOF in " + input.inp_filename + " found in ReadChar") + #! (ok, c, file) = freadc file + | ok + # pos = NextPos c inp_pos inp_tabsize + (c,input`) = correctNewline c pos inp_tabsize (InFile file) + = ( False + , c + , { input + & inp_stream = input` + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) + = abort "ReadChar failure" */ +ReadChar input =: {inp_stream = OldChar c pos oldfile, inp_charBuffer /*, inp_curToken*/} + = ( False + , c + , { input + & inp_stream = oldfile + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) +ReadChar input =: {inp_stream = OldChars [c:rest] stream, inp_pos, inp_tabsize, inp_charBuffer /*, inp_curToken*/} + # pos = NextPos c inp_pos inp_tabsize + (c,input`) = correctNewline c pos inp_tabsize (OldChars rest stream) + = ( False + , c + , { input + & inp_stream = input` + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) +ReadChar input =: {inp_stream = OldChars [] stream, inp_pos} + = ReadChar {input & inp_stream = stream} +ReadChar input =: {inp_stream = InLOC [c : r], inp_pos, inp_tabsize /*, inp_curToken*/, inp_charBuffer} + # pos = NextPos c inp_pos inp_tabsize + = ( False + , c + , { input + & inp_stream = InLOC r + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) +ReadChar input =: {inp_stream = InLOC [], inp_pos, inp_tabsize /*, inp_curToken*/, inp_charBuffer} + # c = NewLineChar + pos = NextPos c inp_pos inp_tabsize + = ( True + , c + , { input + & inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c:inp_curToken] + } + ) + //-->> ("EOF of InLOC found in ReadChar") +/* +ReadChar input =: {inp_stream = OldToken {lt_chars,lt_position} stream, inp_charBuffer} + # pos = lt_position + c = hd lt_chars + = ( False + , c + , { input + & inp_stream = OldChars (tl lt_chars) stream + , inp_pos = pos + , inp_charBuffer = store (c,pos) inp_charBuffer + // , inp_curToken = [c] + } + ) +*/ +ReadLine :: !Input -> (!String, !Input) +ReadLine input=:{inp_stream = OldChars cs oldfile, inp_pos} + # input = {input & inp_stream = oldfile, inp_pos = NextPos CRChar inp_pos 0} + | isEmpty cs = ReadLine input + | otherwise = (toString cs, input) +ReadLine input=:{inp_stream = InFile infile,inp_pos} + # (eof, file) = fend infile + | eof = ("", {input & inp_stream = InFile file}) + # (l, file ) = freadline file + = (l, {input & inp_stream = InFile file, inp_pos = NextPos CRChar inp_pos 0}) +ReadLine input =: {inp_stream = InLOC [a : c], inp_pos, inp_tabsize} + | a==NewLineChar= ("\n", {input & inp_stream = InLOC c, inp_pos = inp_pos}) + # (line, input) = ReadLine {input & inp_stream = InLOC c, inp_pos = inp_pos} + = (toString a + line, input) +ReadLine input=:{inp_stream = OldChar c p oldfile} + # input = {input & inp_stream = oldfile} + | c==NewLineChar= ("\n", input) + # (line, input) = ReadLine input + = (toString c + line, input) +ReadLine input = ("", input) + +NextPos :: !Char !FilePosition !Int -> FilePosition +NextPos c pos=:{fp_line, fp_col} t + = case c of + LFChar -> NextPos CRChar pos t // -->> "LF in Nextpos" + CRChar -> {fp_line = fp_line + 1, fp_col = 0} // -->> ("line " +toString (fp_line + 1)) + '\t' -> {pos & fp_col = t * (fp_col / t + 1)} + _ -> {pos & fp_col = fp_col + 1} + +correctNewline :: !Char !FilePosition !Int !InputStream -> (!Char, !InputStream) +correctNewline c pos tab_size (InFile file) // Correct newline convention: Mac: CR, Unix: LF, DOS CR LF + = case c of + LFChar -> (NewLineChar,InFile file) //-->> "UNIX newline" + CRChar + # (ok,c2,file) = freadc file + | ok + | c2 == LFChar -> (NewLineChar,InFile file) // -->> "DOS newline corrected" + -> (NewLineChar,OldChar c2 (NextPos c2 pos tab_size) (InFile file)) + -> (NewLineChar, InFile file) + _ -> (c, InFile file) +correctNewline c pos tab_size (OldChars [] input) + = correctNewline c pos tab_size input +correctNewline c pos tab_size (OldChars chars input) + = case c of + LFChar -> (NewLineChar,OldChars chars input) //-->> "UNIX newline" + CRChar + # [c2:rest] = chars + | c2 == LFChar -> (NewLineChar,OldChars rest input) // -->> "DOS newline corrected" + -> (NewLineChar,OldChars chars input) + _ -> (c,OldChars chars input) +correctNewline c _ _ input = (c, input) + +charBack :: !Input -> Input +charBack input=:{inp_stream,inp_charBuffer} + | isEmptyBuffer inp_charBuffer + = abort "charBack with empty character buffer" + # ((c,p),rest) = get inp_charBuffer + = { input + & inp_stream = OldChar c p inp_stream + , inp_charBuffer = rest + } + +qw s :== "\"" + s + "\"" + +instance <<< Token +where + (<<<) f t = f <<< (toString t) + +instance <<< LongToken +where + (<<<) f lt = f <<< lt.lt_token <<< " from " <<< lt.lt_position + +instance <<< FilePosition +where + (<<<) f {fp_line,fp_col} = f <<< fp_line <<< ";" <<< fp_col + +instance toString Token +where + toString (IdentToken id) = id // qw id + toString (IntToken id) = id + toString (RealToken id) = id + toString (StringToken id) = id + toString (CharToken id) = id + toString (CharListToken id) = "['"+id+"']" + toString (BoolToken id) = toString id + toString OpenToken = "(" + toString CloseToken = ")" + toString CurlyOpenToken = "{" + toString CurlyCloseToken = "}" + toString SquareOpenToken = "[" + toString SquareCloseToken = "]" + toString DotToken = "." + toString SemicolonToken = ";" + toString ColonToken = ": (ColonToken)" + toString DoubleColonToken = "::" + toString CommaToken = "," + toString ExclamationToken = "!" + toString BarToken = "|" + toString ArrowToken = "->" + toString DoubleArrowToken = "=>" + toString EqualToken = "=" + toString DefinesColonToken = "=:" + toString ColonDefinesToken = ":==" + toString WildCardToken = "_" + toString BackSlashToken = "\\" + toString DoubleBackSlashToken = "\\\\" + toString LeftArrowToken = "<-" + toString LeftArrowColonToken = "<-:" + toString DotDotToken = ".." + toString AndToken = "&" + toString HashToken = "#" + toString AsteriskToken = "*" + toString LessThanOrEqualToken = "<=" + toString ModuleToken = "module" + toString ImpModuleToken = "implementation" + toString DefModuleToken = "definition" + toString SysModuleToken = "system" + toString ImportToken = "import" + toString FromToken = "from" + toString SpecialToken = "special" + toString IntTypeToken = "Int" + toString CharTypeToken = "Char" + toString RealTypeToken = "Real" + toString BoolTypeToken = "Bool" + toString StringTypeToken = "String" + toString LeftAssocToken = "left" + toString RightAssocToken = "right" + toString ClassToken = "class" + toString InstanceToken = "instance" + toString OtherwiseToken = "otherwise" + toString IfToken = "if" + toString WhereToken = "where" + toString WithToken = "with" + toString CaseToken = "case" + toString OfToken = "of" + toString (LetToken strict) + | strict = "let!" + = "let" + toString (SeqLetToken strict) + | strict = "#!" + = "#" + toString InToken = "in" + + toString DynamicToken = "dynamic" + toString DynamicTypeToken = "Dynamic" + + toString (PriorityToken priority) = toString priority + toString NewDefinitionToken = "offside token (new def)" + toString EndGroupToken = "offside token (end group)" + toString EndOfFileToken = "end of file" + toString (ErrorToken id) = "Scanner error: " + id + toString CodeToken = "code" + toString InlineToken = "inline" + toString (CodeBlockToken the_code) = "<code block>" + toString token = "toString (Token) does not know this token" + +instance == Token +where + (==) token1 token2 + = equal_constructor token1 token2 && equal_args_of_tokens token1 token2 + where + equal_args_of_tokens :: !Token !Token -> Bool + equal_args_of_tokens (IdentToken id1) (IdentToken id2) = id1 == id2 + equal_args_of_tokens (RealToken real1) (RealToken real2) = real1 == real2 + equal_args_of_tokens (StringToken string1) (StringToken string2) = string1 == string2 + equal_args_of_tokens (CharToken char1) (CharToken char2) = char1 == char2 + equal_args_of_tokens (CharListToken chars1) (CharListToken chars2) = chars1 == chars2 + equal_args_of_tokens (BoolToken bool1) (BoolToken bool2) = bool1 == bool2 + equal_args_of_tokens (IntToken int1) (IntToken int2) = int1 == int2 + equal_args_of_tokens (LetToken l1) (LetToken l2) = l1 == l2 + equal_args_of_tokens (SeqLetToken l1) (SeqLetToken l2) = l1 == l2 + equal_args_of_tokens (ErrorToken id1) (ErrorToken id2) = id1 == id2 + equal_args_of_tokens _ _ = True + +instance < Priority +where + (<) (Prio assoc1 prio1) (Prio assoc2 prio2) + = prio1 < prio2 || prio1 == prio2 && assoc1 < assoc2 + (<) _ _ = abort "< of these Priorities (NoPrio) is undefined" + +instance < Assoc +where + (<) _ LeftAssoc = True + (<) LeftAssoc _ = False + (<) _ _ = True + +instance toString Priority +where + toString (Prio assoc prio) = toString assoc + toString prio + toString NoPrio = "infix" + +instance toString Assoc +where + toString LeftAssoc = "infixl " + toString RightAssoc = "infixr " + toString NoAssoc = "infix " + + +openScanner :: !String !SearchPaths !*Files -> (!Optional ScanState, !*Files) +openScanner file_name searchPaths files + = case fopenInSearchPaths file_name searchPaths FReadData files of + (No, files) + -> (No, files) + (Yes file, files) + -> (Yes { ss_input = Input + { inp_stream = InFile file + , inp_filename = file_name + , inp_pos = {fp_line = 1, fp_col = 0} + , inp_tabsize = 4 + , inp_charBuffer = Buffer0 + // , inp_curToken = [] + } + , ss_offsides = [(1,False)] // to generate offsides between global definitions + , ss_useLayout = False + , ss_tokenBuffer = Buffer0 + } + , files + ) + +fopenInSearchPaths :: !{#Char} [!{#Char}] !Int !*f -> (Optional *File,!*f) | FileSystem f +fopenInSearchPaths fileName [] mode f + = (No, f) +fopenInSearchPaths fileName [path : paths] mode f + # (opened, file, f) + = fopen (path + fileName) mode f + | opened + = (Yes file, f) + // otherwise + = fopenInSearchPaths fileName paths mode f + +closeScanner :: !ScanState !*Files -> *Files +closeScanner scanState=:{ss_input=PushedToken _ input} files + = closeScanner {scanState & ss_input = input} files +closeScanner {ss_input=Input {inp_stream}} files + = case get_file inp_stream of + Yes file # (_,files) = fclose file files + -> files + No -> files +where + get_file (InFile file) = Yes file + get_file (OldChar _ _ stream) = get_file stream + get_file (OldChars _ stream) = get_file stream + get_file (OldToken _ stream) = get_file stream + get_file (InLOC _ ) = No + +NewLineChar :== '\n' +LFChar :== '\xA' +CRChar :== '\xD' + +//isNewLine c :== c == LFChar || c == CRChar +isNewLine :: !Char -> Bool +isNewLine LFChar = True +isNewLine CRChar = True +isNewLine _ = False + + //------------------------// + //--- Offside handling ---// +//------------------------// + +UseLayout :: !ScanState -> (!Bool, !ScanState) +UseLayout scanState = scanState!ss_useLayout + +setUseLayout :: !Bool !ScanState -> ScanState +setUseLayout b ss = { ss & ss_useLayout = b } // -->> ("uselayout set to ",b) + +checkOffside :: !FilePosition !Token !ScanState -> (Token,ScanState) +checkOffside pos token scanState=:{ss_offsides,ss_useLayout,ss_input} + | ~ ss_useLayout + = (token, scanState) //-->> (token,pos,"No layout rule applied") + | isEmpty ss_offsides + = newOffside token scanState //-->> "Empty offside stack" + # (os_col, new_def) = hd ss_offsides + col = pos.fp_col + | col == os_col && canBeOffside token + # scanState = tokenBack scanState + newToken = NewDefinitionToken + = ( newToken + , { scanState + & ss_tokenBuffer + = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = [] + // , lt_context = FunctionContext + } + scanState.ss_tokenBuffer + } + ) -->> (token,"NewDefinitionToken generated col==os && canBeOffside",pos,ss_offsides) + | col < os_col && token <> InToken + # (n,os_col,offsides) = scan_offsides 0 col os_col ss_offsides + scanState = { scanState & ss_offsides = offsides } -->> (n,"end groups",offsides,new_def) + scanState = snd (newOffside token scanState) + scanState = case new_def && col == os_col && canBeOffside token of + True + # scanState = tokenBack scanState + newToken = NewDefinitionToken + -> { scanState + & ss_tokenBuffer + = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = [] + // , lt_context = FunctionContext + } + scanState.ss_tokenBuffer + } -->> ("new definition generated",token) + False + -> scanState + = gen_end_groups n scanState + with + newToken = EndGroupToken + scan_offsides n col os_col [] + = (n, os_col, []) + scan_offsides n col _ offsides=:[(os_col,b):r] + | col < os_col + = scan_offsides (inc n) col os_col r + = (n, os_col, offsides) + gen_end_groups n scanState + # scanState = tokenBack scanState // push current token back + scanState = { scanState + & ss_tokenBuffer + = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = [] + // , lt_context = FunctionContext + } + scanState.ss_tokenBuffer + } // insert EndGroupToken + | n == 1 + // # (new_offsides, scanState) = scanState!ss_offsides // for tracing XXX + = (newToken, scanState) // -->> ("new offsides",new_offsides) + = gen_end_groups (dec n) scanState + | token == InToken + # scanState = tokenBack { scanState & ss_offsides = tl ss_offsides } + newToken = EndGroupToken + = ( newToken + , { scanState + & ss_tokenBuffer + = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = [] + // , lt_context = FunctionContext + } + scanState.ss_tokenBuffer + } + ) -->> (token,"EndGroupToken generated: in",pos,ss_offsides) + // otherwise + = newOffside token scanState +where + newOffside token scanState=:{ss_offsides} + | definesOffside token + # ( _, scanState ) = nextToken FunctionContext scanState + ( os_pos, scanState ) = getPosition scanState // next token defines offside position + scanState = tokenBack scanState + os = os_pos.fp_col + | os == 1 + # scanState = tokenBack scanState + newToken = ErrorToken "groups should not start in column 1" + = ( newToken + , { scanState + & ss_tokenBuffer + = store + { lt_position = pos + , lt_token = newToken + // , lt_chars = ['groups should not start in column 1'] + // , lt_context = FunctionContext + } + scanState.ss_tokenBuffer + } + ) + // otherwise // os <> 1 + = ( token + , { scanState + & ss_offsides = [ (os, needsNewDefinitionToken token) : ss_offsides ] + } + ) -->> (token,pos,"New offside defined at ",os_pos,[ (os, token == CaseToken) : ss_offsides ]) + // otherwise // ~ (definesOffside token) + = (token, scanState) -->> (token,pos," not offside") + +definesOffside :: !Token -> Bool +definesOffside (LetToken _) = True +definesOffside (SeqLetToken _) = True +definesOffside WhereToken = True +definesOffside WithToken = True +definesOffside SpecialToken = True +definesOffside OfToken = True +//definesOffside BarToken = True // There are too many BarTokens in Clean +definesOffside _ = False + +needsNewDefinitionToken :: !Token -> Bool +needsNewDefinitionToken OfToken = True +//needsNewDefinitionToken WithToken = True +needsNewDefinitionToken SpecialToken = True +needsNewDefinitionToken _ = False +/* +repeatedOffside :: !Token -> Bool +repeatedOffside BarToken = True +repeatedOffside EqualToken = True +repeatedOffside (SeqLetToken _) = True +repeatedOffside _ = False +*/ +canBeOffside :: !Token -> Bool +canBeOffside EqualToken = False +canBeOffside ColonDefinesToken = False +canBeOffside DefinesColonToken = False +canBeOffside (SeqLetToken _) = False +canBeOffside WhereToken = False +canBeOffside SpecialToken = False +canBeOffside WithToken = False +canBeOffside BarToken = False +//canBeOffside CurlyOpenToken = False // not allowed for record patterns +canBeOffside (CodeBlockToken _) = False +canBeOffside _ = True + +dropOffsidePosition :: !ScanState -> ScanState +dropOffsidePosition scanState=:{ss_offsides} = { scanState & ss_offsides = drop 1 ss_offsides } + +/* +addOffsidePosition :: !ScanState -> (Int, ScanState) +addOffsidePosition scanState=:{ss_useLayout} + | ss_useLayout + # (position,scanState=:{ss_offsides}) = getPosition scanState + new_offside = position.fp_col + = (new_offside, { scanState & ss_offsides = [(new_offside,False): ss_offsides] }) + | otherwise + = (1, scanState) + +atOffsidePosition :: !ScanState -> (!Bool, !ScanState) +atOffsidePosition scanState=:{ss_offsides=[(col,_):_]} + # (position, scanState) = getPosition scanState + = (position.fp_col == col, scanState) -->> ("atOffsidePosition",position.fp_col,col) +atOffsidePosition scanState + = (False, scanState) +*/ + //-----------------------// + //--- Buffer handling ---// +//-----------------------// + +store :: !x !(Buffer x) -> Buffer x +store x Buffer0 = Buffer1 x +store x (Buffer1 y) = Buffer2 x y +store x (Buffer2 y z) = Buffer3 x y z +store x (Buffer3 y z _) = Buffer3 x y z + +isEmptyBuffer :: !(Buffer x) -> Bool +isEmptyBuffer Buffer0 = True +isEmptyBuffer _ = False + +get :: !(Buffer x) -> (x,Buffer x) +get Buffer0 = abort "get from empty buffer" +get (Buffer1 x) = (x, Buffer0) +get (Buffer2 x y) = (x, Buffer1 y) +get (Buffer3 x y z) = (x, Buffer2 y z) + +head :: !(Buffer x) -> x +head Buffer0 = abort "head of empty buffer" +head (Buffer1 x) = x +head (Buffer2 x _) = x +head (Buffer3 x _ _) = x + +instance <<< (Buffer a) | <<< a +where + (<<<) file Buffer0 = file <<< "Empty buffer" + (<<<) file (Buffer1 x) = file <<< "Buffer1 (" <<< x <<< ")" + (<<<) file (Buffer2 x y) = file <<< "Buffer2 (" <<< x <<< ") (" <<< y <<< ")" + (<<<) file (Buffer3 x y z) = file <<< "Buffer3 (" <<< x <<< ") (" <<< y <<< ") (" <<< z <<< ")" + + //---------------// + //--- Tracing ---// +//---------------// + +(-->>) val _ :== val +//(-->>) val message :== val ---> ("Scanner",message) |