From 6d6ae3ee7a280fe71513cb33b37015ebf10350c4 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Sun, 22 May 2022 11:49:38 +0200 Subject: [PATCH] Fixed and improved error reporting and made the parser ignore whitespace/tab (again) --- src/frontend/compiler.nim | 11 ++++--- src/frontend/lexer.nim | 21 ++++++++---- src/frontend/meta/errors.nim | 13 ++++---- src/frontend/parser.nim | 50 ++++++++++++++-------------- src/main.nim | 63 ++++++++++++++++++++++-------------- 5 files changed, 91 insertions(+), 67 deletions(-) diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 0e0be40..426657e 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -163,6 +163,9 @@ proc inferType(self: Compiler, node: Expression): Type ## Public getter for nicer error formatting proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) +proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = self.currentFunction +proc getFile*(self: Compiler): string {.inline.} = self.file +proc getModule*(self: Compiler): string {.inline.} = self.currentModule ## Utility functions @@ -185,10 +188,9 @@ proc done(self: Compiler): bool = result = self.current > self.ast.high() -proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} = - ## Raises a formatted CompileError exception - var tok = self.getCurrentNode().token - raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}") +proc error(self: Compiler, message: string) {.raises: [CompileError].} = + ## Raises a CompileError exception + raise newException(CompileError, message) proc step(self: Compiler): ASTNode = @@ -601,7 +603,6 @@ proc inferType(self: Compiler, node: Declaration): Type = var node = FunDecl(node) let resolved = self.resolve(node.name) if resolved != nil: - echo resolved[] return 
resolved.valueType of NodeKind.varDecl: var node = VarDecl(node) diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim index aef37e5..fce99cd 100644 --- a/src/frontend/lexer.nim +++ b/src/frontend/lexer.nim @@ -19,6 +19,8 @@ import strutils import parseutils import strformat import tables +import terminal + import meta/token import meta/errors @@ -142,14 +144,19 @@ proc isAlphaNumeric(s: string): bool = return false return true +proc incLine(self: Lexer) + # Simple public getters used for error # formatting and whatnot proc getStart*(self: Lexer): int = self.start +proc getFile*(self: Lexer): string = self.file proc getCurrent*(self: Lexer): int = self.current proc getLine*(self: Lexer): int = self.line proc getSource*(self: Lexer): string = self.source -proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line > - 1: self.lines[line - 2] else: (start: 0, stop: self.current)) +proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = + if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile: + self.incLine() + return self.lines[line - 1] proc newLexer*(self: Lexer = nil): Lexer = @@ -178,9 +185,9 @@ proc incLine(self: Lexer) = ## Increments the lexer's line ## and updates internal line ## metadata - self.lines.add((start: self.lastLine, stop: self.current)) - self.line += 1 + self.lines.add((self.lastLine, self.current)) self.lastLine = self.current + self.line += 1 proc step(self: Lexer, n: int = 1): string = @@ -196,7 +203,7 @@ proc step(self: Lexer, n: int = 1): string = inc(self.current) -proc peek(self: Lexer, distance: int = 0, length: int = 1): string = +proc peek*(self: Lexer, distance: int = 0, length: int = 1): string = ## Returns a stream of characters of ## at most length bytes from the source ## file, starting at the given distance, @@ -219,7 +226,7 @@ proc peek(self: Lexer, distance: int = 0, length: int = 1): string = proc error(self: Lexer, message: string) = ## Raises a lexing error with a formatted ## error 
message - raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}") + raise newException(LexingError, message) proc check(self: Lexer, s: string, distance: int = 0): bool = @@ -625,9 +632,11 @@ proc lex*(self: Lexer, source, file: string): seq[Token] = self.symbols = symbols self.source = source self.file = file + self.lines = @[] while not self.done(): self.next() self.start = self.current self.tokens.add(Token(kind: EndOfFile, lexeme: "", line: self.line, pos: (self.current, self.current))) + self.incLine() return self.tokens diff --git a/src/frontend/meta/errors.nim b/src/frontend/meta/errors.nim index 2dc9e78..3345574 100644 --- a/src/frontend/meta/errors.nim +++ b/src/frontend/meta/errors.nim @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. + type - ## Nim exceptions for internal JAPL failures - NimVMException* = object of CatchableError - LexingError* = object of NimVMException - ParseError* = object of NimVMException - CompileError* = object of NimVMException - SerializationError* = object of NimVMException + ## Nim exceptions for internal Peon failures + PeonException* = object of CatchableError + LexingError* = object of PeonException + ParseError* = object of PeonException + CompileError* = object of PeonException + SerializationError* = object of PeonException diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index e164a9d..0606da5 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -150,6 +150,8 @@ proc getCurrent*(self: Parser): int {.inline.} = self.current proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1]) +proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction +proc getFile*(self: Parser): string {.inline.} = 
self.file # Handy templates to make our life easier, thanks nim! template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) @@ -167,10 +169,6 @@ proc peek(self: Parser, distance: int = 0): Token = result = endOfFile else: result = self.tokens[self.current + distance] - ## Hack to ignore whitespace/tab - if result.kind in {TokenType.Whitespace, Tab}: - # self.current += 1 - result = self.peek(distance + 1) proc done(self: Parser): bool {.inline.} = @@ -192,15 +190,9 @@ proc step(self: Parser, n: int = 1): Token = self.current += 1 -proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} = - ## Raises a formatted ParseError exception - var lexeme = self.peek().lexeme - var fn = "" - if self.currentFunction != nil: - if self.currentFunction.kind == NodeKind.funDecl: - fn = &"inside function '{FunDecl(self.currentFunction).name.token.lexeme}'" - var errorMessage = &"A fatal error occurred while parsing '{self.file}', {fn} line {self.peek().line} at '{lexeme}' -> {message}" - raise newException(ParseError, errorMessage) +proc error(self: Parser, message: string) {.raises: [ParseError].} = + ## Raises a ParseError exception + raise newException(ParseError, message) # Why do we allow strings or enum members of TokenType? Well, it's simple: @@ -882,32 +874,31 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, - isLambda: bool = false, isOperator: bool = false): Declaration = - ## Parses functions, coroutines, generators, anonymous functions and operators + isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr! 
+ ## Parses all types of functions, coroutines, generators and operators + ## (with or without a name, where applicable) let tok = self.peek(-1) var enclosingFunction = self.currentFunction var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]] = @[] var defaults: seq[Expression] = @[] var returnType: Expression - if not isLambda and self.check(Identifier): + if not isLambda and self.match(Identifier): # We do this extra check because we might # be called from a context where it's # ambiguous whether we're parsing a declaration # or an expression. Fortunately anonymous functions # are nameless, so we can sort the ambiguity by checking # if there's an identifier after the keyword - self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'") - self.checkDecl(not self.check("*")) - self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()), + self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1)), arguments, defaults, newBlockStmt(@[], Token()), isAsync = isAsync, - isGenerator = isGenerator, - isPrivate = true, + isGenerator = isGenerator, + isPrivate = true, token = tok, pragmas = (@[]), - returnType = nil) - FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1)) + returnType = nil) if self.match("*"): FunDecl(self.currentFunction).isPrivate = false + self.checkDecl(FunDecl(self.currentFunction).isPrivate) elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")): # We do a bit of hacking to pretend we never # wanted to parse this as a declaration in @@ -916,7 +907,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, # go all the way up to primary(), which will # call us back with isLambda=true, allowing us # to actually parse the function as an expression - while not self.check(tok.kind): + while not self.check(tok.kind): # We rewind back to the token that caused us to be called dec(self.current) result = 
Declaration(self.expressionStatement()) self.currentFunction = enclosingFunction @@ -1003,6 +994,8 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, self.error("cannot declare operator without arguments") elif FunDecl(result).returnType == nil: self.error("operators must have a return type") + elif isLambda: + self.error("cannot declare anonymous operator") for argument in arguments: if argument.valueType == nil: self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") @@ -1126,8 +1119,13 @@ proc declaration(self: Parser): Declaration = proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] = - ## Parses a series of tokens into an AST node - self.tokens = tokens + ## Parses a sequence of tokens into a sequence of AST nodes + self.tokens = @[] + # The parser is not designed to handle these tokens. + # Maybe create a separate syntax checker module? + for token in tokens: + if token.kind notin {TokenType.Whitespace, Tab}: + self.tokens.add(token) self.file = file self.current = 0 self.currentLoop = LoopContext.None diff --git a/src/main.nim b/src/main.nim index 9fe546a..7722b00 100644 --- a/src/main.nim +++ b/src/main.nim @@ -1,5 +1,4 @@ # Builtins & external libs -import sequtils import strformat import strutils import terminal @@ -25,11 +24,11 @@ proc fillSymbolTable(tokenizer: Lexer) proc getLineEditor: LineEditor # Handy dandy compile-time constants -const debugLexer = true -const debugParser = true -const debugCompiler = true -const debugSerializer = true -const debugRuntime = true +const debugLexer = false +const debugParser = false +const debugCompiler = false +const debugSerializer = false +const debugRuntime = false when debugSerializer: import nimSHA2 @@ -125,26 +124,42 @@ when isMainModule: # TODO: The code for error reporting completely # breaks down with multiline input, fix it except LexingError: - # let lineNo = tokenizer.getLine() - # let relPos = 
tokenizer.getRelPos(lineNo) - # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() - stderr.styledWriteLine(fgRed, getCurrentExceptionMsg()) - # echo &"Source line: {line}" - # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + let lineNo = tokenizer.getLine() + let relPos = tokenizer.getRelPos(lineNo) + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{tokenizer.getFile()}'", fgRed, ", module ", + fgYellow, &"'{tokenizer.getFile()}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{tokenizer.peek()}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) except ParseError: - # let lineNo = parser.getCurrentToken().line - # let relPos = tokenizer.getRelPos(lineNo) - # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() - stderr.styledWriteLine(fgRed, getCurrentExceptionMsg()) - # echo &"Source line: {line}" - # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len()) + let lexeme = parser.getCurrentToken().lexeme + let lineNo = parser.getCurrentToken().line + let relPos = tokenizer.getRelPos(lineNo) + let fn = parser.getCurrentFunction() + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &", in function '{FunDecl(fn).name.token.lexeme}'" + stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{parser.getFile()}'", fgRed, ", module ", + fgYellow, &"'{parser.getFile()}'", fgRed, fnMsg & ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + 
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) except CompileError: - # let lineNo = compiler.getCurrentNode().token.line - # let relPos = tokenizer.getRelPos(lineNo) - # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() - stderr.styledWriteLine(fgRed, getCurrentExceptionMsg()) - # echo &"Source line: {line}" - # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len()) + let lexeme = compiler.getCurrentNode().token.lexeme + let lineNo = compiler.getCurrentNode().token.line + let relPos = tokenizer.getRelPos(lineNo) + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + let fn = compiler.getCurrentFunction() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &", in function '{FunDecl(fn).name.token.lexeme}'" + stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{compiler.getFile()}'", fgRed, ", module ", + fgYellow, &"'{compiler.getModule()}'", fgRed, fnMsg & ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) except SerializationError: stderr.styledWriteLine(fgRed, getCurrentExceptionMsg()) quit(0)