Fixed and improved error reporting and made the parser ignore whitespace/tab (again)

This commit is contained in:
Mattia Giambirtone 2022-05-22 11:49:38 +02:00
parent 7cf69cf0cf
commit 6d6ae3ee7a
5 changed files with 91 additions and 67 deletions

View File

@ -163,6 +163,9 @@ proc inferType(self: Compiler, node: Expression): Type
## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >=
self.ast.len(): self.ast[^1] else: self.ast[self.current - 1])
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = self.currentFunction
proc getFile*(self: Compiler): string {.inline.} = self.file
proc getModule*(self: Compiler): string {.inline.} = self.currentModule
## Utility functions
@ -185,10 +188,9 @@ proc done(self: Compiler): bool =
result = self.current > self.ast.high()
proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} =
## Raises a formatted CompileError exception
var tok = self.getCurrentNode().token
raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}")
proc error(self: Compiler, message: string) {.raises: [CompileError].} =
## Raises a CompileError exception
raise newException(CompileError, message)
proc step(self: Compiler): ASTNode =
@ -601,7 +603,6 @@ proc inferType(self: Compiler, node: Declaration): Type =
var node = FunDecl(node)
let resolved = self.resolve(node.name)
if resolved != nil:
echo resolved[]
return resolved.valueType
of NodeKind.varDecl:
var node = VarDecl(node)

View File

@ -19,6 +19,8 @@ import strutils
import parseutils
import strformat
import tables
import terminal
import meta/token
import meta/errors
@ -142,14 +144,19 @@ proc isAlphaNumeric(s: string): bool =
return false
return true
proc incLine(self: Lexer)
# Simple public getters used for error
# formatting and whatnot
proc getStart*(self: Lexer): int = self.start
proc getFile*(self: Lexer): string = self.file
proc getCurrent*(self: Lexer): int = self.current
proc getLine*(self: Lexer): int = self.line
proc getSource*(self: Lexer): string = self.source
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line >
1: self.lines[line - 2] else: (start: 0, stop: self.current))
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] =
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
self.incLine()
return self.lines[line - 1]
proc newLexer*(self: Lexer = nil): Lexer =
@ -178,9 +185,9 @@ proc incLine(self: Lexer) =
## Increments the lexer's line
## and updates internal line
## metadata
self.lines.add((start: self.lastLine, stop: self.current))
self.line += 1
self.lines.add((self.lastLine, self.current))
self.lastLine = self.current
self.line += 1
proc step(self: Lexer, n: int = 1): string =
@ -196,7 +203,7 @@ proc step(self: Lexer, n: int = 1): string =
inc(self.current)
proc peek(self: Lexer, distance: int = 0, length: int = 1): string =
proc peek*(self: Lexer, distance: int = 0, length: int = 1): string =
## Returns a stream of characters of
## at most length bytes from the source
## file, starting at the given distance,
@ -219,7 +226,7 @@ proc peek(self: Lexer, distance: int = 0, length: int = 1): string =
proc error(self: Lexer, message: string) =
## Raises a lexing error with a formatted
## error message
raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}")
raise newException(LexingError, message)
proc check(self: Lexer, s: string, distance: int = 0): bool =
@ -625,9 +632,11 @@ proc lex*(self: Lexer, source, file: string): seq[Token] =
self.symbols = symbols
self.source = source
self.file = file
self.lines = @[]
while not self.done():
self.next()
self.start = self.current
self.tokens.add(Token(kind: EndOfFile, lexeme: "",
line: self.line, pos: (self.current, self.current)))
self.incLine()
return self.tokens

View File

@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
type
## Nim exceptions for internal JAPL failures
NimVMException* = object of CatchableError
LexingError* = object of NimVMException
ParseError* = object of NimVMException
CompileError* = object of NimVMException
SerializationError* = object of NimVMException
## Nim exceptions for internal Peon failures
PeonException* = object of CatchableError
LexingError* = object of PeonException
ParseError* = object of PeonException
CompileError* = object of PeonException
SerializationError* = object of PeonException

View File

@ -150,6 +150,8 @@ proc getCurrent*(self: Parser): int {.inline.} = self.current
proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >=
self.tokens.high() or
self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1])
proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction
proc getFile*(self: Parser): string {.inline.} = self.file
# Handy templates to make our life easier, thanks nim!
template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
@ -167,10 +169,6 @@ proc peek(self: Parser, distance: int = 0): Token =
result = endOfFile
else:
result = self.tokens[self.current + distance]
## Hack to ignore whitespace/tab
if result.kind in {TokenType.Whitespace, Tab}:
# self.current += 1
result = self.peek(distance + 1)
proc done(self: Parser): bool {.inline.} =
@ -192,15 +190,9 @@ proc step(self: Parser, n: int = 1): Token =
self.current += 1
proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} =
## Raises a formatted ParseError exception
var lexeme = self.peek().lexeme
var fn = ""
if self.currentFunction != nil:
if self.currentFunction.kind == NodeKind.funDecl:
fn = &"inside function '{FunDecl(self.currentFunction).name.token.lexeme}'"
var errorMessage = &"A fatal error occurred while parsing '{self.file}', {fn} line {self.peek().line} at '{lexeme}' -> {message}"
raise newException(ParseError, errorMessage)
proc error(self: Parser, message: string) {.raises: [ParseError].} =
## Raises a ParseError exception
raise newException(ParseError, message)
# Why do we allow strings or enum members of TokenType? Well, it's simple:
@ -882,32 +874,31 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr,
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
isLambda: bool = false, isOperator: bool = false): Declaration =
## Parses functions, coroutines, generators, anonymous functions and operators
isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr!
## Parses all types of functions, coroutines, generators and operators
## (with or without a name, where applicable)
let tok = self.peek(-1)
var enclosingFunction = self.currentFunction
var arguments: seq[tuple[name: IdentExpr, valueType: Expression,
mutable: bool, isRef: bool, isPtr: bool]] = @[]
var defaults: seq[Expression] = @[]
var returnType: Expression
if not isLambda and self.check(Identifier):
if not isLambda and self.match(Identifier):
# We do this extra check because we might
# be called from a context where it's
# ambiguous whether we're parsing a declaration
# or an expression. Fortunately anonymous functions
# are nameless, so we can sort the ambiguity by checking
# if there's an identifier after the keyword
self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
self.checkDecl(not self.check("*"))
self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()),
self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1)), arguments, defaults, newBlockStmt(@[], Token()),
isAsync = isAsync,
isGenerator = isGenerator,
isPrivate = true,
isGenerator = isGenerator,
isPrivate = true,
token = tok, pragmas = (@[]),
returnType = nil)
FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
returnType = nil)
if self.match("*"):
FunDecl(self.currentFunction).isPrivate = false
self.checkDecl(FunDecl(self.currentFunction).isPrivate)
elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")):
# We do a bit of hacking to pretend we never
# wanted to parse this as a declaration in
@ -916,7 +907,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
# go all the way up to primary(), which will
# call us back with isLambda=true, allowing us
# to actually parse the function as an expression
while not self.check(tok.kind):
while not self.check(tok.kind): # We rewind back to the token that caused us to be called
dec(self.current)
result = Declaration(self.expressionStatement())
self.currentFunction = enclosingFunction
@ -1003,6 +994,8 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
self.error("cannot declare operator without arguments")
elif FunDecl(result).returnType == nil:
self.error("operators must have a return type")
elif isLambda:
self.error("cannot declare anonymous operator")
for argument in arguments:
if argument.valueType == nil:
self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
@ -1126,8 +1119,13 @@ proc declaration(self: Parser): Declaration =
proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
## Parses a series of tokens into an AST node
self.tokens = tokens
## Parses a sequence of tokens into a sequence of AST nodes
self.tokens = @[]
# The parser is not designed to handle these tokens.
# Maybe create a separate syntax checker module?
for token in tokens:
if token.kind notin {TokenType.Whitespace, Tab}:
self.tokens.add(token)
self.file = file
self.current = 0
self.currentLoop = LoopContext.None

View File

@ -1,5 +1,4 @@
# Builtins & external libs
import sequtils
import strformat
import strutils
import terminal
@ -25,11 +24,11 @@ proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = true
const debugParser = true
const debugCompiler = true
const debugSerializer = true
const debugRuntime = true
const debugLexer = false
const debugParser = false
const debugCompiler = false
const debugSerializer = false
const debugRuntime = false
when debugSerializer:
import nimSHA2
@ -125,26 +124,42 @@ when isMainModule:
# TODO: The code for error reporting completely
# breaks down with multiline input, fix it
except LexingError:
# let lineNo = tokenizer.getLine()
# let relPos = tokenizer.getRelPos(lineNo)
# let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
stderr.styledWriteLine(fgRed, getCurrentExceptionMsg())
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
let lineNo = tokenizer.getLine()
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{tokenizer.getFile()}'", fgRed, ", module ",
fgYellow, &"'{tokenizer.getFile()}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{tokenizer.peek()}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except ParseError:
# let lineNo = parser.getCurrentToken().line
# let relPos = tokenizer.getRelPos(lineNo)
# let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
stderr.styledWriteLine(fgRed, getCurrentExceptionMsg())
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len())
let lexeme = parser.getCurrentToken().lexeme
let lineNo = parser.getCurrentToken().line
let relPos = tokenizer.getRelPos(lineNo)
let fn = parser.getCurrentFunction()
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{parser.getFile()}'", fgRed, ", module ",
fgYellow, &"'{parser.getFile()}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except CompileError:
# let lineNo = compiler.getCurrentNode().token.line
# let relPos = tokenizer.getRelPos(lineNo)
# let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
stderr.styledWriteLine(fgRed, getCurrentExceptionMsg())
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len())
let lexeme = compiler.getCurrentNode().token.lexeme
let lineNo = compiler.getCurrentNode().token.line
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fn = compiler.getCurrentFunction()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{compiler.getFile()}'", fgRed, ", module ",
fgYellow, &"'{compiler.getModule()}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except SerializationError:
stderr.styledWriteLine(fgRed, getCurrentExceptionMsg())
quit(0)