From 48d1c3fc8cf2f66c9b616573a31d269bc24b4fd5 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Tue, 24 May 2022 22:26:45 +0200 Subject: [PATCH] Initial work on CFI-like functionality for better debugging --- src/backend/vm.nim | 2 +- src/frontend/compiler.nim | 41 +++++++++-- src/frontend/meta/bytecode.nim | 14 +++- src/main.nim | 11 ++- src/tests.pn | 6 +- src/util/debugger.nim | 126 +++++++++++++++++++++++---------- src/util/multibyte.nim | 38 +++++++++- src/util/serializer.nim | 82 ++++++++++----------- 8 files changed, 227 insertions(+), 93 deletions(-) diff --git a/src/backend/vm.nim b/src/backend/vm.nim index 22a0aa7..9c043f3 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -239,7 +239,7 @@ proc dispatch*(self: PeonVM) = # pushing it on the stack let retVal = self.pop() let frame = self.frames.pop() - for i in countdown(self.stack.high(), frame): + for i in countdown(0, frame): discard self.pop() self.ip = int(self.pop().uInt) self.push(retVal) diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index c0cfd74..5b27fa9 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -290,14 +290,22 @@ proc patchJump(self: Compiler, offset: int) = case OpCode(self.chunk.code[offset]): of LongJumpForwards: self.chunk.code[offset] = JumpForwards.uint8() + jump -= 4 of LongJumpBackwards: self.chunk.code[offset] = JumpBackwards.uint8() + jump -= 4 of LongJumpIfFalse: self.chunk.code[offset] = JumpIfFalse.uint8() of LongJumpIfFalsePop: self.chunk.code[offset] = JumpIfFalsePop.uint8() of LongJumpIfFalseOrPop: self.chunk.code[offset] = JumpIfFalseOrPop.uint8() + of JumpForwards, JumpBackwards: + # We do this because a relative jump + # does not normally take into account + # its argument, which is hardcoded in + # the bytecode itself + jump -= 3 else: discard self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty) @@ -308,14 +316,22 @@ proc patchJump(self: Compiler, offset: int) = 
case OpCode(self.chunk.code[offset]): of JumpForwards: self.chunk.code[offset] = LongJumpForwards.uint8() + jump -= 3 of JumpBackwards: self.chunk.code[offset] = LongJumpBackwards.uint8() + jump -= 3 of JumpIfFalse: self.chunk.code[offset] = LongJumpIfFalse.uint8() of JumpIfFalsePop: self.chunk.code[offset] = LongJumpIfFalsePop.uint8() of JumpIfFalseOrPop: self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8() + of LongJumpForwards, LongJumpBackwards: + # We do this because a relative jump + # does not normally take into account + # its argument, which is hardcoded in + # the bytecode itself + jump -= 4 else: discard let offsetArray = jump.toTriple() @@ -751,7 +767,7 @@ proc matchImpl(self: Compiler, name: string, kind: Type): Name = if name.valueType.kind != Function: msg &= ", not a callable" elif kind.args.len() != name.valueType.args.len(): - msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})\n" + msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})" else: for i, arg in kind.args: if not self.compareTypes(arg, name.valueType.args[i]): @@ -930,7 +946,7 @@ proc identifier(self: Compiler, node: IdentExpr) = if not t.closedOver: # Static name resolution, loads value at index in the stack. Very fast. Much wow. 
self.emitByte(LoadVar) - self.emitBytes((index - self.frames[^1]).toTriple()) + self.emitBytes((index - self.frames[self.scopeDepth]).toTriple()) else: if self.closedOver.len() == 0: self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") @@ -995,6 +1011,7 @@ proc beginScope(self: Compiler) = ## Begins a new local scope by incrementing the current ## scope's depth inc(self.scopeDepth) + proc endScope(self: Compiler) = @@ -1181,7 +1198,6 @@ proc returnStmt(self: Compiler, node: ReturnStmt) = self.emitByte(OpCode.ReturnValue) else: self.emitByte(OpCode.Return) - discard self.frames.pop() proc yieldStmt(self: Compiler, node: YieldStmt) = @@ -1302,10 +1318,11 @@ proc funDecl(self: Compiler, node: FunDecl) = ## Compiles function declarations # A function's code is just compiled linearly # and then jumped over - let jmp = self.emitJump(Jump) + let jmp = self.emitJump(JumpForwards) var function = self.currentFunction self.declareName(node) self.frames.add(self.names.high()) + # TODO: Forward declarations if node.body != nil: if BlockStmt(node.body).code.len() == 0: @@ -1334,6 +1351,8 @@ proc funDecl(self: Compiler, node: FunDecl) = # the try/finally block with the deferred # code var deferStart = self.deferred.len() + # We let our debugger know a function is starting + self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.blockStmt(BlockStmt(node.body)) # Yup, we're done. That was easy, huh? @@ -1346,6 +1365,19 @@ proc funDecl(self: Compiler, node: FunDecl) = # are resolved properly). There's a need for a bit # of boilerplate code to make closures work, but # that's about it + + # Function is ending! 
+ self.chunk.cfi.add(self.chunk.code.high().toTriple()) + self.chunk.cfi.add(self.frames[^1].toTriple()) + self.chunk.cfi.add(uint8(node.arguments.len())) + if not system.`==`(node.name, nil): + var s = node.name.token.lexeme + if s.len() > uint16.high().int: + s = s[0..<uint16.high().int] + self.chunk.cfi.add(s.len().toDouble()) + self.chunk.cfi.add(s.toBytes()) + else: + self.chunk.cfi.add(0.toDouble()) case self.currentFunction.kind: of NodeKind.funDecl: if not self.currentFunction.hasExplicitReturn: @@ -1368,6 +1400,7 @@ proc funDecl(self: Compiler, node: FunDecl) = self.patchJump(jmp) # This makes us compile nested functions correctly self.currentFunction = function + discard self.frames.pop() proc patchReturnAddress(self: Compiler, retAddr: int) = diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index ed010ef..cf0536b 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -38,9 +38,21 @@ type ## are 3 and 4" ## This is more efficient than using the naive approach, which would encode ## the same line number multiple times and waste considerable amounts of space. 
+ ## cfi represents Call Frame Information and encodes the following information: + ## - Function name + ## - Stack bottom + ## - Argument count + ## The encoding for CFI data is the following: + ## - First, the position into the bytecode where the function begins is encoded + ## - Second, the position into the bytecode where the function ends is encoded + ## - Then, the frame's stack bottom is encoded as a 3 byte integer + ## - After the frame's stack bottom follows the argument count as a 1 byte integer + ## - Lastly, the function's name (optional) is encoded in ASCII, prepended with + ## its size as a 2-byte integer consts*: seq[uint8] code*: seq[uint8] lines*: seq[int] + cfi*: seq[uint8] OpCode* {.pure.} = enum ## Enum of Peon's bytecode opcodes @@ -165,7 +177,7 @@ const jumpInstructions* = {Jump, LongJump, JumpIfFalse, JumpIfFalsePop, proc newChunk*: Chunk = ## Initializes a new, empty chunk - result = Chunk(consts: @[], code: @[], lines: @[]) + result = Chunk(consts: @[], code: @[], lines: @[], cfi: @[]) proc `$`*(self: Chunk): string = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])""" diff --git a/src/main.nim b/src/main.nim index dccc4c6..43242a1 100644 --- a/src/main.nim +++ b/src/main.nim @@ -49,6 +49,7 @@ proc repl = tokenizer = newLexer() parser = newParser() compiler = newCompiler() + debugger = newDebugger() serializer = newSerializer() vm = newPeonVM() editor = getLineEditor() @@ -91,8 +92,9 @@ proc repl = styledEcho fgCyan, "Compilation step:" styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" + styledEcho fgCyan, "\tCFI data: ", fgGreen, "[", fgYellow, compiled.cfi.join(", "), fgGreen, "]" styledEcho fgCyan, "\nBytecode disassembler output below:\n" - disassembleChunk(compiled, "stdin") + debugger.disassembleChunk(compiled, 
"stdin") echo "" serializer.dumpFile(compiled, input, "stdin", "stdin.pbc") @@ -175,6 +177,7 @@ proc runFile(f: string) = tokenizer = newLexer() parser = newParser() compiler = newCompiler() + debugger = newDebugger() serializer = newSerializer() vm = newPeonVM() input: string @@ -205,8 +208,10 @@ proc runFile(f: string) = styledEcho fgCyan, "Compilation step:" styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" + styledEcho fgCyan, "\tCFI data: ", fgGreen, "[", fgYellow, compiled.cfi.join(", "), fgGreen, "]" + styledEcho fgCyan, "\nBytecode disassembler output below:\n" - disassembleChunk(compiled, f) + debugger.disassembleChunk(compiled, f) echo "" serializer.dumpFile(compiled, input, f, splitFile(f).name & ".pbc") @@ -215,7 +220,7 @@ proc runFile(f: string) = var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash styledEcho fgCyan, "Serialization step: " styledEcho fgBlue, &"\t- File hash: ", fgYellow, serialized.fileHash, fgBlue, " (", if hashMatches: fgGreen else: fgRed, if hashMatches: "OK" else: "Fail", fgBlue, ")" - styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch}", fgBlue, " (commit ", fgYellow, serialized.commitHash[0..8], fgBlue, ") on branch ", fgYellow, serialized.peonBranch + styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.version.major}.{serialized.version.minor}.{serialized.version.patch}", fgBlue, " (commit ", fgYellow, serialized.commit[0..8], fgBlue, ") on branch ", fgYellow, serialized.branch stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")) stdout.styledWrite(fgBlue, &"\t- Constants segment: ") if serialized.chunk.consts == compiled.consts: diff --git a/src/tests.pn 
b/src/tests.pn index 07d7f74..a422187 100644 --- a/src/tests.pn +++ b/src/tests.pn @@ -6,7 +6,11 @@ operator `+`(a: int32): int32 { return a; } +fn `+`(a, b: int): int { + return a + b; +} + var `+`: int = 1; # hehehehe +1; # Works: defined for int64 -+1'u8; # Nope! +# +1'u8; # Nope! diff --git a/src/util/debugger.nim b/src/util/debugger.nim index 6119b53..773e28c 100644 --- a/src/util/debugger.nim +++ b/src/util/debugger.nim @@ -21,6 +21,22 @@ import strutils import terminal +type + CFIElement = ref object + start, stop, bottom, argc: int + name: string + Debugger* = ref object + chunk: Chunk + cfiData: seq[CFIElement] + + +proc newDebugger*: Debugger = + ## Initializes a new, empty + ## debugger object + new(result) + result.cfiData = @[] + + proc nl = stdout.write("\n") @@ -44,16 +60,29 @@ proc printInstruction(instruction: OpCode, newline: bool = false) = nl() -proc simpleInstruction(instruction: OpCode, offset: int): int = +proc checkFrame(self: Debugger, n: int) = + for i, e in self.cfiData: + if n == e.start: + styledEcho fgBlue, "==== Peon Bytecode Debugger - Begin Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====" + styledEcho fgGreen, "\t- Start offset: ", fgYellow, $e.start + styledEcho fgGreen, "\t- End offset: ", fgYellow, $e.stop + styledEcho fgGreen, "\t- Stack bottom: ", fgYellow, $e.bottom + styledEcho fgGreen, "\t- Argument count: ", fgYellow, $e.argc + echo "" + elif n == e.stop: + styledEcho fgBlue, "==== Peon Bytecode Debugger - End Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====\n" + + +proc simpleInstruction(self: Debugger, instruction: OpCode, offset: int): int = printInstruction(instruction) nl() return offset + 1 -proc stackTripleInstruction(instruction: OpCode, chunk: Chunk, +proc stackTripleInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs instructions that operate on a single value on the stack using a 24-bit operand - var slot = [chunk.code[offset + 
1], chunk.code[offset + 2], chunk.code[ + var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ offset + 3]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, &", points to index ") @@ -61,10 +90,10 @@ proc stackTripleInstruction(instruction: OpCode, chunk: Chunk, return offset + 4 -proc stackDoubleInstruction(instruction: OpCode, chunk: Chunk, +proc stackDoubleInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs instructions that operate on a single value on the stack using a 16-bit operand - var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble() printInstruction(instruction) stdout.write(&", points to index ") stdout.styledWrite(fgGreen, &", points to index ") @@ -72,28 +101,28 @@ proc stackDoubleInstruction(instruction: OpCode, chunk: Chunk, return offset + 3 -proc argumentDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = +proc argumentDoubleInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs instructions that operate on a hardcoded value on the stack using a 16-bit operand - var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble() printInstruction(instruction) stdout.styledWrite(fgGreen, &", has argument ") stdout.styledWriteLine(fgYellow, $slot) return offset + 3 -proc argumentTripleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = +proc argumentTripleInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand - var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() + var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[offset + 
3]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, ", has argument ") stdout.styledWriteLine(fgYellow, $slot) return offset + 4 -proc callInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = +proc callInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs function calls - var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() - var args = [chunk.code[offset + 4], chunk.code[offset + 5], chunk.code[offset + 6]].fromTriple() + var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[offset + 3]].fromTriple() + var args = [self.chunk.code[offset + 4], self.chunk.code[offset + 5], self.chunk.code[offset + 6]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, &", jumps to address ", fgYellow, $slot, fgGreen, " with ", fgYellow, $args, fgGreen, " argument") if args > 1: @@ -102,27 +131,27 @@ proc callInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = return offset + 7 -proc constantInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = +proc constantInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs instructions that operate on the constant table - var constant = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[ + var constant = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ offset + 3]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, &", points to constant at position ", fgYellow, $constant) nl() printDebug("Operand: ") - stdout.styledWriteLine(fgYellow, &"{chunk.consts[constant]}") + stdout.styledWriteLine(fgYellow, &"{self.chunk.consts[constant]}") return offset + 4 -proc jumpInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = +proc jumpInstruction(self: Debugger, instruction: OpCode, offset: int): int = ## Debugs jumps var jump: int case instruction: of Jump, 
JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards: - jump = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble().int() + jump = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble().int() of LongJump, LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop, LongJumpForwards, LongJumpBackwards: - jump = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[ + jump = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ offset + 3]].fromTriple().int() else: discard # Unreachable @@ -133,42 +162,67 @@ proc jumpInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = return offset + 3 -proc disassembleInstruction*(chunk: Chunk, offset: int): int = +proc disassembleInstruction*(self: Debugger, offset: int): int = ## Takes one bytecode instruction and prints it + self.checkFrame(offset) printDebug("Offset: ") stdout.styledWriteLine(fgYellow, $offset) printDebug("Line: ") - stdout.styledWriteLine(fgYellow, &"{chunk.getLine(offset)}") - var opcode = OpCode(chunk.code[offset]) + stdout.styledWriteLine(fgYellow, &"{self.chunk.getLine(offset)}") + var opcode = OpCode(self.chunk.code[offset]) case opcode: of simpleInstructions: - result = simpleInstruction(opcode, offset) + result = self.simpleInstruction(opcode, offset) of constantInstructions: - result = constantInstruction(opcode, chunk, offset) + result = self.constantInstruction(opcode, offset) of stackDoubleInstructions: - result = stackDoubleInstruction(opcode, chunk, offset) + result = self.stackDoubleInstruction(opcode, offset) of stackTripleInstructions: - result = stackTripleInstruction(opcode, chunk, offset) + result = self.stackTripleInstruction(opcode, offset) of argumentDoubleInstructions: - result = argumentDoubleInstruction(opcode, chunk, offset) + result = self.argumentDoubleInstruction(opcode, offset) of argumentTripleInstructions: - result = argumentTripleInstruction(opcode, chunk, offset) + result = 
self.argumentTripleInstruction(opcode, offset) of callInstructions: - result = callInstruction(opcode, chunk, offset) + result = self.callInstruction(opcode, offset) of jumpInstructions: - result = jumpInstruction(opcode, chunk, offset) + result = self.jumpInstruction(opcode, offset) else: echo &"DEBUG - Unknown opcode {opcode} at index {offset}" result = offset + 1 -proc disassembleChunk*(chunk: Chunk, name: string) = - ## Takes a chunk of bytecode, and prints it - echo &"==== Peon Bytecode Debugger - Chunk '{name}' ====\n" +proc parseCFIData(self: Debugger) = + var + start, stop, bottom, argc: int + fnName: string + idx = 0 + size = 0 + while idx < len(self.chunk.cfi): + start = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1], self.chunk.cfi[idx + 2]].fromTriple()) + idx += 3 + stop = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1], self.chunk.cfi[idx + 2]].fromTriple()) + idx += 3 + bottom = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1], self.chunk.cfi[idx + 2]].fromTriple()) + idx += 3 + argc = int(self.chunk.cfi[idx]) + inc(idx) + size = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1]].fromDouble()) + idx += 2 + fnName = self.chunk.cfi[idx..