diff --git a/src/config.nim b/src/config.nim index 6014eb9..1fda235 100644 --- a/src/config.nim +++ b/src/config.nim @@ -27,7 +27,7 @@ when len(PEON_COMMIT_HASH) != 40: const PEON_BRANCH* = "master" when len(PEON_BRANCH) > 255: {.fatal: "The git branch name's length must be less than or equal to 255 characters".} -const DEBUG_TRACE_VM* = true # Traces VM execution +const DEBUG_TRACE_VM* = false # Traces VM execution const DEBUG_TRACE_GC* = false # Traces the garbage collector (TODO) const DEBUG_TRACE_ALLOCATION* = false # Traces memory allocation/deallocation const DEBUG_TRACE_COMPILER* = false # Traces the compiler diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 4f99823..7fc4315 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -79,10 +79,9 @@ type isLet: bool # The name's type valueType: Type - # For variables, the position in the bytecode - # where its StoreVar instruction was emitted. # For functions, this marks where the function's - # code begins + # code begins. For variables, this stores their + # position in the stack (used for closures) codePos: int Loop = object ## A "loop object" used @@ -144,7 +143,7 @@ type # be empty) deferred: seq[uint8] # List of closed-over variables - closedOver: seq[IdentExpr] + closedOver: seq[Name] @@ -181,8 +180,8 @@ proc patchReturnAddress(self: Compiler, retAddr: int) proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = self.currentFunction -proc getFile*(self: COmpiler): string {.inline.} = self.file -proc getModule*(self: COmpiler): string {.inline.} = self.currentModule +proc getFile*(self: Compiler): string {.inline.} = self.file +proc getModule*(self: Compiler): string {.inline.} = self.currentModule ## Utility functions @@ -351,7 +350,7 @@ proc resolve(self: Compiler, name: IdentExpr, for obj in reversed(self.names): if obj.name.token.lexeme == name.token.lexeme: if obj.isPrivate and obj.owner != self.currentModule: - continue # There may be a name in the current module that + continue # There may be a name in the current module that # matches, so we skip this return obj return nil @@ -370,18 +369,20 @@ proc getStackPos(self: Compiler, name: IdentExpr, if name.name.lexeme == variable.name.name.lexeme: if variable.isPrivate and variable.owner != self.currentModule: continue - if variable.depth == depth or variable.depth == 0: + elif variable.depth == depth or variable.depth == 0: # variable.depth == 0 for globals! return (false, i) elif variable.depth > 0: - for j, closure in reversed(self.closedOver): - if closure.name.lexeme == name.name.lexeme: + var j: int = self.closedOver.high() + for closure in reversed(self.closedOver): + if closure.name.token.lexeme == name.name.lexeme: return (true, j) + inc(j) dec(i) return (false, -1) -proc detectClosureVariable(self: Compiler, name: IdentExpr, +proc detectClosureVariable(self: Compiler, name: Name, depth: int = self.scopeDepth) = ## Detects if the given name is used in a local scope deeper ## than the given one and modifies the code emitted for it @@ -391,24 +392,23 @@ proc detectClosureVariable(self: Compiler, name: IdentExpr, ## each time a name is referenced in order for closed-over variables ## to be emitted properly, otherwise the runtime may behave ## unpredictably or crash - let entry = self.resolve(name) - if entry == nil: + if name == nil: return - if entry.depth < depth: + if name.depth < depth: # Ding! The given name is closed over: we need to - # change the StoreVar instruction that created this - # name entry into a StoreHeap. We don't need to change + # change the NoOp instructions that self.declareName + # put in place for us into a StoreHeap. We don't need to change # other pieces of code because self.identifier() already # emits LoadHeap if it detects the variable is closed over, # whether or not this function is called - self.closedOver.add(entry.name) + self.closedOver.add(name) if self.closedOver.len() >= 16777216: self.error("too many consecutive closed-over variables (max is 16777216)") let idx = self.closedOver.high().toTriple() - self.chunk.code[entry.codePos] = StoreHeap.uint8 - self.chunk.code[entry.codePos + 1] = idx[0] - self.chunk.code[entry.codePos + 2] = idx[1] - self.chunk.code[entry.codePos + 3] = idx[2] + self.chunk.code[name.codePos] = StoreHeap.uint8 + self.chunk.code[name.codePos + 1] = idx[0] + self.chunk.code[name.codePos + 2] = idx[1] + self.chunk.code[name.codePos + 3] = idx[2] proc compareTypesWithNullNode(self: Compiler, a, b: Type): bool = @@ -874,10 +874,10 @@ proc declareName(self: Compiler, node: Declaration) = isPrivate: node.isPrivate, owner: self.currentModule, isConst: node.isConst, - valueType: Type(kind: self.inferType( - node.value).kind, node: node), + valueType: Type(kind: self.inferType(node.value).kind, node: node), codePos: self.chunk.code.len(), isLet: node.isLet)) + self.emitBytes([NoOp, NoOp, NoOp, NoOp]) of NodeKind.funDecl: var node = FunDecl(node) # TODO: Emit some optional debugging @@ -932,13 +932,11 @@ proc identifier(self: Compiler, node: IdentExpr) = if s == nil: self.error(&"reference to undeclared name '{node.token.lexeme}'") elif s.isConst: - # Constants are emitted as, you guessed it, LoadConstant instructions - # no matter the scope depth. If optimizations are enabled, the compiler - # will reuse the same constant every time it is referenced instead of - # allocating a new one each time + # Constants are always emitted as Load* instructions + # no matter the scope depth self.emitConstant(node, self.inferType(node)) else: - self.detectClosureVariable(s.name) + self.detectClosureVariable(s) let t = self.getStackPos(node) var index = t.pos # We don't check if index is -1 because if it @@ -950,8 +948,6 @@ proc identifier(self: Compiler, node: IdentExpr) = inc(index) # Skip the return address! self.emitBytes((index - self.frames[self.scopeDepth]).toTriple()) else: - if self.closedOver.len() == 0: - self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics. # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both # dynamic arrays at runtime anyway) @@ -1015,16 +1011,22 @@ proc beginScope(self: Compiler) = inc(self.scopeDepth) - proc endScope(self: Compiler) = ## Ends the current local scope if self.scopeDepth == 0: self.error("cannot call endScope with scopeDepth == 0 (This is an internal error and most likely a bug)") dec(self.scopeDepth) var popped: int = 0 + var name: Name for i, ident in reversed(self.names): if ident.depth > self.scopeDepth: inc(popped) + name = self.names[self.names.high() - i] + if name.valueType.kind != Function and OpCode(self.chunk.code[name.codePos]) == NoOp: + self.chunk.code.delete(name.codePos) + self.chunk.code.delete(name.codePos + 1) + self.chunk.code.delete(name.codePos + 2) + self.chunk.code.delete(name.codePos + 3) self.names.delete(self.names.len() - i) if not self.enableOptimizations: # All variables with a scope depth larger than the current one @@ -1233,8 +1235,7 @@ proc breakStmt(self: Compiler, node: BreakStmt) = # Emits dummy jump offset, this is # patched later - discard self.emitJump(OpCode.Jump) - self.currentLoop.breakPos.add(self.chunk.code.high() - 4) + self.currentLoop.breakPos.add(self.emitJump(OpCode.Jump)) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope self.endScope() @@ -1325,7 +1326,6 @@ proc funDecl(self: Compiler, node: FunDecl) = var function = self.currentFunction self.declareName(node) self.frames.add(self.names.high()) - # TODO: Forward declarations if node.body != nil: if BlockStmt(node.body).code.len() == 0: @@ -1355,11 +1355,11 @@ proc funDecl(self: Compiler, node: FunDecl) = # code var deferStart = self.deferred.len() # We let our debugger know a function is starting - self.chunk.cfi.add(self.chunk.code.high().toTriple()) + let start = self.chunk.code.high() self.blockStmt(BlockStmt(node.body)) # Yup, we're done. That was easy, huh? - # But after all functions are just named + # But, after all, functions are just named # scopes, and we compile them just like that: # we declare their name and arguments (before # their body so recursion works) and then just @@ -1370,6 +1370,7 @@ proc funDecl(self: Compiler, node: FunDecl) = # that's about it # Function is ending! + self.chunk.cfi.add(start.toTriple()) self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.chunk.cfi.add(self.frames[^1].toTriple()) self.chunk.cfi.add(uint8(node.arguments.len())) @@ -1385,10 +1386,10 @@ proc funDecl(self: Compiler, node: FunDecl) = of NodeKind.funDecl: if not self.currentFunction.hasExplicitReturn: let typ = self.inferType(self.currentFunction) - if self.currentFunction.returnType == nil and typ != nil: + if self.currentFunction.returnType == nil and typ.returnType != nil: self.error("non-empty return statement is not allowed in void functions") if self.currentFunction.returnType != nil: - self.error("function has an explicit return type, but no explicit return statement was found") + self.error("function has an explicit return type, but no return statement was found") self.emitByte(OpCode.Return) of NodeKind.lambdaExpr: if not LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn: @@ -1448,3 +1449,4 @@ proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = result = self.chunk if self.ast.len() > 0 and self.scopeDepth != 0: self.error(&"invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope?") + diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index 2ec0027..9c75869 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -69,7 +69,8 @@ type # or 24 bit numbers that are defined statically # at compilation time into the bytecode - # These push a constant onto the stack + # These push a constant at position x in the + # constant table onto the stack LoadInt64 = 0u8, LoadUInt64, LoadInt32, diff --git a/src/main.nim b/src/main.nim index 86ac54a..4ab11fc 100644 --- a/src/main.nim +++ b/src/main.nim @@ -32,7 +32,7 @@ proc getLineEditor: LineEditor # Handy dandy compile-time constants const debugLexer = false const debugParser = false -const debugCompiler = false +const debugCompiler = true const debugSerializer = false const debugRuntime = false @@ -88,11 +88,7 @@ proc repl = echo "" compiled = compiler.compile(tree, "stdin") when debugCompiler: - styledEcho fgCyan, "Compilation step:" - styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" - styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" - styledEcho fgCyan, "\tCFI data: ", fgGreen, "[", fgYellow, compiled.cfi.join(", "), fgGreen, "]" - styledEcho fgCyan, "\nBytecode disassembler output below:\n" + styledEcho fgCyan, "Compilation step:\n" debugger.disassembleChunk(compiled, "stdin") echo "" @@ -209,17 +205,12 @@ proc runFile(f: string) = echo "" compiled = compiler.compile(tree, f) when debugCompiler: - styledEcho fgCyan, "Compilation step:" - styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" - styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" - styledEcho fgCyan, "\tCFI data: ", fgGreen, "[", fgYellow, compiled.cfi.join(", "), fgGreen, "]" - - styledEcho fgCyan, "\nBytecode disassembler output below:\n" + styledEcho fgCyan, "Compilation step:\n" debugger.disassembleChunk(compiled, f) echo "" - serializer.dumpFile(compiled, input, f, splitFile(f).name & ".pbc") - serialized = serializer.loadFile(splitFile(f).name & ".pbc") + serializer.dumpFile(compiled, input, f, splitFile(f).dir & "/" & splitFile(f).name & ".pbc") + serialized = serializer.loadFile(splitFile(f).dir & "/" & splitFile(f).name & ".pbc") when debugSerializer: var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash styledEcho fgCyan, "Serialization step: " diff --git a/src/tests.pn b/src/tests.pn deleted file mode 100644 index 0631a8f..0000000 --- a/src/tests.pn +++ /dev/null @@ -1,16 +0,0 @@ -operator `+`(a: int): int { - return a; -} - -operator `+`(a: int32): int32 { - return a; -} - -fn `+`(a, b: int): int32 { - return 0'i32; # Just to test error messages -} - -var `+`: int = 1; # Can't call a value! - -+1; # Works: defined for int64 -+1'u8; # No implementation for uint8, error! diff --git a/src/util/debugger.nim b/src/util/debugger.nim index 773e28c..19e7913 100644 --- a/src/util/debugger.nim +++ b/src/util/debugger.nim @@ -28,6 +28,7 @@ type Debugger* = ref object chunk: Chunk cfiData: seq[CFIElement] + current: int proc newDebugger*: Debugger = @@ -60,145 +61,148 @@ proc printInstruction(instruction: OpCode, newline: bool = false) = nl() -proc checkFrame(self: Debugger, n: int) = +proc checkFrameStart(self: Debugger, n: int) = for i, e in self.cfiData: if n == e.start: - styledEcho fgBlue, "==== Peon Bytecode Debugger - Begin Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====" + styledEcho fgBlue, "\n==== Peon Bytecode Debugger - Begin Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====" styledEcho fgGreen, "\t- Start offset: ", fgYellow, $e.start styledEcho fgGreen, "\t- End offset: ", fgYellow, $e.stop styledEcho fgGreen, "\t- Stack bottom: ", fgYellow, $e.bottom styledEcho fgGreen, "\t- Argument count: ", fgYellow, $e.argc - echo "" - elif n == e.stop: - styledEcho fgBlue, "==== Peon Bytecode Debugger - End Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====\n" -proc simpleInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc checkFrameEnd(self: Debugger, n: int) = + for i, e in self.cfiData: + if n == e.stop: + styledEcho fgBlue, "\n==== Peon Bytecode Debugger - End Frame ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ====" + + +proc simpleInstruction(self: Debugger, instruction: OpCode) = printInstruction(instruction) nl() - return offset + 1 + self.current += 1 + if instruction in {Return, ReturnValue}: + self.checkFrameEnd(self.current - 1) + self.checkFrameEnd(self.current) -proc stackTripleInstruction(self: Debugger, instruction: OpCode, - offset: int): int = +proc stackTripleInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on a single value on the stack using a 24-bit operand - var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ - offset + 3]].fromTriple() + var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[ + self.current + 3]].fromTriple() printInstruction(instruction) - stdout.styledWrite(fgGreen, &", points to index ") - stdout.styledWriteLine(fgYellow, &"{slot}") - return offset + 4 + stdout.styledWriteLine(fgGreen, &", points to index ", fgYellow, $slot) + self.current += 4 - -proc stackDoubleInstruction(self: Debugger, instruction: OpCode, - offset: int): int = +proc stackDoubleInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on a single value on the stack using a 16-bit operand - var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble() + var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2]].fromDouble() printInstruction(instruction) stdout.write(&", points to index ") - stdout.styledWrite(fgGreen, &", points to index ") - stdout.styledWriteLine(fgYellow, &"{slot}") - return offset + 3 + stdout.styledWriteLine(fgGreen, &", points to index ", fgYellow, $slot) + self.current += 3 -proc argumentDoubleInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc argumentDoubleInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on a hardcoded value on the stack using a 16-bit operand - var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble() + var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2]].fromDouble() printInstruction(instruction) - stdout.styledWrite(fgGreen, &", has argument ") - stdout.styledWriteLine(fgYellow, $slot) - return offset + 3 + stdout.styledWriteLine(fgGreen, &", has argument ", fgYellow, $slot) + self.current += 3 -proc argumentTripleInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc argumentTripleInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand - var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[offset + 3]].fromTriple() + var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() printInstruction(instruction) - stdout.styledWrite(fgGreen, ", has argument ") - stdout.styledWriteLine(fgYellow, $slot) - return offset + 3 + stdout.styledWrite(fgGreen, ", has argument ", fgYellow, $slot) + self.current += 4 -proc callInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc callInstruction(self: Debugger, instruction: OpCode) = ## Debugs function calls - var slot = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[offset + 3]].fromTriple() - var args = [self.chunk.code[offset + 4], self.chunk.code[offset + 5], self.chunk.code[offset + 6]].fromTriple() + var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() + var args = [self.chunk.code[self.current + 4], self.chunk.code[self.current + 5], self.chunk.code[self.current + 6]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, &", jumps to address ", fgYellow, $slot, fgGreen, " with ", fgYellow, $args, fgGreen, " argument") if args > 1: stdout.styledWrite(fgYellow, "s") nl() - return offset + 7 + self.current += 7 -proc constantInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc constantInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on the constant table - var constant = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ - offset + 3]].fromTriple() + var constant = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[ + self.current + 3]].fromTriple() printInstruction(instruction) stdout.styledWrite(fgGreen, &", points to constant at position ", fgYellow, $constant) nl() printDebug("Operand: ") stdout.styledWriteLine(fgYellow, &"{self.chunk.consts[constant]}") - return offset + 4 + self.current += 4 -proc jumpInstruction(self: Debugger, instruction: OpCode, offset: int): int = +proc jumpInstruction(self: Debugger, instruction: OpCode) = ## Debugs jumps + var orig = self.current var jump: int case instruction: of Jump, JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards: - jump = [self.chunk.code[offset + 1], self.chunk.code[offset + 2]].fromDouble().int() + jump = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2]].fromDouble().int() of LongJump, LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop, LongJumpForwards, LongJumpBackwards: - jump = [self.chunk.code[offset + 1], self.chunk.code[offset + 2], self.chunk.code[ - offset + 3]].fromTriple().int() + jump = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[ + self.current + 3]].fromTriple().int() + self.current += 1 else: discard # Unreachable printInstruction(instruction, true) printDebug("Jump size: ") stdout.styledWrite(fgYellow, $jump) nl() - return offset + 3 + self.current += 3 + for i in countup(orig, self.current + 1): + self.checkFrameStart(i) -proc disassembleInstruction*(self: Debugger, offset: int): int = +proc disassembleInstruction*(self: Debugger) = ## Takes one bytecode instruction and prints it - self.checkFrame(offset) printDebug("Offset: ") - stdout.styledWriteLine(fgYellow, $offset) + stdout.styledWriteLine(fgYellow, $self.current) printDebug("Line: ") - stdout.styledWriteLine(fgYellow, &"{self.chunk.getLine(offset)}") - var opcode = OpCode(self.chunk.code[offset]) + stdout.styledWriteLine(fgYellow, &"{self.chunk.getLine(self.current)}") + var opcode = OpCode(self.chunk.code[self.current]) case opcode: of simpleInstructions: - result = self.simpleInstruction(opcode, offset) + self.simpleInstruction(opcode) of constantInstructions: - result = self.constantInstruction(opcode, offset) + self.constantInstruction(opcode) of stackDoubleInstructions: - result = self.stackDoubleInstruction(opcode, offset) + self.stackDoubleInstruction(opcode) of stackTripleInstructions: - result = self.stackTripleInstruction(opcode, offset) + self.stackTripleInstruction(opcode) of argumentDoubleInstructions: - result = self.argumentDoubleInstruction(opcode, offset) + self.argumentDoubleInstruction(opcode) of argumentTripleInstructions: - result = self.argumentTripleInstruction(opcode, offset) + self.argumentTripleInstruction(opcode) of callInstructions: - result = self.callInstruction(opcode, offset) + self.callInstruction(opcode) of jumpInstructions: - result = self.jumpInstruction(opcode, offset) + self.jumpInstruction(opcode) else: - echo &"DEBUG - Unknown opcode {opcode} at index {offset}" - result = offset + 1 + echo &"DEBUG - Unknown opcode {opcode} at index {self.current}" + self.current += 1 proc parseCFIData(self: Debugger) = + ## Parses CFI information in the chunk var start, stop, bottom, argc: int - fnName: string + name: string idx = 0 size = 0 - while idx < len(self.chunk.cfi): + while idx < len(self.chunk.cfi) - 1: start = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1], self.chunk.cfi[idx + 2]].fromTriple()) idx += 3 stop = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1], self.chunk.cfi[idx + 2]].fromTriple()) @@ -209,20 +213,21 @@ proc parseCFIData(self: Debugger) = inc(idx) size = int([self.chunk.cfi[idx], self.chunk.cfi[idx + 1]].fromDouble()) idx += 2 - fnName = self.chunk.cfi[idx..