# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## The Peon runtime environment
import ../config

# Sorry, but there only is enough space
# for one GC in this VM :(
{.push checks:enableVMChecks.}  # The VM is a critical point where checks are deleterious

when defined(gcOrc):
    GC_disableOrc()
when not defined(gcArc) and not defined(gcOrc):
    GC_disable()
    GC_disableMarkAndSweep()


import std/math
import std/segfaults
import std/strutils
import std/sets
import std/monotimes


import ../frontend/compiler/targets/bytecode/opcodes
import ../frontend/compiler/targets/bytecode/util/multibyte


# strformat's `&` is used by every debug message below, including the
# ones that are only guarded by debugMarkGC, so that flag is listed too
when debugVM or debugMem or debugGC or debugAlloc or debugMarkGC:
    import std/strformat
    import std/sequtils
    import std/terminal


when debugVM:
    proc clearerr(stream: File) {.header: "stdio.h", importc.}


type
    ObjectKind* = enum
        ## A tag for heap-allocated
        ## peon objects
        String, List,
        Dict, Tuple,
        CustomType,
    HeapObject* = object
        ## A tagged box for a heap-allocated
        ## peon object
        marked*: bool   # Used in the GC phase
        case kind*: ObjectKind
            of String:
                str*: ptr UncheckedArray[char]
                len*: int
            else:
                discard # TODO
    PeonGC* = object
        ## A simple Mark&Sweep collector
        ## to manage peon's heap space.
        ## All heap allocation goes through
        ## this system and is not handled
        ## manually by the VM
        bytesAllocated: tuple[total, current: int]
        when debugGC or debugAlloc:
            cycles: int   # Number of collection cycles run so far
        nextGC: int   # Heap size (in bytes) at which the next cycle starts
        pointers: HashSet[uint64]   # Addresses of all tracked objects
    PeonVM* = object
        ## The Peon Virtual Machine.
        ## Note how the only data
        ## type we handle here is
        ## a 64-bit unsigned integer:
        ## This is to allow the use
        ## of unboxed primitive types.
        ## For more complex types, the
        ## value represents a pointer to
        ## some stack- or heap-allocated
        ## object. The VM has no concept
        ## of type by itself: everything
        ## is lost after the compilation
        ## phase
        ip: uint64                # The instruction pointer
        chunk: Chunk              # The chunk of bytecode to execute
        calls: seq[uint64]        # The call stack
        operands: seq[uint64]     # The operand stack
        cache: array[6, uint64]   # The singletons cache
        frames: seq[uint64]       # Stores the bottom of stack frames
        results: seq[uint64]      # Stores function return values
        gc: PeonGC                # The VM's garbage collector (stored inline)
        when debugVM:
            breakpoints: seq[uint64]  # Breakpoints where we call our debugger
            debugNext: bool           # Whether to debug the next instruction
            lastDebugCommand: string  # The last debugging command input by the user


# Implementation of peon's memory manager

proc newPeonGC*: PeonGC =
    ## Initializes a new, blank
    ## garbage collector. The first
    ## collection is scheduled once the
    ## heap reaches FirstGC bytes
    result.bytesAllocated = (0, 0)
    result.nextGC = FirstGC
    when debugGC or debugAlloc:
        result.cycles = 0


proc collect*(self: var PeonVM)


proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): pointer =
    ## Simple wrapper around realloc with
    ## built-in garbage collection. The block
    ## at p (which may be nil) is resized from
    ## oldSize to newSize bytes and the new
    ## address is returned; a newSize of 0
    ## releases the block. Byte counters are
    ## updated and, when the request grows the
    ## heap, a collection cycle may run *before*
    ## the memory is actually resized
    self.gc.bytesAllocated.current += newSize - oldSize
    try:
        when debugMem:
            if newSize == 0 and not p.isNil():
                if oldSize > 1:
                    echo &"DEBUG - MM: Deallocating {oldSize} bytes of memory"
                else:
                    echo "DEBUG - MM: Deallocating 1 byte of memory"
        # Only fresh allocations and growing resizes count towards the
        # total and can trigger the collector: shrinking or freeing
        # never does, which is what makes it safe for the sweep phase
        # to call into this procedure
        if (oldSize > 0 and not p.isNil() and newSize > oldSize) or oldSize == 0:
            when debugMem:
                if oldSize == 0:
                    if newSize > 1:
                        echo &"DEBUG - MM: Allocating {newSize} bytes of memory"
                    else:
                        echo "DEBUG - MM: Allocating 1 byte of memory"
                else:
                    echo &"DEBUG - MM: Resizing {oldSize} bytes of memory to {newSize} bytes"
            self.gc.bytesAllocated.total += newSize - oldSize
            when debugStressGC:
                self.collect()
            else:
                if self.gc.bytesAllocated.current >= self.gc.nextGC:
                    self.collect()
        result = realloc(p, newSize)
    except NilAccessDefect:
        stderr.writeLine("Peon: could not manage memory, segmentation fault")
        quit(139)  # For now, there's not much we can do if we can't get the memory we need, so we exit


template resizeArray(self: var PeonVM, kind: untyped, p: pointer, oldCount, newCount: int): untyped =
    ## Handy template to resize a dynamic array
    ## of oldCount elements of the given type so
    ## that it holds newCount of them
    cast[ptr UncheckedArray[kind]](reallocate(self, p, sizeof(kind) * oldCount, sizeof(kind) * newCount))


template freeArray(self: var PeonVM, kind: untyped, p: pointer, size: int): untyped =
    ## Frees a dynamic array of size
    ## elements of the given type
    discard reallocate(self, p, sizeof(kind) * size, 0)


template free(self: var PeonVM, kind: typedesc, p: pointer): untyped =
    ## Frees a pointer by reallocating its
    ## size to 0
    discard reallocate(self, p, sizeof(kind), 0)


template setKind[T, K](t: var T, kind: untyped, target: K) =
    ## Overwrites the discriminator field of an
    ## object variant in place, bypassing the
    ## usual restrictions on assigning to it.
    ## Thanks to https://forum.nim-lang.org/t/8312
    cast[ptr K](cast[int](addr t) + offsetOf(typeof(t), kind))[] = target


proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): ptr HeapObject {.inline.} =
    ## Allocates an object on the heap and adds its
    ## location to the internal pointer list of the
    ## garbage collector. For strings, a buffer of
    ## count elements of the given type is allocated
    ## as well
    result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
    # The box comes straight from realloc, so its tag and mark bit
    # must be written explicitly before any variant field is touched
    setkind(result[], kind, kind)
    result.marked = false
    case kind:
        of String:
            result.str = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count))
            result.len = count
        else:
            discard  # TODO
    # The object becomes visible to the collector only from here on
    self.gc.pointers.incl(cast[uint64](result))
    when debugAlloc:
        echo &"DEBUG - GC: Allocated new object: {result[]}"
        echo &"DEBUG - GC: Current heap size: {self.gc.bytesAllocated.current}"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
        echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
        echo &"DEBUG - GC: Completed GC cycles: {self.gc.cycles}"


proc mark(self: ptr HeapObject): bool =
    ## Marks a single object. Returns
    ## true if the object was not marked
    ## before this call, false otherwise
    if self.marked:
        return false
    self.marked = true
    return true


proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
    ## Marks root objects *not* to be
    ## collected by the GC and returns
    ## their addresses
    when debugGC:
        echo "DEBUG - GC: Starting mark phase"
    # Unlike what Bob does in his book, we keep track
    # of objects another way, mainly due to the difference
    # of our respective designs. Specifically, our VM only
    # handles a single type (uint64), while Lox has a stack
    # of heap-allocated structs (which is convenient, but slow).
    # What we do instead is store all pointers allocated by us
    # in a hash set and then check if any source of roots contains
    # any of the integer values that we're keeping track of. Note
    # that this means that if a primitive object's value happens to
    # collide with an active pointer, the GC will mistakenly assume
    # the object to be reachable (potentially leading to a nasty
    # memory leak). Hopefully, in a 64-bit address space, this
    # occurrence is rare enough for us to ignore
    #
    # The roots are accumulated directly into the implicit result
    # variable: declaring a local called "result" would shadow it
    # and make this procedure hand an empty set to the trace phase
    # NOTE(review): only the call and operand stacks are scanned;
    # confirm that self.results can never hold the sole reference
    # to a heap object
    for value in self.calls:
        if value in self.gc.pointers:
            result.incl(cast[ptr HeapObject](value))
    for value in self.operands:
        if value in self.gc.pointers:
            result.incl(cast[ptr HeapObject](value))
    for obj in result:
        if obj.mark():
            when debugMarkGC:
                echo &"DEBUG - GC: Marked object: {obj[]}"
    when debugGC:
        echo "DEBUG - GC: Mark phase complete"


proc trace(self: var PeonVM, roots: HashSet[ptr HeapObject]) =
    ## Traces references to other
    ## objects starting from the
    ## roots. The second argument
    ## is the output of the mark
    ## phase. To speak in terms
    ## of the tricolor abstraction,
    ## this is where we blacken gray
    ## objects
    when debugGC:
        if len(roots) > 0:
            echo &"DEBUG - GC: Tracing indirect references from {len(roots)} root{(if len(roots) > 1: \"s\" else: \"\")}"
    var count = 0
    for root in roots:
        case root.kind:
            of String:
                discard  # Strings hold no additional references
            else:
                discard  # TODO: Other types
    when debugGC:
        echo &"DEBUG - GC: Traced {count} indirect reference{(if count != 1: \"s\" else: \"\")}"


proc free(self: var PeonVM, obj: ptr HeapObject) =
    ## Frees a single heap-allocated
    ## peon object and all the memory
    ## it directly or indirectly owns. Note
    ## that the pointer itself is not released
    ## from the GC's internal table and must be
    ## handled by the caller
    when debugAlloc:
        echo &"DEBUG - GC: Freeing object: {obj[]}"
    case obj.kind:
        of String:
            # Strings only own their
            # underlying character array
            if obj.len > 0 and not obj.str.isNil():
                self.freeArray(char, obj.str, obj.len)
        else:
            discard  # TODO
    self.free(HeapObject, obj)
    when debugAlloc:
        echo &"DEBUG - GC: Current heap size: {self.gc.bytesAllocated.current}"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
        echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
        echo &"DEBUG - GC: Completed GC cycles: {self.gc.cycles}"


proc sweep(self: var PeonVM) =
    ## Sweeps unmarked objects
    ## that have been left behind
    ## during the mark phase.
    when debugGC:
        echo "DEBUG - GC: Beginning sweeping phase"
    var count = 0
    var current: ptr HeapObject
    # Dead addresses are collected on the side because the set of
    # tracked pointers must not change while we iterate over it
    var freed: HashSet[uint64]
    for p in self.gc.pointers:
        current = cast[ptr HeapObject](p)
        if current.marked:
            # Object is marked: don't touch it,
            # but reset its mark so that it doesn't
            # stay alive forever
            when debugMarkGC:
                echo &"DEBUG - GC: Unmarking object: {current[]}"
            current.marked = false
        else:
            # Object is unmarked: its memory is
            # fair game
            self.free(current)
            freed.incl(p)
            when debugGC:
                inc(count)
    # Set difference, performed in place
    self.gc.pointers.excl(freed)
    when debugGC:
        echo &"DEBUG - GC: Swept {count} object{(if count != 1: \"s\" else: \"\")}"


proc collect(self: var PeonVM) =
    ## Attempts to reclaim some
    ## memory from unreachable
    ## objects on the heap, then
    ## schedules the next cycle
    ## based on the surviving heap
    ## size
    when debugGC:
        let before = self.gc.bytesAllocated.current
        let time = getMonoTime().ticks().float() / 1_000_000
        echo ""
        echo &"DEBUG - GC: Starting collection cycle at heap size {self.gc.bytesAllocated.current}"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
        echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
        echo &"DEBUG - GC: Completed GC cycles: {self.gc.cycles}"
    when debugGC or debugAlloc:
        # The counter exists (and is printed) under either flag,
        # so it has to be kept up to date under either flag too
        inc(self.gc.cycles)
    self.trace(self.markRoots())
    self.sweep()
    self.gc.nextGC = self.gc.bytesAllocated.current * HeapGrowFactor
    if self.gc.nextGC == 0:
        # An empty heap would otherwise trigger a
        # collection on every single allocation
        self.gc.nextGC = FirstGC
    when debugGC:
        echo &"DEBUG - GC: Collection cycle has terminated in {getMonoTime().ticks().float() / 1_000_000 - time:.2f} ms, collected {before - self.gc.bytesAllocated.current} bytes of memory in total"
        echo &"DEBUG - GC: Next cycle at {self.gc.nextGC} bytes"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
        echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
        echo &"DEBUG - GC: Completed GC cycles: {self.gc.cycles}"


# Implementation of the peon VM

proc initCache*(self: var PeonVM) =
    ## Initializes the VM's
    ## singletons cache
    self.cache[0] = 0x0  # False
    self.cache[1] = 0x1  # True
    self.cache[2] = 0x2  # Nil
    self.cache[3] = 0x3  # Positive inf
    self.cache[4] = 0x4  # Negative inf
    self.cache[5] = 0x5  # NaN


proc newPeonVM*: PeonVM =
    ## Initializes a new, blank VM
    ## for executing Peon bytecode
    result.ip = 0
    result.initCache()
    result.gc = newPeonGC()
    result.frames = @[]
    result.operands = @[]
    result.results = @[]
    result.calls = @[]


# Getters for singleton types
{.push inline.}

func getNil*(self: var PeonVM): uint64 =
    ## Returns the cached nil singleton
    self.cache[2]


func getBool*(self: var PeonVM, value: bool): uint64 =
    ## Returns the cached singleton
    ## matching the given boolean
    if value:
        return self.cache[1]
    return self.cache[0]


func getInf*(self: var PeonVM, positive: bool): uint64 =
    ## Returns the cached positive or
    ## negative infinity singleton
    if positive:
        return self.cache[3]
    return self.cache[4]


func getNan*(self: var PeonVM): uint64 =
    ## Returns the cached NaN singleton
    self.cache[5]


# Nim defines x > y as a template for y < x (which by itself is
# fine), but templates expand their arguments in the order they
# appear in the template body, so a > b evaluates b before a
# (see https://nim-lang.org/docs/manual.html#order-of-evaluation
# and https://github.com/nim-lang/Nim/issues/10425). We need a custom
# operator that preserves the natural order of evaluation
func `!>`[T](a, b: T): auto =
    b < a
proc `!>=`[T](a, b: T): auto {.used.} =
    ## Like >=, but evaluates its
    ## operands from left to right
    b <= a


# Stack primitives. Note that all accesses to the call stack
# that go through the (get|set|peek)c wrappers are frame-relative,
# meaning that the given index is added to the current stack frame's
# bottom to obtain an absolute stack index

func push(self: var PeonVM, obj: uint64) =
    ## Pushes a value object onto the
    ## operand stack
    self.operands.add(obj)


func pop(self: var PeonVM): uint64 =
    ## Pops a value off the operand
    ## stack and returns it
    return self.operands.pop()


func peekb(self: PeonVM, distance: BackwardsIndex = ^1): uint64 =
    ## Returns the value at the given (backwards)
    ## distance from the top of the operand stack
    ## without consuming it
    return self.operands[distance]


func peek(self: PeonVM, distance: int = 0): uint64 =
    ## Returns the value at the given
    ## distance from the top of the
    ## operand stack without consuming it.
    ## A negative distance -n is read as the
    ## backwards index ^n, so both 0 and -1
    ## refer to the top of the stack
    if distance < 0:
        return self.peekb(^(-int(distance)))
    return self.operands[self.operands.high() + distance]


func pushc(self: var PeonVM, val: uint64) =
    ## Pushes a value onto the
    ## call stack
    self.calls.add(val)


func popc(self: var PeonVM): uint64 =
    ## Pops a value off the call
    ## stack and returns it
    return self.calls.pop()


func peekc(self: PeonVM, distance: int = 0): uint64 {.used.} =
    ## Returns the value at the given
    ## distance from the top of the
    ## call stack without consuming it
    return self.calls[self.calls.high() + distance]


func getc(self: PeonVM, idx: int): uint64 =
    ## Getter method that abstracts
    ## indexing our call stack through
    ## stack frames
    return self.calls[idx.uint64 + self.frames[^1]]


func setc(self: var PeonVM, idx: int, val: uint64) =
    ## Setter method that abstracts
    ## indexing our call stack through
    ## stack frames
    self.calls[idx.uint64 + self.frames[^1]] = val


# Byte-level primitives to read and decode
# bytecode

proc readByte(self: var PeonVM): uint8 =
    ## Reads a single byte from the
    ## bytecode and returns it as an
    ## unsigned 8 bit integer. The
    ## instruction pointer is advanced
    ## by one
    inc(self.ip)
    return self.chunk.code[self.ip - 1]


proc readShort(self: var PeonVM): uint16 =
    ## Reads two bytes from the
    ## bytecode and returns them
    ## as an unsigned 16 bit
    ## integer
    return [self.readByte(), self.readByte()].fromDouble()


proc readLong(self: var PeonVM): uint32 =
    ## Reads three bytes from the
    ## bytecode and returns them
    ## as an unsigned 32 bit
    ## integer. Note however that
    ## the boundary is capped at
    ## 24 bits instead of 32
    return uint32([self.readByte(), self.readByte(), self.readByte()].fromTriple())


proc readUInt(self: var PeonVM): uint32 {.used.} =
    ## Reads four bytes from the
    ## bytecode and returns them
    ## as an unsigned 32 bit
    ## integer
    return uint32([self.readByte(), self.readByte(), self.readByte(), self.readByte()].fromQuad())


# Functions to read primitives from the chunk's
# constants table. Multi-byte values are copied
# out of the table byte for byte, in the order
# in which they are stored

proc constReadInt64(self: var PeonVM, idx: int): int64 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an int64
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3],
               self.chunk.consts[idx + 4], self.chunk.consts[idx + 5],
               self.chunk.consts[idx + 6], self.chunk.consts[idx + 7],
    ]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadUInt64(self: var PeonVM, idx: int): uint64 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an uint64
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3],
               self.chunk.consts[idx + 4], self.chunk.consts[idx + 5],
               self.chunk.consts[idx + 6], self.chunk.consts[idx + 7],
    ]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadUInt32(self: var PeonVM, idx: int): uint32 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an uint32
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadInt32(self: var PeonVM, idx: int): int32 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an int32
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadInt16(self: var PeonVM, idx: int): int16 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an int16
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadUInt16(self: var PeonVM, idx: int): uint16 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an uint16
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadInt8(self: var PeonVM, idx: int): int8 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an int8
    # The stored byte is reinterpreted rather than converted: a
    # checked conversion would raise a RangeDefect for every
    # negative value (bytes >= 128) when VM checks are enabled
    result = cast[int8](self.chunk.consts[idx])


proc constReadUInt8(self: var PeonVM, idx: int): uint8 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as an uint8
    result = self.chunk.consts[idx]


proc constReadFloat32(self: var PeonVM, idx: int): float32 =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as a float32
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadFloat64(self: var PeonVM, idx: int): float =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns it as a float
    var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
               self.chunk.consts[idx + 2], self.chunk.consts[idx + 3],
               self.chunk.consts[idx + 4], self.chunk.consts[idx + 5],
               self.chunk.consts[idx + 6], self.chunk.consts[idx + 7]]
    copyMem(result.addr, arr.addr, sizeof(arr))


proc constReadString(self: var PeonVM, size, idx: int): ptr HeapObject =
    ## Reads a constant from the
    ## chunk's constant table and
    ## returns
it as a pointer to ## a heap-allocated string let str = self.chunk.consts[idx.. ") stdout.flushFile() try: command = readLine(stdin) except EOFError: styledEcho(fgYellow, "Use Ctrl+C to exit") clearerr(stdin) break except IOError: styledEcho(fgRed, "An error occurred while reading command: ", fgYellow, getCurrentExceptionMsg()) break if command == "": if self.lastDebugCommand == "": command = "n" else: command = self.lastDebugCommand case command: of "n", "next": self.debugNext = true break of "c", "continue": self.debugNext = false break of "s", "stack": stdout.styledWrite(fgGreen, "Call Stack: ", fgMagenta, "[") for i, e in self.calls: stdout.styledWrite(fgYellow, $e) if i < self.calls.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" of "o", "operands": stdout.styledWrite(fgBlue, "Operand Stack: ", fgMagenta, "[") for i, e in self.operands: stdout.styledWrite(fgYellow, $e) if i < self.operands.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" of "f", "frame": stdout.styledWrite(fgCyan, "Current Frame: ", fgMagenta, "[") if self.frames.len() > 0: for i, e in self.calls[self.frames[^1]..^1]: stdout.styledWrite(fgYellow, $e) if i < (self.calls.high() - self.frames[^1].int): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]", fgCyan of "frames": stdout.styledWrite(fgRed, "Live stack frames: ", fgMagenta, "[") for i, e in self.frames: stdout.styledWrite(fgYellow, $e) if i < self.frames.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" of "r", "results": stdout.styledWrite(fgYellow, "Function Results: ", fgMagenta, "[") for i, e in self.results: stdout.styledWrite(fgYellow, $e) if i < self.results.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" of "clear": stdout.write("\x1Bc") else: styledEcho(fgRed, "Unknown command ", fgYellow, &"'{command}'") proc dispatch*(self: var PeonVM) = ## Main bytecode dispatch loop var instruction {.register.}: OpCode while true: {.computedgoto.} # 
https://nim-lang.org/docs/manual.html#pragmas-computedgoto-pragma when debugVM: if self.ip in self.breakpoints or self.debugNext: self.debug() instruction = OpCode(self.readByte()) case instruction: # Constant loading instructions of LoadTrue: self.push(self.getBool(true)) of LoadFalse: self.push(self.getBool(false)) of LoadNan: self.push(self.getNan()) of LoadNil: self.push(self.getNil()) of LoadInf: self.push(self.getInf(true)) of LoadNInf: self.push(self.getInf(false)) of LoadInt64: self.push(uint64(self.constReadInt64(int(self.readLong())))) of LoadUInt64: self.push(uint64(self.constReadUInt64(int(self.readLong())))) of LoadUInt32: self.push(uint64(self.constReadUInt32(int(self.readLong())))) of LoadInt32: self.push(uint64(self.constReadInt32(int(self.readLong())))) of LoadInt16: self.push(uint64(self.constReadInt16(int(self.readLong())))) of LoadUInt16: self.push(uint64(self.constReadUInt16(int(self.readLong())))) of LoadInt8: self.push(uint64(self.constReadInt8(int(self.readLong())))) of LoadUInt8: self.push(uint64(self.constReadUInt8(int(self.readLong())))) of LoadString: # Loads the string's pointer onto the stack self.push(cast[uint64](self.constReadString(int(self.readLong()), int(self.readLong())))) of LoadFloat32: self.push(cast[uint64](self.constReadFloat32(int(self.readLong())))) of LoadFloat64: self.push(cast[uint64](self.constReadFloat64(int(self.readLong())))) of Call: # Calls a peon function. The calling convention here # is pretty simple: the first value in the frame is # the new instruction pointer to jump to, then a # 64-bit return address follows. After that, all # arguments and locals follow. 
Note that, due to # how the stack works, all arguments before the call # are in the reverse order in which they are passed # to the function let argc = self.readLong().int let retAddr = self.peek(-argc - 1) # Return address let jmpAddr = self.peek(-argc - 2) # Function address self.ip = jmpAddr self.pushc(jmpAddr) self.pushc(retAddr) # Creates a new result slot for the # function's return value self.results.add(self.getNil()) # Creates a new call frame self.frames.add(uint64(self.calls.len() - 2)) # Loads the arguments onto the stack for _ in 0..