diff --git a/src/backend/vm.nim b/src/backend/vm.nim index 66d15ce..be312d6 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -111,9 +111,19 @@ proc newPeonGC*: PeonGC = proc collect*(self: var PeonVM) +# Our pointer tagging routines: tag() sets bit 63, untag() clears it and getTag() is true when bit 63 is set +template tag(p: untyped): untyped = cast[pointer](cast[uint64](p) or (1'u64 shl 63'u64)) +template untag(p: untyped): untyped = cast[pointer](cast[uint64](p) and 0x7fffffffffffffff'u64) +template getTag(p: untyped): untyped = (p and (1'u64 shl 63'u64)) != 0 + + proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): pointer = ## Simple wrapper around realloc with - ## built-in garbage collection + ## built-in garbage collection. Callers + ## should keep in mind that the returned + ## pointer is tagged (bit 63 is set to 1) + ## and should be passed to untag() before + ## being dereferenced or otherwise used self.gc.bytesAllocated.current += newSize - oldSize try: when debugMem: @@ -137,7 +147,7 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin else: if self.gc.bytesAllocated.current >= self.gc.nextGC: self.collect() - result = realloc(p, newSize) + result = tag(realloc(untag(p), newSize)) except NilAccessDefect: stderr.writeLine("Peon: could not manage memory, segmentation fault") quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit @@ -168,12 +178,12 @@ proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): p ## Allocates an object on the heap and adds its ## location to the internal pointer list of the ## garbage collector - result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject))) + result = cast[ptr HeapObject](untag(self.reallocate(nil, 0, sizeof(HeapObject)))) setkind(result[], kind, kind) result.marked = false case kind: of String: - result.str = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count)) + result.str = cast[ptr 
UncheckedArray[char]](untag(self.reallocate(nil, 0, sizeof(size) * count))) result.len = count else: discard # TODO @@ -200,32 +210,31 @@ proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] = ## their addresses when debugGC: echo "DEBUG - GC: Starting mark phase" - # Unlike what Bob does in his book, - # we keep track of objects in a different - # way due to the difference of our design. - # Specifically, we don't have neat structs for - # all peon objects: When we allocate() an object, - # we keep track of the small wrapper it created - # along with its type and other metadata. Then, - # we can go through the various sources of roots - # in the VM, see if they match any pointers we - # already know about (we store them in a hash set so - # it's really fast), and then we can be sure that - # anything that's in the difference (i.e. mathematical - # set difference) between our full list of pointers - # and the live ones is not a root object, so if it's - # not indirectly reachable through a root itself, it - # can be freed. I'm not sure if I can call this GC - # strategy precise, since technically there is a chance - # for a regular value to collide with one of the pointers - # we allocated and that would cause a memory leak, but - # with a 64-bit address-space it probably hardly matters, - # so I guess this is a mostly-precise Mark&Sweep collector + # Unlike what Bob does in his book, we keep track + # of objects another way, mainly due to the difference + # of our respective designs. Specifically, our VM only + # handles a single type (uint64), while Lox has a stack + # of heap-allocated structs (which is convenient, but slow). 
+ # The previous implementation would just store all pointers + # allocated by us in a hash set and then check if any source + # of roots contained any of the integer values that it was + # keeping track of, but this meant that if a primitive object's + # value happened to collide with an active pointer the GC would + # mistakenly assume the object was reachable, potentially leading + # to a nasty memory leak. The current implementation uses pointer + # tagging: we know that modern CPUs never use bit 63 in addresses, + # so if it is not set we know it cannot be a pointer, and if it is set we + # just need to check if it's in our list of active addresses or not. + # This should resolve the potential memory leak (hopefully) var result = initHashSet[uint64](self.gc.pointers.len()) for obj in self.calls: + if not obj.getTag(): + continue if obj in self.gc.pointers: result.incl(obj) for obj in self.operands: + if not obj.getTag(): + continue if obj in self.gc.pointers: result.incl(obj) var obj: ptr HeapObject