Added pointer tagging to GC to resolve potential memory leak

This commit is contained in:
Mattia Giambirtone 2023-02-25 17:35:57 +01:00
parent f2dbfcbd0a
commit f4752a9f7c
1 changed files with 34 additions and 25 deletions

View File

@ -111,9 +111,19 @@ proc newPeonGC*: PeonGC =
proc collect*(self: var PeonVM)
# Our pointer tagging routines
template tag(p: untyped): untyped =
  ## Reinterprets `p` as a 64-bit integer, sets bit 63
  ## (the tag bit) and yields the result as a pointer
  cast[pointer](cast[uint64](p) or 0x8000000000000000'u64)
template untag(p: untyped): untyped =
  ## Reinterprets `p` as a 64-bit integer, clears bit 63
  ## (the tag bit) and yields the result as a pointer.
  ## All the other bits are left untouched
  cast[pointer](cast[uint64](p) and not (1'u64 shl 63'u64))
template getTag(p: untyped): untyped =
  ## Evaluates to true if bit 63 (the bit set by tag())
  ## is set in the 64-bit unsigned integer `p` and to
  ## false otherwise. The previous version compared the
  ## masked value against zero with ==, which reported
  ## the tag as present exactly when it was absent
  (p and (1'u64 shl 63'u64)) != 0
proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): pointer =
## Simple wrapper around realloc with
## built-in garbage collection
## built-in garbage collection. Callers
## should keep in mind that the returned
## pointer is tagged (bit 63 is set to 1)
## and should be passed to untag() before
## being dereferenced or otherwise used
self.gc.bytesAllocated.current += newSize - oldSize
try:
when debugMem:
@ -137,7 +147,7 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin
else:
if self.gc.bytesAllocated.current >= self.gc.nextGC:
self.collect()
result = realloc(p, newSize)
result = tag(realloc(untag(p), newSize))
except NilAccessDefect:
stderr.writeLine("Peon: could not manage memory, segmentation fault")
quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit
@ -168,12 +178,12 @@ proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): p
## Allocates an object on the heap and adds its
## location to the internal pointer list of the
## garbage collector
result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
result = cast[ptr HeapObject](untag(self.reallocate(nil, 0, sizeof(HeapObject))))
setkind(result[], kind, kind)
result.marked = false
case kind:
of String:
result.str = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count))
result.str = cast[ptr UncheckedArray[char]](untag(self.reallocate(nil, 0, sizeof(size) * count)))
result.len = count
else:
discard # TODO
@ -200,32 +210,31 @@ proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
## their addresses
when debugGC:
echo "DEBUG - GC: Starting mark phase"
# Unlike what Bob does in his book,
# we keep track of objects in a different
# way due to the difference of our design.
# Specifically, we don't have neat structs for
# all peon objects: When we allocate() an object,
# we keep track of the small wrapper it created
# along with its type and other metadata. Then,
# we can go through the various sources of roots
# in the VM, see if they match any pointers we
# already know about (we store them in a hash set so
# it's really fast), and then we can be sure that
# anything that's in the difference (i.e. mathematical
# set difference) between our full list of pointers
# and the live ones is not a root object, so if it's
# not indirectly reachable through a root itself, it
# can be freed. I'm not sure if I can call this GC
# strategy precise, since technically there is a chance
# for a regular value to collide with one of the pointers
# we allocated and that would cause a memory leak, but
# with a 64-bit address-space it probably hardly matters,
# so I guess this is a mostly-precise Mark&Sweep collector
# Unlike what Bob does in his book, we keep track
# of objects another way, mainly due to the difference
# of our respective designs. Specifically, our VM only
# handles a single type (uint64), while Lox has a stack
# of heap-allocated structs (which is convenient, but slow).
# The previous implementation would just store all pointers
# allocated by us in a hash set and then check if any source
# of roots contained any of the integer values that it was
# keeping track of, but this meant that if a primitive object's
# value happened to collide with an active pointer the GC would
# mistakenly assume the object was reachable, potentially leading
# to a nasty memory leak. The current implementation uses pointer
# tagging: we know that modern CPUs never use bit 63 in addresses,
# so if it is not set we know it cannot be a pointer, and if it is set we
# just need to check if it's in our list of active addresses or not.
# This should resolve the potential memory leak (hopefully)
var result = initHashSet[uint64](self.gc.pointers.len())
for obj in self.calls:
if not obj.getTag():
continue
if obj in self.gc.pointers:
result.incl(obj)
for obj in self.operands:
if not obj.getTag():
continue
if obj in self.gc.pointers:
result.incl(obj)
var obj: ptr HeapObject