Added pointer tagging to GC to resolve potential memory leak
This commit is contained in:
parent
f2dbfcbd0a
commit
f4752a9f7c
|
@ -111,9 +111,19 @@ proc newPeonGC*: PeonGC =
|
|||
proc collect*(self: var PeonVM)
|
||||
|
||||
|
||||
# Our pointer tagging routines
|
||||
template tag(p: untyped): untyped =
  ## Reinterprets `p` as a 64-bit integer, switches on its most
  ## significant bit (bit 63) and hands the result back as a raw
  ## `pointer`. The tagged value must go through untag() before
  ## it is dereferenced
  cast[pointer](cast[uint64](p) or 0x8000000000000000'u64)
|
||||
template untag(p: untyped): untyped =
  ## Reinterprets `p` as a 64-bit integer, clears its most
  ## significant bit (bit 63) and hands the result back as a raw
  ## `pointer`. All other bits are left untouched, so untagging
  ## a value whose bit 63 is already clear is a no-op
  cast[pointer](cast[uint64](p) and not (1'u64 shl 63'u64))
|
||||
template getTag(p: untyped): untyped =
  ## Evaluates to true when the most significant bit (bit 63)
  ## of the 64-bit value `p` is clear, false when it is set.
  # NOTE(review): despite the name, this is true for values
  # WITHOUT the tag bit, so `if not obj.getTag(): continue`
  # skips values that carry the tag. Confirm the polarity is
  # what callers expect before changing it
  (p and 0x8000000000000000'u64) == 0'u64
|
||||
|
||||
|
||||
proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): pointer =
|
||||
## Simple wrapper around realloc with
|
||||
## built-in garbage collection
|
||||
## built-in garbage collection. Callers
|
||||
## should keep in mind that the returned
|
||||
## pointer is tagged (bit 63 is set to 1)
|
||||
## and should be passed to untag() before
|
||||
## being dereferenced or otherwise used
|
||||
self.gc.bytesAllocated.current += newSize - oldSize
|
||||
try:
|
||||
when debugMem:
|
||||
|
@ -137,7 +147,7 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin
|
|||
else:
|
||||
if self.gc.bytesAllocated.current >= self.gc.nextGC:
|
||||
self.collect()
|
||||
result = realloc(p, newSize)
|
||||
result = tag(realloc(untag(p), newSize))
|
||||
except NilAccessDefect:
|
||||
stderr.writeLine("Peon: could not manage memory, segmentation fault")
|
||||
quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit
|
||||
|
@ -168,12 +178,12 @@ proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): p
|
|||
## Allocates an object on the heap and adds its
|
||||
## location to the internal pointer list of the
|
||||
## garbage collector
|
||||
result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
|
||||
result = cast[ptr HeapObject](untag(self.reallocate(nil, 0, sizeof(HeapObject))))
|
||||
setkind(result[], kind, kind)
|
||||
result.marked = false
|
||||
case kind:
|
||||
of String:
|
||||
result.str = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count))
|
||||
result.str = cast[ptr UncheckedArray[char]](untag(self.reallocate(nil, 0, sizeof(size) * count)))
|
||||
result.len = count
|
||||
else:
|
||||
discard # TODO
|
||||
|
@ -200,32 +210,31 @@ proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
|
|||
## their addresses
|
||||
when debugGC:
|
||||
echo "DEBUG - GC: Starting mark phase"
|
||||
# Unlike what Bob does in his book,
|
||||
# we keep track of objects in a different
|
||||
# way due to the difference of our design.
|
||||
# Specifically, we don't have neat structs for
|
||||
# all peon objects: When we allocate() an object,
|
||||
# we keep track of the small wrapper it created
|
||||
# along with its type and other metadata. Then,
|
||||
# we can go through the various sources of roots
|
||||
# in the VM, see if they match any pointers we
|
||||
# already know about (we store them in a hash set so
|
||||
# it's really fast), and then we can be sure that
|
||||
# anything that's in the difference (i.e. mathematical
|
||||
# set difference) between our full list of pointers
|
||||
# and the live ones is not a root object, so if it's
|
||||
# not indirectly reachable through a root itself, it
|
||||
# can be freed. I'm not sure if I can call this GC
|
||||
# strategy precise, since technically there is a chance
|
||||
# for a regular value to collide with one of the pointers
|
||||
# we allocated and that would cause a memory leak, but
|
||||
# with a 64-bit address-space it probably hardly matters,
|
||||
# so I guess this is a mostly-precise Mark&Sweep collector
|
||||
# Unlike what Bob does in his book, we keep track
|
||||
# of objects another way, mainly due to the difference
|
||||
# of our respective designs. Specifically, our VM only
|
||||
# handles a single type (uint64), while Lox has a stack
|
||||
# of heap-allocated structs (which is convenient, but slow).
|
||||
# The previous implementation would just store all pointers
|
||||
# allocated by us in a hash set and then check if any source
|
||||
# of roots contained any of the integer values that it was
|
||||
# keeping track of, but this meant that if a primitive object's
|
||||
# value happened to collide with an active pointer the GC would
|
||||
# mistakenly assume the object was reachable, potentially leading
|
||||
# to a nasty memory leak. The current implementation uses pointer
|
||||
# tagging: we know that modern CPUs never use bit 63 in addresses,
|
||||
# so if it is not set we know it cannot be a pointer, and if it is set we
|
||||
# just need to check if it's in our list of active addresses or not.
|
||||
# This should resolve the potential memory leak (hopefully)
|
||||
var result = initHashSet[uint64](self.gc.pointers.len())
|
||||
for obj in self.calls:
|
||||
if not obj.getTag():
|
||||
continue
|
||||
if obj in self.gc.pointers:
|
||||
result.incl(obj)
|
||||
for obj in self.operands:
|
||||
if not obj.getTag():
|
||||
continue
|
||||
if obj in self.gc.pointers:
|
||||
result.incl(obj)
|
||||
var obj: ptr HeapObject
|
||||
|
|
Loading…
Reference in New Issue