Simplified GC implementation by removing unnecessary metadata

This commit is contained in:
Mattia Giambirtone 2023-02-25 16:17:38 +01:00
parent ae819daac4
commit f2dbfcbd0a
3 changed files with 53 additions and 48 deletions

View File

@ -71,7 +71,6 @@ type
cycles: int
nextGC: int
pointers: HashSet[uint64]
objects: seq[ptr HeapObject]
PeonVM* = object
## The Peon Virtual Machine.
## Note how the only data
@ -105,7 +104,6 @@ proc newPeonGC*: PeonGC =
## Initializes a new, blank
## garbage collector
result.bytesAllocated = (0, 0)
result.objects = @[]
result.nextGC = FirstGC
result.cycles = 0
@ -137,7 +135,7 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin
when debugStressGC:
if self.gc.bytesAllocated.current > self.gc.nextGC:
if self.gc.bytesAllocated.current >= self.gc.nextGC:
result = realloc(p, newSize)
except NilAccessDefect:
@ -167,7 +165,9 @@ template setKind[T, K](t: var T, kind: untyped, target: K) =
proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): ptr HeapObject {.inline.} =
## Allocates an object on the heap
## Allocates an object on the heap and adds its
## location to the internal pointer list of the
## garbage collector
result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
setkind(result[], kind, kind)
result.marked = false
@ -177,7 +177,6 @@ proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): p
result.len = count
discard # TODO
when debugAlloc:
echo &"DEBUG - GC: Allocated new object: {result[]}"
@ -195,13 +194,13 @@ proc mark(self: ptr HeapObject): bool =
return true
proc markRoots(self: var PeonVM): seq[ptr HeapObject] =
proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
## Marks root objects *not* to be
## collected by the GC and returns
## their addresses
when debugGC:
echo "DEBUG - GC: Starting mark phase"
# Unlike what bob does in his book,
# Unlike what Bob does in his book,
# we keep track of objects in a different
# way because our design differs from his.
# Specifically, we don't have neat structs for
@ -222,27 +221,24 @@ proc markRoots(self: var PeonVM): seq[ptr HeapObject] =
# we allocated and that would cause a memory leak, but
# with a 64-bit address-space it probably hardly matters,
# so I guess this is a mostly-precise Mark&Sweep collector
var live = initHashSet[uint64](self.gc.pointers.len())
var result = initHashSet[uint64](self.gc.pointers.len())
for obj in self.calls:
if obj in self.gc.pointers:
for obj in self.operands:
if obj in self.gc.pointers:
# We preallocate the space on the seq
result = newSeqOfCap[ptr HeapObject](len(live))
var obj: ptr HeapObject
for p in live:
for p in result:
obj = cast[ptr HeapObject](p)
if obj.mark():
when debugGC:
when debugMarkGC:
echo &"DEBUG - GC: Marked object: {obj[]}"
when debugGC:
echo "DEBUG - GC: Mark phase complete"
proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
proc trace(self: var PeonVM, roots: HashSet[ptr HeapObject]) =
## Traces references to other
## objects starting from the
## roots. The second argument
@ -252,7 +248,8 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
## this is where we blacken gray
## objects
when debugGC:
echo &"DEBUG - GC: Tracing indirect references from {len(roots)} roots"
if len(roots) > 0:
echo &"DEBUG - GC: Tracing indirect references from {len(roots)} root{(if len(roots) > 1: \"s\" else: \"\")}"
var count = 0
for root in roots:
case root.kind:
@ -261,13 +258,16 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
discard # TODO: Other types
when debugGC:
echo &"DEBUG - GC: Traced {count} indirect references"
echo &"DEBUG - GC: Traced {count} indirect reference{(if count != 1: \"s\" else: \"\")}"
proc free(self: var PeonVM, obj: ptr HeapObject) =
## Frees a single heap-allocated
## peon object and all the memory
## it directly or indirectly owns
## it directly or indirectly owns. Note
## that the pointer itself is not released
## from the GC's internal table and must be
## handled by the caller
when debugAlloc:
echo &"DEBUG - GC: Freeing object: {obj[]}"
case obj.kind:
@ -279,7 +279,6 @@ proc free(self: var PeonVM, obj: ptr HeapObject) =
discard # TODO, obj)
when debugAlloc:
echo &"DEBUG - GC: Current heap size: {self.gc.bytesAllocated.current}"
echo &"DEBUG - GC: Total bytes allocated: {}"
@ -291,37 +290,32 @@ proc sweep(self: var PeonVM) =
## Sweeps unmarked objects
## that have been left behind
## during the mark phase.
## This is more convoluted
## than it needs to be because
## nim disallows changing the
## size of a sequence during
## iteration
when debugGC:
echo "DEBUG - GC: Beginning sweeping phase"
var j = -1
var idx = 0
when debugGC:
var count = 0
while j < self.gc.objects.high():
if self.gc.objects[j].marked:
var current: ptr HeapObject
var freed: HashSet[uint64]
for p in self.gc.pointers:
current = cast[ptr HeapObject](p)
if current.marked:
# Object is marked: don't touch it,
# but reset its mark so that it doesn't
# stay alive forever
when debugGC:
echo &"DEBUG - GC: Unmarking object: {self.gc.objects[j][]}"
self.gc.objects[j].marked = false
when debugMarkGC:
echo &"DEBUG - GC: Unmarking object: {current[]}"
current.marked = false
# Object is unmarked: its memory is
# fair game
when debugGC:
# Set difference
self.gc.pointers = self.gc.pointers - freed
when debugGC:
echo &"DEBUG - GC: Swept {count} objects"
echo &"DEBUG - GC: Swept {count} object{(if count > 1: \"s\" else: \"\")}"
proc collect(self: var PeonVM) =
@ -331,6 +325,7 @@ proc collect(self: var PeonVM) =
when debugGC:
let before = self.gc.bytesAllocated.current
let time = getMonoTime().ticks().float() / 1_000_000
echo ""
echo &"DEBUG - GC: Starting collection cycle at heap size {self.gc.bytesAllocated.current}"
echo &"DEBUG - GC: Total bytes allocated: {}"
echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
@ -339,6 +334,8 @@ proc collect(self: var PeonVM) =
self.gc.nextGC = self.gc.bytesAllocated.current * HeapGrowFactor
if self.gc.nextGC == 0:
self.gc.nextGC = FirstGC
when debugGC:
echo &"DEBUG - GC: Collection cycle has terminated in {getMonoTime().ticks().float() / 1_000_000 - time:.2f} ms, collected {before - self.gc.bytesAllocated.current} bytes of memory in total"
echo &"DEBUG - GC: Next cycle at {self.gc.nextGC} bytes"

View File

@ -24,6 +24,7 @@ const debugAlloc* {.booldefine.} = false # Trace object allocation (extrem
const debugMem* {.booldefine.} = false # Debug the memory allocator (extremely verbose)
const debugSerializer* {.booldefine.} = false # Validate the bytecode serializer's output
const debugStressGC* {.booldefine.} = false # Make the GC run a collection at every allocation (VERY SLOW!)
const debugMarkGC* {.booldefine.} = false # Trace the marking phase object by object (extremely verbose)
const PeonBytecodeMarker* = "PEON_BYTECODE" # Magic value at the beginning of bytecode files
const HeapGrowFactor* = 2 # The growth factor used by the GC to schedule the next collection
const FirstGC* = 1024 * 1024; # How many bytes to allocate before running the first GC

View File

@ -1,17 +1,24 @@
import std;
var x = 10000000;
var y = "just a test";
const max = 50000;
var x = max;
var s = "just a test";
print("Starting GC torture test");
while x > 0 {
x = x - 1;
y = "test";
x = max;
while x > 0 {
x = x - 1;
s = "test";
"end of the world";