GC: track heap objects only through the pointer HashSet.

Review notes (text before the first "diff --git" line is ignored by
git apply and patch):
- markRoots() now fills the implicit `result`. The submitted hunk declared
  `var result = initHashSet[uint64](...)`, which shadowed it, so the proc
  always returned an empty set and trace() never saw a root.
- sweep() pluralises its summary with `count != 1`, as trace() does, so
  that zero objects is reported as "0 objects".
- Leading whitespace and blank-line placement were rebuilt from a
  whitespace-collapsed copy, guided by the hunk line counts; verify them
  against the working tree before applying.
- New-side hunk offsets after markRoots() are shifted by +3 and the
  `index` hashes still describe the originally submitted blobs.

diff --git a/src/backend/vm.nim b/src/backend/vm.nim
index 6ed6b65..66d15ce 100644
--- a/src/backend/vm.nim
+++ b/src/backend/vm.nim
@@ -71,7 +71,6 @@ type
         cycles: int
         nextGC: int
         pointers: HashSet[uint64]
-        objects: seq[ptr HeapObject]
     PeonVM* = object
         ## The Peon Virtual Machine.
         ## Note how the only data
@@ -105,7 +104,6 @@ proc newPeonGC*: PeonGC =
     ## Initializes a new, blank
     ## garbage collector
     result.bytesAllocated = (0, 0)
-    result.objects = @[]
     result.nextGC = FirstGC
     result.cycles = 0
 
@@ -137,7 +135,7 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin
             when debugStressGC:
                 self.collect()
             else:
-                if self.gc.bytesAllocated.current > self.gc.nextGC:
+                if self.gc.bytesAllocated.current >= self.gc.nextGC:
                     self.collect()
             result = realloc(p, newSize)
     except NilAccessDefect:
@@ -167,7 +165,9 @@ template setKind[T, K](t: var T, kind: untyped, target: K) =
 
 
 proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): ptr HeapObject {.inline.} =
-    ## Allocates an object on the heap
+    ## Allocates an object on the heap and adds its
+    ## location to the internal pointer list of the
+    ## garbage collector
     result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
     setkind(result[], kind, kind)
     result.marked = false
@@ -177,7 +177,6 @@ proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): p
             result.len = count
         else:
             discard # TODO
-    self.gc.objects.add(result)
     self.gc.pointers.incl(cast[uint64](result))
     when debugAlloc:
         echo &"DEBUG - GC: Allocated new object: {result[]}"
@@ -195,13 +194,13 @@ proc mark(self: ptr HeapObject): bool =
     return true
 
 
-proc markRoots(self: var PeonVM): seq[ptr HeapObject] =
+proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
     ## Marks root objects *not* to be
     ## collected by the GC and returns
     ## their addresses
     when debugGC:
         echo "DEBUG - GC: Starting mark phase"
-    # Unlike what bob does in his book,
+    # Unlike what Bob does in his book,
     # we keep track of objects in a different
     # way due to the difference of our design.
     # Specifically, we don't have neat structs for
@@ -222,27 +221,27 @@ proc markRoots(self: var PeonVM): seq[ptr HeapObject] =
     # we allocated and that would cause a memory leak, but
     # with a 64-bit address-space it probably hardly matters,
     # so I guess this is a mostly-precise Mark&Sweep collector
     var live = initHashSet[uint64](self.gc.pointers.len())
     for obj in self.calls:
         if obj in self.gc.pointers:
             live.incl(obj)
     for obj in self.operands:
         if obj in self.gc.pointers:
             live.incl(obj)
-    # We preallocate the space on the seq
-    result = newSeqOfCap[ptr HeapObject](len(live))
+    # The implicit result set starts out empty and
+    # collects the objects for which mark() is true
     var obj: ptr HeapObject
     for p in live:
         obj = cast[ptr HeapObject](p)
         if obj.mark():
-            result.add(obj)
-            when debugGC:
+            result.incl(obj)
+            when debugMarkGC:
                 echo &"DEBUG - GC: Marked object: {obj[]}"
     when debugGC:
         echo "DEBUG - GC: Mark phase complete"
 
 
-proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
+proc trace(self: var PeonVM, roots: HashSet[ptr HeapObject]) =
     ## Traces references to other
     ## objects starting from the
     ## roots. The second argument
@@ -252,7 +251,8 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
     ## this is where we blacken gray
     ## objects
     when debugGC:
-        echo &"DEBUG - GC: Tracing indirect references from {len(roots)} roots"
+        if len(roots) > 0:
+            echo &"DEBUG - GC: Tracing indirect references from {len(roots)} root{(if len(roots) > 1: \"s\" else: \"\")}"
     var count = 0
     for root in roots:
         case root.kind:
@@ -261,13 +261,16 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
             else:
                 discard # TODO: Other types
     when debugGC:
-        echo &"DEBUG - GC: Traced {count} indirect references"
+        echo &"DEBUG - GC: Traced {count} indirect reference{(if count != 1: \"s\" else: \"\")}"
 
 
 proc free(self: var PeonVM, obj: ptr HeapObject) =
     ## Frees a single heap-allocated
     ## peon object and all the memory
-    ## it directly or indirectly owns
+    ## it directly or indirectly owns. Note
+    ## that the pointer itself is not released
+    ## from the GC's internal table and must be
+    ## handled by the caller
     when debugAlloc:
         echo &"DEBUG - GC: Freeing object: {obj[]}"
     case obj.kind:
@@ -279,7 +282,6 @@ proc free(self: var PeonVM, obj: ptr HeapObject) =
         else:
             discard # TODO
     self.free(HeapObject, obj)
-    self.gc.pointers.excl(cast[uint64](obj))
     when debugAlloc:
         echo &"DEBUG - GC: Current heap size: {self.gc.bytesAllocated.current}"
         echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
@@ -291,37 +293,32 @@ proc sweep(self: var PeonVM) =
     ## Sweeps unmarked objects
     ## that have been left behind
     ## during the mark phase.
-    ## This is more convoluted
-    ## than it needs to be because
-    ## nim disallows changing the
-    ## size of a sequence during
-    ## iteration
     when debugGC:
         echo "DEBUG - GC: Beginning sweeping phase"
-    var j = -1
-    var idx = 0
     when debugGC:
         var count = 0
-    while j < self.gc.objects.high():
-        inc(j)
-        if self.gc.objects[j].marked:
+    var current: ptr HeapObject
+    var freed: HashSet[uint64]
+    for p in self.gc.pointers:
+        current = cast[ptr HeapObject](p)
+        if current.marked:
             # Object is marked: don't touch it,
             # but reset its mark so that it doesn't
             # stay alive forever
-            when debugGC:
-                echo &"DEBUG - GC: Unmarking object: {self.gc.objects[j][]}"
-            self.gc.objects[j].marked = false
-            inc(idx)
+            when debugMarkGC:
+                echo &"DEBUG - GC: Unmarking object: {current[]}"
+            current.marked = false
         else:
             # Object is unmarked: its memory is
             # fair game
-            self.free(self.gc.objects[idx])
-            self.gc.objects.delete(idx)
-            inc(idx)
+            self.free(current)
+            freed.incl(p)
             when debugGC:
                 inc(count)
+    # Set difference
+    self.gc.pointers = self.gc.pointers - freed
     when debugGC:
-        echo &"DEBUG - GC: Swept {count} objects"
+        echo &"DEBUG - GC: Swept {count} object{(if count != 1: \"s\" else: \"\")}"
 
 
 proc collect(self: var PeonVM) =
@@ -331,6 +328,7 @@ proc collect(self: var PeonVM) =
     when debugGC:
         let before = self.gc.bytesAllocated.current
         let time = getMonoTime().ticks().float() / 1_000_000
+        echo ""
         echo &"DEBUG - GC: Starting collection cycle at heap size {self.gc.bytesAllocated.current}"
         echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
         echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
@@ -339,6 +337,8 @@ proc collect(self: var PeonVM) =
     self.trace(self.markRoots())
     self.sweep()
     self.gc.nextGC = self.gc.bytesAllocated.current * HeapGrowFactor
+    if self.gc.nextGC == 0:
+        self.gc.nextGC = FirstGC
     when debugGC:
         echo &"DEBUG - GC: Collection cycle has terminated in {getMonoTime().ticks().float() / 1_000_000 - time:.2f} ms, collected {before - self.gc.bytesAllocated.current} bytes of memory in total"
         echo &"DEBUG - GC: Next cycle at {self.gc.nextGC} bytes"
diff --git a/src/config.nim b/src/config.nim
index b8c0958..b34d11c 100644
--- a/src/config.nim
+++ b/src/config.nim
@@ -17,13 +17,14 @@ import strformat
 # These variables can be tweaked to debug and test various components of the toolchain
 const debugLexer* {.booldefine.} = false # Print the tokenizer's output
 const debugParser* {.booldefine.} = false # Print the AST generated by the parser
-const debugCompiler* {.booldefine.} = false # Disassemble and /or print the code generated by the compiler
+const debugCompiler* {.booldefine.} = false # Disassemble and/or print the code generated by the compiler
 const debugVM* {.booldefine.} = false # Enable the runtime debugger in the bytecode VM
 const debugGC* {.booldefine.} = false # Debug the Garbage Collector (extremely verbose)
 const debugAlloc* {.booldefine.} = false # Trace object allocation (extremely verbose)
 const debugMem* {.booldefine.} = false # Debug the memory allocator (extremely verbose)
 const debugSerializer* {.booldefine.} = false # Validate the bytecode serializer's output
 const debugStressGC* {.booldefine.} = false # Make the GC run a collection at every allocation (VERY SLOW!)
+const debugMarkGC* {.booldefine.} = false # Trace the marking phase object by object (extremely verbose)
 const PeonBytecodeMarker* = "PEON_BYTECODE" # Magic value at the beginning of bytecode files
 const HeapGrowFactor* = 2 # The growth factor used by the GC to schedule the next collection
 const FirstGC* = 1024 * 1024; # How many bytes to allocate before running the first GC
diff --git a/tests/gc.pn b/tests/gc.pn
index 8d8e482..ae0471b 100644
--- a/tests/gc.pn
+++ b/tests/gc.pn
@@ -1,17 +1,24 @@
 import std;
 
 
-var x = 10000000;
-var y = "just a test";
-print(y);
+const max = 50000;
+
+var x = max;
+var s = "just a test";
+print(s);
 print("Starting GC torture test");
 print(x);
 while x > 0 {
-    "hello";
+    "1";
     x = x - 1;
 }
-print("END");
-print(y);
-y = "test";
-print(y);
-"";
+x = max;
+print(x);
+while x > 0 {
+    "1";
+    x = x - 1;
+}
+print(s);
+s = "test";
+print(s);
+"end of the world";