Merge branch 'gc-upgrade' into compiler-refactor

2023-02-25 17:37:32 +01:00 · 2023-02-25 17:37:32 +01:00 · 47ac1be6aa
parent 197367edae f4752a9f7c
commit 47ac1be6aa
3 changed files with 86 additions and 72 deletions
--- a/src/backend/vm.nim
+++ b/src/backend/vm.nim
@ -71,7 +71,6 @@ type
        cycles: int
        nextGC: int
        pointers: HashSet[uint64]
-        objects: seq[ptr HeapObject]
    PeonVM* = object
        ## The Peon Virtual Machine.
        ## Note how the only data
@ -105,7 +104,6 @@ proc newPeonGC*: PeonGC =
    ## Initializes a new, blank
    ## garbage collector
    result.bytesAllocated = (0, 0)
-    result.objects = @[]
    result.nextGC = FirstGC
    result.cycles = 0

@ -113,9 +111,19 @@ proc newPeonGC*: PeonGC =
 proc collect*(self: var PeonVM)


+# Our pointer tagging routines
+template tag(p: untyped): untyped = cast[pointer](cast[uint64](p) or (1'u64 shl 63'u64))
+template untag(p: untyped): untyped = cast[pointer](cast[uint64](p) and 0x7fffffffffffffff'u64)
+template getTag(p: untyped): untyped = (p and (1'u64 shl 63'u64)) == 0
+
+
 proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): pointer =
    ## Simple wrapper around realloc with
-    ## built-in garbage collection
+    ## built-in garbage collection. Callers
+    ## should keep in mind that the returned
+    ## pointer is tagged (bit 63 is set to 1)
+    ## and should be passed to untag() before
+    ## being dereferenced or otherwise used
    self.gc.bytesAllocated.current += newSize - oldSize
    try:
        when debugMem:
@ -137,9 +145,9 @@ proc reallocate*(self: var PeonVM, p: pointer, oldSize: int, newSize: int): poin
            when debugStressGC:
                self.collect()
            else:
-                if self.gc.bytesAllocated.current > self.gc.nextGC:
+                if self.gc.bytesAllocated.current >= self.gc.nextGC:
                    self.collect()
-        result = realloc(p, newSize)
+        result = tag(realloc(untag(p), newSize))
    except NilAccessDefect:
        stderr.writeLine("Peon: could not manage memory, segmentation fault")
        quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit
@ -167,17 +175,18 @@ template setKind[T, K](t: var T, kind: untyped, target: K) =


 proc allocate(self: var PeonVM, kind: ObjectKind, size: typedesc, count: int): ptr HeapObject {.inline.} =
-    ## Allocates an object on the heap
-    result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
+    ## Allocates an object on the heap and adds its
+    ## location to the internal pointer set of the
+    ## garbage collector
+    result = cast[ptr HeapObject](untag(self.reallocate(nil, 0, sizeof(HeapObject))))
    setkind(result[], kind, kind)
    result.marked = false
    case kind:
        of String:
-            result.str = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count))
+            result.str = cast[ptr UncheckedArray[char]](untag(self.reallocate(nil, 0, sizeof(size) * count)))
            result.len = count
        else:
            discard  # TODO
-    self.gc.objects.add(result)
    self.gc.pointers.incl(cast[uint64](result))
    when debugAlloc:
        echo &"DEBUG - GC: Allocated new object: {result[]}"
@ -195,54 +204,50 @@ proc mark(self: ptr HeapObject): bool =
    return true


-proc markRoots(self: var PeonVM): seq[ptr HeapObject] =
+proc markRoots(self: var PeonVM): HashSet[ptr HeapObject] =
    ## Marks root objects *not* to be
    ## collected by the GC and returns
    ## their addresses
    when debugGC:
        echo "DEBUG - GC: Starting mark phase"
-    # Unlike what bob does in his book,
-    # we keep track of objects in a different
-    # way due to the difference of our design.
-    # Specifically, we don't have neat structs for
-    # all peon objects: When we allocate() an object, 
-    # we keep track of the small wrapper it created 
-    # along with its type and other metadata. Then, 
-    # we can go through the various sources of roots 
-    # in the VM, see if they match any pointers we 
-    # already know about (we store them in a hash set so 
-    # it's really fast), and then we can be sure that 
-    # anything that's in the difference (i.e. mathematical
-    # set difference) between our full list of pointers
-    # and the live ones is not a root object, so if it's 
-    # not indirectly reachable through a root itself, it 
-    # can be freed. I'm not sure if I can call this GC 
-    # strategy precise, since technically there is a chance 
-    # for a regular value to collide with one of the pointers 
-    # we allocated and that would cause a memory leak, but 
-    # with a 64-bit address-space it probably hardly matters,
-    # so I guess this is a mostly-precise Mark&Sweep collector
-    var live = initHashSet[uint64](self.gc.pointers.len())
+    # Unlike what Bob does in his book, we keep track
+    # of objects another way, mainly due to the difference
+    # of our respective designs. Specifically, our VM only
+    # handles a single type (uint64), while Lox has a stack
+    # of heap-allocated structs (which is convenient, but slow).
+    # The previous implementation would just store all pointers
+    # allocated by us in a hash set and then check if any source
+    # of roots contained any of the integer values that it was
+    # keeping track of, but this meant that if a primitive object's
+    # value happened to collide with an active pointer the GC would
+    # mistakenly assume the object was reachable, potentially leading
+    # to a nasty memory leak. The current implementation uses pointer
+    # tagging: we know that modern CPUs never use bit 63 in addresses,
+    # so if it is not set we know it cannot be a pointer, and if it is set we
+    # just need to check if it's in our list of active addresses or not.
+    # This should resolve the potential memory leak (hopefully)
+    var result = initHashSet[uint64](self.gc.pointers.len())
    for obj in self.calls:
+        if not obj.getTag():
+            continue
        if obj in self.gc.pointers:
-            live.incl(obj)
+            result.incl(obj)
    for obj in self.operands:
+        if not obj.getTag():
+            continue
        if obj in self.gc.pointers:
-            live.incl(obj)
-    # We preallocate the space on the seq
-    result = newSeqOfCap[ptr HeapObject](len(live))
+            result.incl(obj)
    var obj: ptr HeapObject
-    for p in live:
+    for p in result:
        obj = cast[ptr HeapObject](p)
        if obj.mark():
-            result.add(obj)
-            when debugGC:
+            when debugMarkGC:
                echo &"DEBUG - GC: Marked object: {obj[]}"
    when debugGC:
        echo "DEBUG - GC: Mark phase complete"


-proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) = 
+proc trace(self: var PeonVM, roots: HashSet[ptr HeapObject]) = 
    ## Traces references to other
    ## objects starting from the
    ## roots. The second argument
@ -252,7 +257,8 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
    ## this is where we blacken gray
    ## objects
    when debugGC:
-        echo &"DEBUG - GC: Tracing indirect references from {len(roots)} roots"
+        if len(roots) > 0:
+            echo &"DEBUG - GC: Tracing indirect references from {len(roots)} root{(if len(roots) > 1: \"s\" else: \"\")}"
        var count = 0
    for root in roots:
        case root.kind:
@ -261,13 +267,16 @@ proc trace(self: var PeonVM, roots: seq[ptr HeapObject]) =
            else:
                discard  # TODO: Other types
    when debugGC:
-        echo &"DEBUG - GC: Traced {count} indirect references"
+        echo &"DEBUG - GC: Traced {count} indirect reference{(if count != 1: \"s\" else: \"\")}"


 proc free(self: var PeonVM, obj: ptr HeapObject) =
    ## Frees a single heap-allocated
    ## peon object and all the memory
-    ## it directly or indirectly owns
+    ## it directly or indirectly owns. Note
+    ## that the pointer itself is not released
+    ## from the GC's internal table and must be
+    ## handled by the caller
    when debugAlloc:
        echo &"DEBUG - GC: Freeing object: {obj[]}"
    case obj.kind:
@ -279,7 +288,6 @@ proc free(self: var PeonVM, obj: ptr HeapObject) =
        else:
            discard  # TODO
    self.free(HeapObject, obj)
-    self.gc.pointers.excl(cast[uint64](obj))
    when debugAlloc:
        echo &"DEBUG - GC: Current heap size: {self.gc.bytesAllocated.current}"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
@ -291,37 +299,32 @@ proc sweep(self: var PeonVM) =
    ## Sweeps unmarked objects
    ## that have been left behind
    ## during the mark phase.
-    ## This is more convoluted
-    ## than it needs to be because
-    ## nim disallows changing the
-    ## size of a sequence during
-    ## iteration
    when debugGC:
        echo "DEBUG - GC: Beginning sweeping phase"
-    var j = -1
-    var idx = 0
    when debugGC:
        var count = 0
-    while j < self.gc.objects.high():
-        inc(j)
-        if self.gc.objects[j].marked:
+    var current: ptr HeapObject
+    var freed: HashSet[uint64]
+    for p in self.gc.pointers:
+        current = cast[ptr HeapObject](p)
+        if current.marked:
            # Object is marked: don't touch it,
            # but reset its mark so that it doesn't
            # stay alive forever
-            when debugGC:
-                echo &"DEBUG - GC: Unmarking object: {self.gc.objects[j][]}"
-            self.gc.objects[j].marked = false
-            inc(idx)
+            when debugMarkGC:
+                echo &"DEBUG - GC: Unmarking object: {current[]}"
+            current.marked = false
        else:
            # Object is unmarked: its memory is
            # fair game
-            self.free(self.gc.objects[idx])
-            self.gc.objects.delete(idx)
-            inc(idx)
+            self.free(current)
+            freed.incl(p)
            when debugGC:
                inc(count)
+    # Set difference
+    self.gc.pointers = self.gc.pointers - freed
    when debugGC:
-        echo &"DEBUG - GC: Swept {count} objects"
+        echo &"DEBUG - GC: Swept {count} object{(if count > 1: \"s\" else: \"\")}"


 proc collect(self: var PeonVM) =
@ -331,6 +334,7 @@ proc collect(self: var PeonVM) =
    when debugGC:
        let before = self.gc.bytesAllocated.current
        let time = getMonoTime().ticks().float() / 1_000_000
+        echo ""
        echo &"DEBUG - GC: Starting collection cycle at heap size {self.gc.bytesAllocated.current}"
        echo &"DEBUG - GC: Total bytes allocated: {self.gc.bytesAllocated.total}"
        echo &"DEBUG - GC: Tracked objects: {self.gc.pointers.len()}"
@ -339,6 +343,8 @@ proc collect(self: var PeonVM) =
    self.trace(self.markRoots())
    self.sweep()
    self.gc.nextGC = self.gc.bytesAllocated.current * HeapGrowFactor
+    if self.gc.nextGC == 0:
+        self.gc.nextGC = FirstGC
    when debugGC:
        echo &"DEBUG - GC: Collection cycle has terminated in {getMonoTime().ticks().float() / 1_000_000 - time:.2f} ms, collected {before - self.gc.bytesAllocated.current} bytes of memory in total"
        echo &"DEBUG - GC: Next cycle at {self.gc.nextGC} bytes"
--- a/src/config.nim
+++ b/src/config.nim
@ -17,13 +17,14 @@ import strformat
 # These variables can be tweaked to debug and test various components of the toolchain
 const debugLexer* {.booldefine.} = false       # Print the tokenizer's output
 const debugParser* {.booldefine.} = false      # Print the AST generated by the parser
-const debugCompiler* {.booldefine.} = false    # Disassemble and /or print the code generated by the compiler
+const debugCompiler* {.booldefine.} = false    # Disassemble and/or print the code generated by the compiler
 const debugVM* {.booldefine.} = false          # Enable the runtime debugger in the bytecode VM
 const debugGC* {.booldefine.} = false          # Debug the Garbage Collector (extremely verbose)
 const debugAlloc* {.booldefine.} = false       # Trace object allocation (extremely verbose)
 const debugMem* {.booldefine.} = false         # Debug the memory allocator (extremely verbose)
 const debugSerializer* {.booldefine.} = false  # Validate the bytecode serializer's output
 const debugStressGC* {.booldefine.} = false    # Make the GC run a collection at every allocation (VERY SLOW!)
+const debugMarkGC* {.booldefine.} = false      # Trace the marking phase object by object (extremely verbose)
 const PeonBytecodeMarker* = "PEON_BYTECODE"    # Magic value at the beginning of bytecode files
 const HeapGrowFactor* = 2                      # The growth factor used by the GC to schedule the next collection
 const FirstGC* = 1024 * 1024;                  # How many bytes to allocate before running the first GC
--- a/tests/gc.pn
+++ b/tests/gc.pn
@ -1,17 +1,24 @@
 import std;


-var x = 10000000;
-var y = "just a test";
-print(y);
+const max = 50000;
+
+var x = max;
+var s = "just a test";
+print(s);
 print("Starting GC torture test");
 print(x);
 while x > 0 {
-    "hello";
+    "1";
    x = x - 1;
 }
-print("END");
-print(y);
-y = "test";
-print(y);
-"";
+x = max;
+print(x);
+while x > 0 {
+    "1";
+    x = x - 1;
+}
+print(s);
+s = "test";
+print(s);
+"end of the world";