Implement a M&S collector

This commit is contained in:
Mattia Giambirtone 2022-08-18 03:17:52 +02:00
parent 36970e493b
commit ae6da275fa
3 changed files with 250 additions and 108 deletions

View File

@ -12,24 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
## The Peon runtime environment
{.push checks:off.} # The VM is a critical point where checks are deleterious
import std/monotimes
import std/math
import std/segfaults
import std/strutils
import std/sets
import ../config
import ../frontend/meta/bytecode
import ../util/multibyte
import ../memory/allocator
import strutils
when debugVM:
when debugVM or debugMem:
import std/strformat
import std/terminal
{.push checks:off.} # The VM is a critical point where checks are deleterious
type
PeonVM* = ref object
@ -54,14 +55,246 @@ type
frames: seq[uint64] # Stores the bottom of stack frames
closedOver: seq[uint64] # Stores variables that do not have stack semantics
results: seq[uint64] # Stores function's results (return values)
gc: PeonGC
ObjectKind* = enum
## A tag for heap-allocated
## peon objects
# Only String carries a payload in HeapObject so far:
# every other kind is still a placeholder
String, List,
Dict, Tuple,
CustomType
HeapObject* = object
## A tagged box for a heap-allocated
## peon object
# Mark bit of the collector: set while marking, cleared
# again when the object survives a sweep
marked*: bool
case kind*: ObjectKind
of String:
# Character buffer owned by the box (released together with it)
str*: ptr UncheckedArray[char]
# Number of characters stored in str
len*: int
else:
discard # TODO
PeonGC* = ref object
## A simple Mark&Sweep collector
## to manage peon's heap space
# The VM whose stacks are scanned for roots
vm: PeonVM
# Byte counters, updated on every call to reallocate()
bytesAllocated: tuple[total, current: int]
# Threshold: a collection is triggered once the
# current byte count grows past this value
nextGC: int
# Addresses of every box handed out by allocate(),
# used to recognize pointers among raw VM values
pointers: HashSet[uint64]
# Every box handed out by allocate() and not yet freed
objects: seq[ptr HeapObject]
# Implementation of peon's memory manager
proc newPeonGC*: PeonGC =
  ## Creates a fresh garbage collector: nothing
  ## is allocated yet, no object is tracked and
  ## the first collection is scheduled at FirstGC
  ## bytes. The vm field is left unset
  result = PeonGC(bytesAllocated: (total: 0, current: 0),
                  objects: newSeq[ptr HeapObject](),
                  nextGC: FirstGC)
# Forward declaration: reallocate() triggers collections,
# but collect() itself is implemented further down
proc collect*(self: PeonGC)
proc reallocate*(self: PeonGC, p: pointer, oldSize: int, newSize: int): pointer =
  ## Simple wrapper around realloc/dealloc which also keeps
  ## the collector's byte counters up to date and triggers
  ## a collection when the heap grows past the current
  ## threshold.
  ##
  ## - newSize == 0 and p is not nil: the block is released
  ##   and nil is returned
  ## - oldSize == 0 and newSize > 0: a block of newSize bytes
  ##   is allocated (or p is resized, if it is not nil)
  ## - oldSize > 0, newSize > 0 and p is not nil: the block
  ##   is resized (grown or shrunk) to newSize bytes
  ##
  ## Any other request (e.g. freeing or resizing a nil
  ## pointer) is ignored and nil is returned
  self.bytesAllocated.total += newSize - oldSize
  self.bytesAllocated.current += newSize - oldSize
  # Only a growing heap can warrant a collection. Checking the
  # threshold while memory is being released would re-enter
  # collect() from sweep(), which frees objects through this
  # very procedure
  if newSize > oldSize and self.bytesAllocated.current > self.nextGC:
    self.collect()
  try:
    if newSize == 0 and not p.isNil():
      when debugMem:
        if oldSize > 1:
          echo &"DEBUG - Memory manager: Deallocating {oldSize} bytes of memory"
        else:
          echo "DEBUG - Memory manager: Deallocating 1 byte of memory"
      dealloc(p)
    elif newSize > 0 and (oldSize == 0 or (oldSize > 0 and not p.isNil())):
      # Shrinking and same-size requests are served here as well,
      # so the caller always gets a valid pointer back
      when debugStressGC:
        self.collect()
      when debugMem:
        if oldSize == 0:
          if newSize > 1:
            echo &"DEBUG - Memory manager: Allocating {newSize} bytes of memory"
          else:
            echo "DEBUG - Memory manager: Allocating 1 byte of memory"
        else:
          echo &"DEBUG - Memory manager: Resizing {oldSize} bytes of memory to {newSize} bytes"
      result = realloc(p, newSize)
    when debugMem:
      if p.isNil() and newSize == 0:
        echo &"DEBUG - Memory manager: Warning, asked to dealloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
      elif oldSize > 0 and p.isNil():
        echo &"DEBUG - Memory manager: Warning, asked to realloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
  except NilAccessDefect:
    stderr.write("Peon: could not manage memory, segmentation fault\n")
    quit(139)  # For now, there's not much we can do if we can't get the memory we need, so we exit
template resizeArray*(self: PeonGC, kind: untyped, p: pointer, oldCount, newCount: int): untyped =
  ## Resizes the dynamic array at p from oldCount to
  ## newCount elements of type kind and yields the
  ## result as a pointer to an unchecked array of kind
  cast[ptr UncheckedArray[kind]](self.reallocate(p, oldCount * sizeof(kind), newCount * sizeof(kind)))
template freeArray*(self: PeonGC, kind: untyped, p: pointer, size: int): untyped =
  ## Releases a dynamic array of size elements
  ## of type kind by reallocating it to 0 bytes
  discard self.reallocate(p, size * sizeof(kind), 0)
template free*(self: PeonGC, kind: typedesc, p: pointer): untyped =
  ## Releases a single value of type kind living
  ## at p by reallocating it to 0 bytes
  discard self.reallocate(p, sizeof(kind), 0)
proc allocate*(self: PeonGC, kind: ObjectKind, size: typedesc, count: int): ptr HeapObject {.inline.} =
  ## Allocates an object of the given kind on the heap and
  ## registers it with the collector. For strings, a buffer
  ## of count elements of type size is allocated as well and
  ## stored in the box together with its length. Other kinds
  ## only get an empty, tagged box for now.
  ##
  ## Byte accounting is left entirely to reallocate(): counting
  ## here as well would add bytes that a later free never
  ## subtracts, making the collector run more and more often
  result = cast[ptr HeapObject](self.reallocate(nil, 0, sizeof(HeapObject)))
  # The memory we get back is uninitialized: start from a clean slate
  zeroMem(result, sizeof(HeapObject))
  case kind:
    of String:
      let buffer = cast[ptr UncheckedArray[char]](self.reallocate(nil, 0, sizeof(size) * count))
      result[] = HeapObject(kind: String, marked: false, str: buffer, len: count)
    else:
      # TODO: no payload yet, but the box must still carry its tag
      result[] = HeapObject(kind: kind, marked: false)
  # The box is registered last, so a collection triggered by the
  # allocations above cannot sweep a half-built object
  self.objects.add(result)
  self.pointers.incl(cast[uint64](result))
proc mark(self: ptr HeapObject): bool =
  ## Sets the mark bit of a single object. Returns
  ## true if the object has just been marked, false
  ## if it is nil or was marked already
  result = not self.isNil() and not self.marked
  if result:
    self.marked = true
proc mark(self: PeonGC): seq[ptr HeapObject] =
  ## Marks objects *not* to be
  ## collected by the GC and returns
  ## them
  # Unlike what bob does in his book,
  # we keep track of objects in a different
  # way due to how the whole thing is designed.
  # Specifically, we don't have neat structs for
  # all peon objects.
  # When we allocate() an object, we keep track
  # of the box it created along with its type and
  # other metadata, as well as the address of said
  # box. Then, we go through the various sources
  # of roots in the VM and see if they match any of
  # the pointers we already know about (using a hash
  # set, so it's really fast). Anything that is in the
  # set difference between our full list of pointers
  # and the live ones is not a root object, so if it's
  # not indirectly reachable through a root either, it
  # can be freed. This strategy is only mostly precise:
  # a regular value can collide with one of the addresses
  # we allocated, which would keep a dead object alive
  # (a leak, never a premature free). With a 64-bit
  # address space that hardly matters
  var live = initHashSet[uint64]()
  for value in self.vm.calls:
    if value in self.pointers:
      live.incl(value)
  for value in self.vm.operands:
    if value in self.pointers:
      live.incl(value)
  for value in self.vm.closedOver:
    if value in self.pointers:
      live.incl(value)
  # Return values are parked here: an object referenced
  # only from this stack must survive a collection too
  for value in self.vm.results:
    if value in self.pointers:
      live.incl(value)
  # We preallocate the space on the seq
  result = newSeqOfCap[ptr HeapObject](len(live))
  for address in live:
    let obj = cast[ptr HeapObject](address)
    if obj.mark():
      result.add(obj)
      when debugMem:
        if result.len() > 0:
          echo &"DEBUG - GC: Marking object: {result[^1][]}"
proc trace(self: PeonGC, roots: seq[ptr HeapObject]) =
  ## Follows the references held by each root
  ## (the output of the mark phase) to other
  ## heap objects. No supported kind references
  ## other objects yet, so this currently does
  ## nothing
  for obj in roots.items():
    if obj.kind == String:
      # Strings hold no additional references
      continue
    # TODO: every other kind
proc free(self: PeonGC, obj: ptr HeapObject) =
  ## Releases a single heap-allocated peon
  ## object together with the memory it owns,
  ## then forgets its address
  let address = cast[uint64](obj)
  if obj.kind == String:
    # The character buffer is the only
    # thing a string owns
    self.freeArray(char, obj.str, obj.len)
  # TODO: every other kind
  self.free(HeapObject, obj)
  self.pointers.excl(address)
proc sweep(self: PeonGC) =
  ## Sweeps unmarked objects that have been
  ## left behind during the mark phase.
  ## Survivors get their mark bit cleared (so
  ## they don't stay alive forever) and are
  ## compacted towards the front of the object
  ## list, which is truncated once at the end:
  ## this avoids deleting from the seq while
  ## walking it
  var kept = 0
  for i in 0..self.objects.high():
    let obj = self.objects[i]
    if obj.marked:
      # Object is marked: keep it, but reset its
      # mark for the next collection cycle
      obj.marked = false
      self.objects[kept] = obj
      inc(kept)
    else:
      # Object is unmarked: its memory is
      # fair game
      self.free(obj)
  self.objects.setLen(kept)
proc collect(self: PeonGC) =
  ## Attempts to reclaim some memory from
  ## unreachable objects on the heap: marks
  ## the roots, traces them, sweeps what is
  ## left and schedules the next collection
  ## at HeapGrowFactor times the bytes still
  ## in use
  when debugGC:
    let before = self.bytesAllocated.current
    echo "DEBUG - GC: Starting collection cycle"
  self.trace(self.mark())
  self.sweep()
  self.nextGC = self.bytesAllocated.current * HeapGrowFactor
  when debugGC:
    # Plain echo on purpose: strformat is not imported
    # when only debugGC is enabled
    echo "DEBUG - GC: Collection cycle has terminated, collected ",
         before - self.bytesAllocated.current, " bytes of memory in total"
proc initCache*(self: PeonVM) =
  ## Initializes the VM's
  ## singletons cache. Each
  ## slot holds its own index
  ## as value
  self.cache[0] = 0x0  # False
  self.cache[1] = 0x1  # True
  self.cache[2] = 0x2  # Nil
  self.cache[3] = 0x3  # Positive inf
  self.cache[4] = 0x4  # Negative inf
  self.cache[5] = 0x5  # NaN
@ -76,6 +309,8 @@ proc newPeonVM*: PeonVM =
result.calls = newSeq[uint64]()
result.operands = newSeq[uint64]()
result.initCache()
result.gc = newPeonGC()
result.gc.vm = result
# Getters for singleton types
@ -144,7 +379,6 @@ proc peek(self: PeonVM, distance: int = 0): uint64 =
return self.operands[self.operands.high() + distance]
proc pushc(self: PeonVM, val: uint64) =
## Pushes a value to the
## call stack
@ -309,15 +543,15 @@ proc constReadFloat64(self: PeonVM, idx: int): float =
copyMem(result.addr, arr.addr, sizeof(arr))
proc constReadString(self: PeonVM, size, idx: int): ptr HeapObject =
  ## Reads a constant of size bytes starting at
  ## index idx from the chunk's constant table
  ## and returns it as a pointer to a string
  ## box allocated through the VM's collector
  let str = self.chunk.consts[idx..<idx + size].fromBytes()
  result = self.gc.allocate(String, char, len(str))
  # Copy the characters into the buffer owned by the box
  for i, c in str:
    result.str[i] = c
{.pop.}
@ -681,7 +915,10 @@ proc dispatch*(self: PeonVM) =
of PrintNan:
echo "nan"
of PrintString:
echo $cast[ptr UncheckedArray[char]](self.pop()) # TODO
let s = cast[ptr HeapObject](self.pop())
for i in 0..<s.len:
stdout.write(s.str[i])
stdout.write("\n")
of SysClock64:
# Pushes the value of a monotonic clock
# onto the operand stack. This can be used

View File

@ -22,8 +22,10 @@ const debugVM* {.booldefine.} = false
const
  debugGC* {.booldefine.} = false          # Traces collection cycles
  debugMem* {.booldefine.} = false         # Traces the memory manager
  debugSerializer* {.booldefine.} = false
  debugStressGC* {.booldefine.} = false    # Runs a collection on every allocation or resize
  PeonBytecodeMarker* = "PEON_BYTECODE"
  HeapGrowFactor* = 2   # How much extra memory to allocate for dynamic arrays and garbage collection when resizing
  FirstGC* = 1024 * 1024   # Heap size, in bytes, past which the first collection is triggered
when HeapGrowFactor <= 1:
  {.fatal: "Heap growth factor must be > 1".}
const PeonVersion* = (major: 0, minor: 1, patch: 0)

View File

@ -1,97 +0,0 @@
# Copyright 2022 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Memory allocator from JAPL
import std/segfaults
import ../config
when debugMem:
import std/strformat
proc reallocate*(p: pointer, oldSize: int, newSize: int): pointer =
  ## Thin layer over realloc/dealloc. Releases p and
  ## returns nil when newSize is 0, otherwise allocates
  ## or resizes to newSize bytes. Requests involving a
  ## nil pointer with a nonzero oldSize are ignored and
  ## yield nil
  let hasBlock = not p.isNil()
  try:
    if hasBlock and newSize == 0:
      when debugMem:
        if oldSize > 1:
          echo &"DEBUG - Memory manager: Deallocating {oldSize} bytes"
        else:
          echo "DEBUG - Memory manager: Deallocating 1 byte"
      dealloc(p)
      return nil
    if oldSize == 0 or (oldSize > 0 and hasBlock):
      when debugMem:
        if oldSize != 0:
          echo &"DEBUG - Memory manager: Resizing {oldSize} bytes of memory to {newSize} bytes"
        elif newSize > 1:
          echo &"DEBUG - Memory manager: Allocating {newSize} bytes of memory"
        else:
          echo "DEBUG - Memory manager: Allocating 1 byte of memory"
      result = realloc(p, newSize)
    when debugMem:
      if not hasBlock and newSize == 0:
        echo &"DEBUG - Memory manager: Warning, asked to dealloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
      elif oldSize > 0 and not hasBlock:
        echo &"DEBUG - Memory manager: Warning, asked to realloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
  except NilAccessDefect:
    stderr.write("Peon: could not manage memory, segmentation fault\n")
    quit(139)  # For now, there's not much we can do if we can't get the memory we need, so we exit
type
  ObjectKind* = enum
    ## The kinds of heap-allocated
    ## peon objects
    String,
    List,
    Dict,
    Tuple,
    CustomType
  HeapObject* = object
    ## A tag for a heap-allocated
    ## peon object
    case kind*: ObjectKind
      of String:
        str*: ptr UncheckedArray[char]  # Character buffer
        len*: uint64                    # Length stored alongside the buffer
      else:
        discard  # TODO
template resizeArray*(kind: untyped, p: pointer, oldCount, newCount: int): untyped =
  ## Resizes the dynamic array at p from oldCount to
  ## newCount elements of type kind and yields it as
  ## a pointer to an unchecked array of kind
  cast[ptr UncheckedArray[kind]](reallocate(p, oldCount * sizeof(kind), newCount * sizeof(kind)))
template freeArray*(kind: untyped, p: pointer, size: int): untyped =
  ## Releases a dynamic array of size elements of
  ## type kind by reallocating it to 0 bytes. Yields
  ## whatever reallocate() returns
  reallocate(p, size * sizeof(kind), 0)
template free*(kind: untyped, p: pointer): untyped =
  ## Releases a single value of type kind living
  ## at p by reallocating it to 0 bytes. Yields
  ## whatever reallocate() returns
  reallocate(p, sizeof(kind), 0)
template growCapacity*(capacity: int): untyped =
  ## Computes the next capacity of a dynamic array
  ## that needs to grow: 8 for anything smaller than
  ## 8, HeapGrowFactor times the current capacity
  ## otherwise
  (if capacity >= 8: capacity * HeapGrowFactor else: 8)
template allocate*(castTo: untyped, sizeTo: untyped, count: int): untyped =
  ## Allocates room for count values of type sizeTo and
  ## yields the new block as a pointer to castTo
  cast[ptr castTo](reallocate(nil, 0, count * sizeof(sizeTo)))