Moved to custom hashmap

This commit is contained in:
nocturn9x 2021-03-18 15:09:36 +01:00
parent cd180fe4d7
commit 22d6408236
14 changed files with 182 additions and 108 deletions

View File

@ -196,7 +196,7 @@ def build(path: str, flags: Optional[Dict[str, str]] = {}, options: Optional[Dic
logging.debug("Running tests")
start = time()
# TODO: Find a better way of running the test suite
process = run_command(f"{tests_path} {'-e' if verbose else ''}", mode="run", shell=True, stderr=PIPE)
process = run_command(f"{tests_path} {'--stdout' if verbose else ''}", mode="run", shell=True, stderr=PIPE)
if status != 0:
logging.error(f"Command '{command}' exited with non-0 exit code {status}, output below:\n{stderr.decode()}")
return False

View File

@ -31,7 +31,7 @@ import types/function
import types/numbers
import types/japlString
import types/iterable
import types/arraylist
import types/arrayList
import config
when isMainModule:
import util/debug

View File

@ -18,6 +18,7 @@
import strformat
import parseopt
import os
import options
import config
import vm
@ -28,7 +29,6 @@ import types/methods
import jale/editor
import jale/templates
import jale/plugin/defaults
import jale/plugin/history
import jale/plugin/editor_history
@ -40,10 +40,12 @@ proc getLineEditor: LineEditor =
result.bindHistory(hist) # set default history keybindings
proc repl(bytecodeVM: VM) =
var bytecodeVM = bytecodeVM
if bytecodeVM == nil:
proc repl(vmObj: Option[VM]) =
var bytecodeVM = VM()
if vmObj.isNone():
bytecodeVM = initVM()
else:
bytecodeVM = vmObj.get()
echo JAPL_VERSION_STRING
let nimDetails = &"[Nim {NimVersion} on {hostOs} ({hostCPU})]"
echo nimDetails
@ -72,7 +74,7 @@ proc repl(bytecodeVM: VM) =
proc main(file: var string = "", fromString: bool = false, interactive: bool = false) =
var source: string
if file == "" and not fromString:
repl(nil)
repl(none(VM))
return # We exit after the REPL has run
if not fromString:
var sourceFile: File
@ -91,7 +93,7 @@ proc main(file: var string = "", fromString: bool = false, interactive: bool = f
var bytecodeVM = initVM()
discard bytecodeVM.interpret(source, file)
if interactive:
repl(bytecodeVM)
repl(some(bytecodeVM))
bytecodeVM.freeVM()

View File

@ -18,7 +18,7 @@
import ../types/function
import ../types/baseObject
import ../types/arraylist
import ../types/arrayList
{.experimental: "implicitDeref".}

View File

@ -16,7 +16,7 @@
## A chunk is a piece of bytecode together with its constants
import ../types/baseObject
import ../types/arraylist
import ../types/arrayList
type
Chunk* = object

View File

@ -28,6 +28,7 @@ import strformat
import parseutils
import strutils
template join(args: seq[ptr Obj]): string =
## A template that returns the string
## representation of all args separated
@ -41,6 +42,7 @@ template join(args: seq[ptr Obj]): string =
res = res & arg.stringify()
res
proc natPrint*(args: seq[ptr Obj]): tuple[kind: retNative, result: ptr Obj] =
## Native function print
## Prints an object representation
@ -53,6 +55,7 @@ proc natPrint*(args: seq[ptr Obj]): tuple[kind: retNative, result: ptr Obj] =
echo join(args)
return (kind: retNative.Nil, result: nil)
proc natPrintErr*(args: seq[ptr Obj]): tuple[kind:
retNative, result: ptr Obj] =
## Native function printErr
@ -63,6 +66,7 @@ proc natPrintErr*(args: seq[ptr Obj]): tuple[kind:
writeLine stderr, join(args)
return (kind: retNative.Nil, result: nil)
proc natReadline*(args: seq[ptr Obj]): tuple[kind: retNative, result: ptr Obj] =
## Native function readline
## Reads a line from stdin and returns

View File

@ -1,7 +1,7 @@
import baseObject
import ../meta/opcode
import japlString
import arraylist
import arrayList
type

135
src/types/hashmap.nim → src/types/hashMap.nim Normal file → Executable file
View File

@ -13,6 +13,12 @@
# limitations under the License.
# This module implements a very simple (yet hella fast!) associative array.
# Although this module is *meant* to be used for JAPL only, the implementation
# allows for any nim type to be stored in it thanks to the options module. You
# could literally replace nim's tables implementation with this and get identical
# behavior (well, assuming the GC doesn't fuck you up, which it probably will)
import ../memory
import ../config
@ -20,28 +26,40 @@ import baseObject
import methods
import iterable
import lenientops
# We import just the *BARE* minimum for this bad boy to work,
# since we want as little interference from nim's own GC
# as possible. This code might need slight modifications to work
# outside of the JAPL runtime
import options
import hashes
import strformat
type
Entry*[K, V] = object
## Low-level object to store key/value pairs
key*: Option[K]
value*: Option[V]
tombstone*: bool
Entry[K, V] = object
## Low-level object to store key/value pairs.
## Using an extra value for marking the entry as
## a tombstone instead of something like detecting
## tombstones as entries with null keys but full values
## may seem wasteful. The thing is, though, that since
## we want to implement sets on top of this hashmap and
## the implementation of a set is *literally* a dictionary
## with empty values and keys as the elements, this would
## confuse our findEntry method and would force us to override
## it to account for a different behavior.
## Using a third field takes up more space, but saves us
## from the hassle of rewriting code
key: Option[K]
value: Option[V]
tombstone: bool
HashMap*[K, V] = object of Iterable
## An associative array with O(1) lookup time,
## similar to nim's Table type, but using raw
## memory to be more compatible with JAPL's runtime
## memory management
entries*: ptr UncheckedArray[ptr Entry[K, V]]
entries: ptr UncheckedArray[ptr Entry[K, V]]
# This attribute counts *only* non-deleted entries
actual_length*: int
actual_length: int
proc newHashMap*[K, V](): ptr HashMap[K, V] =
@ -67,16 +85,19 @@ proc findEntry[K, V](self: ptr UncheckedArray[ptr Entry[K, V]], key: K, capacity
## array, returns a pointer to an entry
var capacity = uint64(capacity)
var idx = uint64(key.hash()) mod capacity
var tombstone: ptr Entry[K, V] = nil
while true:
result = self[idx]
if result.key.isNone() and result.key.isSome():
# We found a tombstone
tombstone = result
elif result.key.isNone() or result.key.get() == key:
if tombstone != nil:
result = tombstone
result.tombstone = true
if result.key.isNone() or result.tombstone:
# If we got here, we either found an
# empty bucket or a tombstone. In both cases
# we're done, so we simply exit the loop: the
# tombstone field itself is only reset later,
# by setEntry, once the slot is actually reused
break
elif result.key.get() == key:
# This if will never error out because if
# an entry is a tombstone, its values are
# also nullified
break
# If none of these conditions match, we have a collision!
# This means we can just move on to the next slot in our probe
@ -84,7 +105,7 @@ proc findEntry[K, V](self: ptr UncheckedArray[ptr Entry[K, V]], key: K, capacity
# mechanism works makes the empty slot invariant easy to
# maintain since we increase the underlying array's size
# before we are actually full
idx += 1 mod capacity
idx = (idx + 1) mod capacity
proc adjustCapacity[K, V](self: ptr HashMap[K, V]) =
@ -92,18 +113,20 @@ proc adjustCapacity[K, V](self: ptr HashMap[K, V]) =
## for more entries. Low-level method, not recommended
var newCapacity = growCapacity(self.capacity)
var entries = allocate(UncheckedArray[ptr Entry[K, V]], Entry[K, V], newCapacity)
var oldEntry: ptr Entry[K, V]
var newEntry: ptr Entry[K, V]
self.length = 0
var temp: ptr Entry[K, V]
for x in countup(0, newCapacity - 1):
entries[x] = allocate(Entry[K, V], Entry[K, V], 1)
temp = entries[x]
temp.key = none(K)
temp.value = none(V)
temp.tombstone = false
for x in countdown(self.capacity - 1, 0):
temp = self.entries[x]
if temp.key.isSome():
entries[x] = temp
entries[x].tombstone = false
entries[x].key = none(K)
entries[x].value = none(V)
for x in countup(0, self.capacity - 1):
oldEntry = self.entries[x]
if oldEntry.key.isSome():
newEntry = entries.findEntry(oldEntry.key.get(), newCapacity)
newEntry.key = oldEntry.key
newEntry.value = oldEntry.value
self.length += 1
discard freeArray(UncheckedArray[ptr Entry[K, V]], self.entries, self.capacity)
self.entries = entries
@ -112,7 +135,11 @@ proc adjustCapacity[K, V](self: ptr HashMap[K, V]) =
proc setEntry[K, V](self: ptr HashMap[K, V], key: K, value: V): bool =
## Low-level method to set/replace an entry with a value
if self.length + 1 > self.capacity * MAP_LOAD_FACTOR:
# This seems a bit stupid, but since we want as little interference
# from nim's runtime as possible, instead of using the lenientops
# module we just convert all integers to float and yolo it
if float64(self.length + 1) >= float64(self.capacity) * MAP_LOAD_FACTOR:
# Since we always need at least some empty slots
# for our probe sequences to work properly, we
# always resize our underlying array before we're full.
@ -127,13 +154,16 @@ proc setEntry[K, V](self: ptr HashMap[K, V], key: K, value: V): bool =
self.length += 1
entry.key = some(key)
entry.value = some(value)
# Now we can make the new entry an actual full bucket
# and remove the tombstone flag
entry.tombstone = false
proc `[]`*[K, V](self: ptr HashMap[K, V], key: K): V =
## Retrieves a value by key
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isNone():
raise newException(KeyError, &"Key not found: {key}")
if entry.key.isNone() or entry.tombstone:
raise newException(KeyError, "Key not found: " & $key)
result = entry.value.get()
@ -149,37 +179,41 @@ proc del*[K, V](self: ptr HashMap[K, V], key: K) =
raise newException(KeyError, &"delete from empty hashmap")
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isSome():
## We don't reset the value of the
## 'value' attribute because that
## makes us understand that this
## entry is a tombstone and not
## a truly full bucket
self.actual_length -= 1
entry.key = none(K)
entry.tombstone = true
else:
raise newException(KeyError, &"Key not found: {key}")
raise newException(KeyError, "Key not found: " & $key)
proc contains*[K, V](self: ptr HashMap[K, V], key: K): bool =
## Checks if key is in the hashmap
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isSome():
let entry = findEntry(self.entries, key, self.capacity)
if entry.key.isSome() and not entry.tombstone:
result = true
else:
result = false
iterator keys*[K, V](self: ptr HashMap[K, V]): K =
## Yields all the keys in the hashmap
## Yields all the keys in the hashmap. This
## is the lowest-level iterator we have and it's
## the only one actually dealing with pointers
## and all that good stuff. All other iterators
## are based on this
var entry: ptr Entry[K, V]
for i in countup(0, self.capacity - 1):
entry = self.entries[i]
if entry.key.isSome():
if entry.key.isSome() and not entry.tombstone:
yield entry.key.get()
iterator values*[K, V](self: ptr HashMap[K, V]): V =
## Yields all the values in the hashmap
## Yields all the values in the hashmap.
## This could *technically* be slightly more
## efficient if we just iterated over our
## entries directly, but if we can't take
## advantage of our constant lookup time
## then what's the point? :)
for key in self.keys():
yield self[key]
@ -214,17 +248,4 @@ proc `$`*[K, V](self: ptr HashMap[K, V]): string =
proc typeName*[K, V](self: ptr HashMap[K, V]): string =
result = "dict"
var d = newHashMap[int, int]()
d[1] = 55
d[2] = 876
d[3] = 7890
d[4] = 55
d[5] = 435
d[6] = 567
d[7] = 21334 ## Adjust capacity (75% full)
d[8] = 9768
d[9] = 235
echo d
result = "dict"

View File

@ -46,3 +46,5 @@ proc hash*(self: ptr Nil): uint64 =
proc eq*(self, other: ptr Nil): bool =
result = true

View File

@ -28,7 +28,7 @@ import typeutils
import ../config
import ../memory
import ../meta/opcode
import arraylist
import arrayList
import exception
import strformat

View File

@ -48,7 +48,7 @@ proc isCallable*(obj: ptr Obj): bool =
result = obj.kind in {ObjectType.Function, ObjectType.Class, ObjectType.Native}
proc isNil*(obj: ptr Obj): bool =
proc isJaplNil*(obj: ptr Obj): bool =
## Returns true if the given obj
## is a JAPL nil object
result = obj.kind == ObjectType.Nil

View File

@ -18,19 +18,21 @@
import ../meta/opcode
import ../types/baseObject
import ../types/methods
import ../types/arraylist
import ../types/arrayList
import strformat
import terminal
import ../multibyte
proc printName(name: string) =
setForegroundColor(fgGreen)
setForegroundColor(fgRed)
write stdout, name
setForegroundColor(fgDefault)
setForegroundColor(fgGreen)
proc nl =
## Writes a single newline character to stdout
write stdout, "\n"
proc simpleInstruction(name: string, index: int): int =
write stdout, &"DEBUG - VM:\tInstruction -> "
printName(name)
@ -54,10 +56,19 @@ proc constantInstruction(name: string, chunk: Chunk, offset: int): int =
copyMem(constant.addr, constantArray.addr, sizeof(constantArray))
write stdout, &"DEBUG - VM:\tInstruction -> "
printName(name)
write stdout, &", points to slot {constant}"
write stdout, &", points to slot "
setForegroundColor(fgYellow)
write stdout, &"{constant}"
nl()
let obj = chunk.consts[constant]
echo &"DEBUG - VM:\tOperand -> {stringify(obj)}\nDEBUG - VM:\tValue kind -> {obj.kind}"
setForegroundColor(fgGreen)
stdout.write(&"DEBUG - VM:\tOperand -> ")
setForegroundColor(fgYellow)
stdout.write(&"{stringify(obj)}")
setForegroundColor(fgGreen)
stdout.write("\nDEBUG - VM:\tValue kind -> ")
setForegroundColor(fgYellow)
stdout.write(&"{obj.kind}\n")
return offset + 4
@ -72,7 +83,15 @@ proc jumpInstruction(name: string, chunk: Chunk, offset: int): int =
proc disassembleInstruction*(chunk: Chunk, offset: int): int =
## Takes one bytecode instruction and prints it
echo &"DEBUG - VM:\tOffset: {offset}\nDEBUG - VM:\tLine: {chunk.lines[offset]}"
setForegroundColor(fgGreen)
stdout.write(&"DEBUG - VM:\tOffset: ")
setForegroundColor(fgYellow)
stdout.write(&"{offset}")
setForegroundColor(fgGreen)
stdout.write("\nDEBUG - VM:\tLine: ")
setForegroundColor(fgYellow)
stdout.write(&"{chunk.lines[offset]}\n")
setForegroundColor(fgGreen)
var opcode = OpCode(chunk.code[offset])
case opcode:
of simpleInstructions:

View File

@ -18,8 +18,6 @@
## Standard library imports
import strformat
import tables
import std/enumerate
## Our modules
import config
when not SKIP_STDLIB_INIT:
@ -37,16 +35,16 @@ import types/methods
import types/typeutils
import types/function
import types/native
import types/arraylist
import types/arrayList
import types/hashMap
import multibyte
# We always import it to
# avoid the compiler complaining
# about functions not existing
# in production builds
import util/debug
when DEBUG_TRACE_VM:
import terminal
import util/debug
import terminal
type
@ -57,7 +55,7 @@ type
Ok,
CompileError,
RuntimeError
VM* = ref object
VM* = object
## A wrapper around the virtual machine
## functionality. Using custom heap allocated
## types for everything might sound excessive,
@ -69,7 +67,7 @@ type
frames*: ptr ArrayList[CallFrame]
stack*: ptr ArrayList[ptr Obj]
objects*: ptr ArrayList[ptr Obj]
globals*: Table[string, ptr Obj] # TODO: Custom hashmap
globals*: ptr HashMap[string, ptr Obj]
cached*: array[6, ptr Obj]
file*: ptr String
@ -81,7 +79,7 @@ func handleInterrupt() {.noconv.} =
raise newException(KeyboardInterrupt, "Ctrl+C")
proc initStack*(self: VM) =
proc initStack*(self: var VM) =
## Initializes the VM's stack, frame stack
## and objects arraylist
when DEBUG_TRACE_VM:
@ -190,7 +188,7 @@ proc peek*(self: VM, distance: int): ptr Obj =
return self.stack[self.stack.high() - distance]
proc call(self: VM, function: ptr Function, argCount: int): bool =
proc call(self: var VM, function: ptr Function, argCount: int): bool =
## Sets up the call frame and performs error checking
## when calling callables
if argCount < function.arity:
@ -211,7 +209,7 @@ proc call(self: VM, function: ptr Function, argCount: int): bool =
return true
proc call(self: VM, native: ptr Native, argCount: int): bool =
proc call(self: var VM, native: ptr Native, argCount: int): bool =
## Does the same as self.call, but with native functions
if argCount != native.arity and native.arity != -1:
self.error(newTypeError(&"function '{stringify(native.name)}' takes {native.arity} argument(s), got {argCount}"))
@ -245,7 +243,7 @@ proc call(self: VM, native: ptr Native, argCount: int): bool =
return true
proc callObject(self: VM, callee: ptr Obj, argCount: uint8): bool =
proc callObject(self: var VM, callee: ptr Obj, argCount: uint8): bool =
## Wrapper around call() to do type checking
if callee.isCallable():
case callee.kind:
@ -260,7 +258,7 @@ proc callObject(self: VM, callee: ptr Obj, argCount: uint8): bool =
return false
proc defineGlobal*(self: VM, name: string, value: ptr Obj) =
proc defineGlobal*(self: var VM, name: string, value: ptr Obj) =
## Adds a key-value couple to the VM's global scope
self.globals[name] = value
@ -300,46 +298,77 @@ when DEBUG_TRACE_VM:
## state of the virtual machine
let view = frame.getView()
setForegroundColor(fgYellow)
setForegroundColor(fgMagenta)
if iteration > 1:
echo "" # To separate different iterations
stdout.write("DEBUG - VM: General information\n")
stdout.write(&"DEBUG - VM:\tIteration -> {iteration}\n")
setForegroundColor(fgDefault)
stdout.write("DEBUG - VM:\tStack -> [")
setForegroundColor(fgGreen)
stdout.write(&"DEBUG - VM:\tIteration -> ")
setForegroundColor(fgYellow)
stdout.write(&"{iteration}\n")
setForegroundColor(fgGreen)
stdout.write("DEBUG - VM:\tStack -> ")
setForegroundColor(fgYellow)
stdout.write("[")
for i, v in self.stack:
stdout.write(stringify(v))
if i < self.stack.high():
stdout.write(", ")
stdout.write("]\nDEBUG - VM: \tGlobals -> {")
for i, (k, v) in enumerate(self.globals.pairs()):
stdout.write("]")
setForegroundColor(fgGreen)
stdout.write("\nDEBUG - VM: \tGlobals -> ")
setForegroundColor(fgYellow)
stdout.write("{")
var i = 0
for k, v in self.globals.pairs():
stdout.write(&"'{k}': {stringify(v)}")
if i < self.globals.len() - 1:
stdout.write(", ")
stdout.write("}\nDEBUG - VM: Frame information\n")
i += 1
stdout.write("}")
setForegroundColor(fgMagenta)
stdout.write("\nDEBUG - VM: Frame information\n")
setForegroundColor(fgGreen)
stdout.write("DEBUG - VM:\tType -> ")
setForegroundColor(fgYellow)
if frame.function.name == nil:
stdout.write("main\n")
else:
stdout.write(&"function, '{frame.function.name.stringify()}'\n")
echo &"DEBUG - VM:\tCount -> {self.frames.len()}"
echo &"DEBUG - VM:\tLength -> {view.len}"
setForegroundColor(fgGreen)
stdout.write(&"DEBUG - VM:\tCount -> ")
setForegroundColor(fgYellow)
stdout.write(&"{self.frames.len()}\n")
setForegroundColor(fgGreen)
stdout.write(&"DEBUG - VM:\tLength -> ")
setForegroundColor(fgYellow)
stdout.write(&"{view.len}\n")
setForegroundColor(fgGreen)
stdout.write("DEBUG - VM:\tTable -> ")
setForegroundColor(fgYellow)
stdout.write("[")
for i, e in frame.function.chunk.consts:
stdout.write(stringify(e))
if i < len(frame.function.chunk.consts) - 1:
stdout.write(", ")
stdout.write("]\nDEBUG - VM:\tStack view -> ")
stdout.write("]")
setForegroundColor(fgGreen)
stdout.write("\nDEBUG - VM:\tStack view -> ")
setForegroundColor(fgYellow)
stdout.write("[")
for i, e in view:
stdout.write(stringify(e))
if i < len(view) - 1:
stdout.write(", ")
stdout.write("]\n")
setForegroundColor(fgMagenta)
echo "DEBUG - VM: Current instruction"
setForegroundColor(fgGreen)
discard disassembleInstruction(frame.function.chunk, frame.ip - 1)
setForegroundColor(fgDefault)
proc run(self: VM): InterpretResult =
proc run(self: var VM): InterpretResult =
## Chews through bytecode instructions, executing
## them one at a time: this is the runtime's
## main loop
@ -487,9 +516,6 @@ proc run(self: VM): InterpretResult =
self.error(newTypeError(&"unsupported binary operator '**' for objects of type '{left.typeName()}' and '{right.typeName()}'"))
return RuntimeError
of OpCode.True:
## TODO: Make sure that even operations that can yield
## preallocated types, but do not have access to the VM,
## yield these cached types
self.push(cast[ptr Bool](self.getBoolean(true)))
of OpCode.False:
self.push(cast[ptr Bool](self.getBoolean(false)))
@ -571,7 +597,7 @@ proc run(self: VM): InterpretResult =
# This is implemented internally for obvious
# reasons and works on any pair of objects, which
# is why we call nim's system.== operator and NOT
# our custom one
# our custom one's
var right = self.pop()
var left = self.pop()
self.push(self.getBoolean(system.`==`(left, right)))
@ -727,7 +753,7 @@ proc freeVM*(self: VM) =
proc initCache(self: VM) =
proc initCache(self: var VM) =
## Initializes the static cache for singletons
## such as true and false
@ -754,7 +780,7 @@ proc initCache(self: VM) =
self.cached[4] = nInf.asObj()
proc initStdlib*(vm: VM) =
proc initStdlib*(vm: var VM) =
## Initializes the VM's standard library by defining builtin
## functions that do not require imports. An arity of -1
## means that the function is variadic (or that it can
@ -784,7 +810,7 @@ proc initVM*(): VM =
## and internal data structures
when DEBUG_TRACE_VM:
echo &"DEBUG - VM: Initializing the virtual machine, {JAPL_VERSION_STRING}"
result = VM(globals: initTable[string, ptr Obj]())
result = VM(globals: newHashMap[string, ptr Obj]())
result.initStack()
result.initCache()
result.initStdlib()
@ -795,7 +821,7 @@ proc initVM*(): VM =
proc interpret*(self: VM, source: string, file: string): InterpretResult =
proc interpret*(self: var VM, source: string, file: string): InterpretResult =
## Interprets a source string containing JAPL code
when DEBUG_TRACE_VM:
echo &"DEBUG - VM: Preparing to run '{file}'"