Added WIP implementation of a generic hashmap that should behave better with JAPL's own memory manager

This commit is contained in:
nocturn9x 2021-01-27 15:51:15 +01:00
parent fa9594f855
commit 787d34a8b5
7 changed files with 239 additions and 95 deletions

View File

@ -45,8 +45,8 @@ CONFIG_TEMPLATE = '''# Copyright 2020 Mattia Giambirtone
import strformat
const MAP_LOAD_FACTOR* = {map_load_factor} # Load factor for builtin hashmaps (TODO)
const ARRAY_GROW_FACTOR* = {array_grow_factor} # How much extra memory to allocate for dynamic arrays (TODO)
const MAP_LOAD_FACTOR* = {map_load_factor} # Load factor for builtin hashmaps
const ARRAY_GROW_FACTOR* = {array_grow_factor} # How much extra memory to allocate for dynamic arrays
const FRAMES_MAX* = {frames_max} # The maximum recursion limit
const JAPL_VERSION* = "0.3.0"
const JAPL_RELEASE* = "alpha"

View File

@ -26,8 +26,10 @@ import types/typeutils
import types/methods
proc repl() =
var bytecodeVM = initVM()
proc repl(bytecodeVM: VM) =
var bytecodeVM = bytecodeVM
if bytecodeVM == nil:
bytecodeVM = initVM()
echo JAPL_VERSION_STRING
echo &"[Nim {NimVersion} on {hostOs} ({hostCPU})]"
var source = ""
@ -58,7 +60,7 @@ proc repl() =
proc main(file: var string = "", fromString: bool = false, interactive: bool = false) =
var source: string
if file == "" and not fromString:
repl()
repl(nil)
return # We exit after the REPL has ran
if not fromString:
var sourceFile: File
@ -77,7 +79,7 @@ proc main(file: var string = "", fromString: bool = false, interactive: bool = f
var bytecodeVM = initVM()
discard bytecodeVM.interpret(source, file)
if interactive:
repl()
repl(bytecodeVM)
bytecodeVM.freeVM()

View File

@ -257,7 +257,7 @@ when isMainModule:
stdout.write("Lexer output: [")
var lexed = lexer.lex()
for i, el in lexed:
stdout.write($el[])
stdout.write($el)
if i < lexed.high():
stdout.write(", ")
stdout.write("]\n")

View File

@ -1,88 +0,0 @@
# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# WIP - Not working
import ../memory
import lenientops
import baseObject
import methods
import japlNil
import japlString
const HASH_MAP_LOAD_MAX = 0.75 # Hash map max load factor
type
Entry = ref object
key*: ptr Obj
value*: ptr Obj
HashMap = object of Obj
size*: int
capacity*: int
entries*: ptr UncheckedArray[Entry]
proc newHashMap(): HashMap =
result = HashMap(size: 0, capacity: 0, entries: nil)
proc freeHashMap(self: var HashMap) =
discard freeArray(self, self.entries, self.capacity)
self.size = 0
self.capacity = 0
self.entries = nil
proc findEntry(self: HashMap, key: ptr Obj): Entry =
var idx = (int key.hashValue) mod self.capacity
var entry: Entry
while true:
entry = self.entries[idx]
if entry.key.eq(key) or entry.key == nil:
result = entry
break
idx = idx + 1 mod self.capacity
proc adjustCapacity(self: var HashMap, capacity: int) =
var entries = allocate(UncheckedArray[Entry], Entry, capacity)
var i = 0
while i < capacity:
entries[i].key = nil
entries[i].value = cast[ptr Obj](asNil())
i += 1
self.entries = entries
self.capacity = capacity
proc setEntry(self: var HashMap, key: ptr Obj, value: ptr Obj): bool =
if self.size + 1 > self.capacity * HASH_MAP_LOAD_MAX:
var capacity = growCapacity(self.capacity)
self.adjustCapacity(capacity)
var entry: Entry = self.findEntry(key)
var isNewKey: bool = entry.key == nil
if isNewKey:
self.size += 1
entry.key = key
entry.value = value
result = isNewKey
when isMainModule:
var dictionary = newHashMap()
discard dictionary.setEntry(asObj[String]("helo".asStr()), asObj[String]("world".asStr()))
echo dictionary.findEntry(asObj[String]("helo".asStr())).value.toStr()

BIN
src/types/hashmap Executable file

Binary file not shown.

226
src/types/hashmap.nim Normal file
View File

@ -0,0 +1,226 @@
# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../memory
import ../config
import baseObject
import methods
import iterable
import lenientops
import options
import hashes
import strformat
type
Entry*[K, V] = object
## Low-level object to store key/value pairs
key*: Option[K]
value*: Option[V]
tombstone*: bool
HashMap*[K, V] = object of Iterable
## An associative array with O(1) lookup time,
## similar to nim's Table type, but using raw
## memory to be more compatible with JAPL's runtime
## memory management
entries*: ptr UncheckedArray[ptr Entry[K, V]]
# This attribute counts *only* non-deleted entries
actual_length*: int
proc newHashMap*[K, V](): ptr HashMap[K, V] =
## Initializes a new, empty hashmap
result = allocateObj(HashMap[K, V], ObjectType.Dict)
result.actual_length = 0
result.entries = nil
result.capacity = 0
result.length = 0
proc freeHashMap*[K, V](self: ptr HashMap[K, V]) =
## Frees the memory associated with the hashmap
discard freeArray(UncheckedArray[ptr Entry[K, V]], self.entries, self.capacity)
self.length = 0
self.actual_length = 0
self.capacity = 0
self.entries = nil
proc findEntry[K, V](self: ptr UncheckedArray[ptr Entry[K, V]], key: K, capacity: int): ptr Entry[K, V] =
## Low-level method used to find entries in the underlying
## array, returns a pointer to an entry
var capacity = uint64(capacity)
var idx = uint64(key.hash()) mod capacity
var tombstone: ptr Entry[K, V] = nil
while true:
result = self[idx]
if result.key.isNone() and result.key.isSome():
# We found a tombstone
tombstone = result
elif result.key.isNone() or result.key.get() == key:
if tombstone != nil:
result = tombstone
result.tombstone = true
break
# If none of these conditions match, we have a collision!
# This means we can just move on to the next slot in our probe
# sequence until we find an empty slot. The way our resizing
# mechanism works makes the empty slot invariant easy to
# maintain since we increase the underlying array's size
# before we are actually full
idx += 1 mod capacity
proc adjustCapacity[K, V](self: ptr HashMap[K, V]) =
## Adjusts the capacity of the underlying array to make room
## for more entries. Low-level method, not recommended
var newCapacity = growCapacity(self.capacity)
var entries = allocate(UncheckedArray[ptr Entry[K, V]], Entry[K, V], newCapacity)
self.length = 0
var temp: ptr Entry[K, V]
for x in countup(0, newCapacity - 1):
entries[x] = allocate(Entry[K, V], Entry[K, V], 1)
temp = entries[x]
temp.key = none(K)
temp.value = none(V)
temp.tombstone = false
for x in countdown(self.capacity - 1, 0):
temp = self.entries[x]
if temp.key.isSome():
entries[x] = temp
self.length += 1
discard freeArray(UncheckedArray[ptr Entry[K, V]], self.entries, self.capacity)
self.entries = entries
self.capacity = newCapacity
proc setEntry[K, V](self: ptr HashMap[K, V], key: K, value: V): bool =
## Low-level method to set/replace an entry with a value
if self.length + 1 > self.capacity * MAP_LOAD_FACTOR:
# Since we always need at least some empty slots
# for our probe sequences to work properly, we
# always resize our underlying array before we're full.
# MAP_LOAD_FACTOR is a constant float between 0.0 and 1.0
# which determines the percentage of full buckets that's
# needed to start a resize operation
self.adjustCapacity()
var entry = findEntry(self.entries, key, self.capacity)
result = entry.key.isNone()
if result:
self.actual_length += 1
self.length += 1
entry.key = some(key)
entry.value = some(value)
proc `[]`*[K, V](self: ptr HashMap[K, V], key: K): V =
## Retrieves a value by key
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isNone():
raise newException(KeyError, &"Key not found: {key}")
result = entry.value.get()
proc `[]=`*[K, V](self: ptr HashMap[K, V], key: K, value: V) =
## Sets a value with the given key. If the key already
## exists it will be overwritten
discard self.setEntry(key, value)
proc del*[K, V](self: ptr HashMap[K, V], key: K) =
## Deletes an entry in the hashmap
if self.len() == 0:
raise newException(KeyError, &"delete from empty hashmap")
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isSome():
## We don't reset the value of the
## 'value' attribute because that
## makes us understand that this
## entry is a tombstone and not
## a truly full bucket
self.actual_length -= 1
entry.key = none(K)
else:
raise newException(KeyError, &"Key not found: {key}")
proc contains*[K, V](self: ptr HashMap[K, V], key: K): bool =
## Checks if key is in the hashmap
var entry = findEntry(self.entries, key, self.capacity)
if entry.key.isSome():
result = true
else:
result = false
iterator keys*[K, V](self: ptr HashMap[K, V]): K =
## Yields all the keys in the hashmap
var entry: ptr Entry[K, V]
for i in countup(0, self.capacity - 1):
entry = self.entries[i]
if entry.key.isSome():
yield entry.key.get()
iterator values*[K, V](self: ptr HashMap[K, V]): V =
## Yields all the values in the hashmap
for key in self.keys():
yield self[key]
iterator pairs*[K, V](self: ptr HashMap[K, V]): tuple[key: K, val: V] =
## Yields all the key/value pairs in the hashmap
for key in self.keys():
yield (key: key, val: self[key])
iterator items*[K, V](self: ptr HashMap[K, V]): K =
## Yields all the keys in the hashmap (for iteration)
for k in self.keys():
yield k
proc len*[K, V](self: ptr HashMap[K, V]): int =
## Returns the length of the hashmap
result = self.actual_length
proc `$`*[K, V](self: ptr HashMap[K, V]): string =
## Returns a string representation of the hashmap
var i = 0
result &= "{"
for key, value in self.pairs():
result &= &"{key}: {value}"
if i < self.len() - 1:
result &= ", "
i += 1
result &= "}"
var d = newHashMap[int, int]()
d[1] = 55
d[2] = 876
d[3] = 7890
d[4] = 55
d[5] = 435
d[6] = 567
d[7] = 21334 ## Adjust capacity (75% full)
d[8] = 9768
d[9] = 235
echo d

View File

@ -205,6 +205,10 @@ proc eq*(self, other: ptr Obj): bool =
discard
proc `==`*(self, other: ptr Obj): bool =
result = self.eq(other)
proc negate*(self: ptr Obj): returnType =
## Returns the result of -self or
## raises an error if the operation