string interning

This commit is contained in:
prod2 2022-01-29 04:22:21 +01:00
parent 9d6576aaeb
commit 6961c91468
6 changed files with 78 additions and 48 deletions

View File

@ -1,14 +1,8 @@
import ../types/hashtable
import ../types/ndstring
import strformat
func hash*(str: string): int =
  ## Computes the 32-bit FNV-1a hash of `str` and returns it widened to `int`.
  ##
  ## The empty string hashes to the FNV offset basis (2166136261).
  var acc = 2166136261'u32 # FNV-1a 32-bit offset basis
  for ch in str:
    # xor the byte in first, then multiply by the FNV prime;
    # uint32 arithmetic wraps around instead of overflowing
    acc = (acc xor ch.uint32) * 16777619'u32
  acc.int
proc testHashtables* =
var tbl = newTable[string, int]()

View File

@ -1,6 +1,8 @@
# The hash table implementation for string interning
import strformat
import ndstring
import bitops
# NOTE(review): presumably the load factor above which the table grows;
# the code that reads it is not visible in this excerpt - confirm.
const tableMaxLoad = 0.75
# NOTE(review): presumably the capacity of a freshly allocated table.
# findEntry masks indices with `cap - 1`, which assumes power-of-two sizes.
const tableInitSize = 8
@ -27,28 +29,21 @@ proc free*[U, V](tbl: var Table[U, V]) =
if tbl.entries != nil:
dealloc(tbl.entries)
proc isNil[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esNil`.
  result = entry.entryStatus == esNil
proc isTombstone[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esTombstone`.
  result = entry.entryStatus == esTombstone
proc isAlive[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esAlive`.
  result = entry.entryStatus == esAlive
proc findEntry[U, V](entries: ptr UncheckedArray[Entry[U, V]], cap: int, key: U): ptr Entry[U, V] =
  ## Linear-probing lookup of the slot that belongs to `key`.
  ##
  ## Returns the live entry whose key is `equal` to `key`. If probing reaches
  ## an `esNil` slot first, returns the first tombstone passed on the way
  ## (so it can be reused) or, when none was seen, that empty slot.
  ##
  ## Indices are reduced with `bitand(cap - 1)`, which only reaches every
  ## slot when `cap` is a power of two.
  ## NOTE(review): the loop ends only on an `esNil` slot or a matching key;
  ## presumably the load-factor limit guarantees an empty slot - confirm.
  var index = key.fnv1a().bitand(cap - 1)
  var tombstone: ptr Entry[U, V] = nil
  while true:
    let entry: ptr Entry[U, V] = entries[index].addr # TODO: check the performance impact of this line
    case entry[].entryStatus
    of esNil:
      # end of the probe chain: prefer recycling an earlier tombstone
      return if tombstone != nil: tombstone else: entry
    of esTombstone:
      # remember only the first tombstone, then keep probing
      if tombstone == nil:
        tombstone = entry
    of esAlive:
      if entry[].key.equal(key):
        return entry
    index = (index + 1).bitand(cap - 1)
proc grow[U, V](tbl: var Table[U, V]): int {.inline.} =
## Calculates the new capacity
@ -99,20 +94,31 @@ proc tableGet*[U, V](tbl: Table[U, V], key: U, val: var V): bool =
return false
let entry = findEntry(tbl.entries, tbl.cap, key)
if not entry.isAlive():
if not entry[].entryStatus == esAlive:
return false
val = entry[].value
return true
proc tableFindString*(tbl: Table[NdString, NdString], chars: ptr char, len: int, hash: int): NdString =
  ## Looks for an already stored key whose contents match the raw buffer
  ## `chars` of `len` bytes with precomputed hash `hash`.
  ##
  ## Returns the stored key, or nil when the table is empty or probing
  ## reaches an `esNil` slot without finding a match.
  ##
  ## Only `esAlive` slots are compared: tableDelete leaves the key of a
  ## deleted entry in place, so a tombstone must never count as a match.
  if tbl.count == 0:
    return nil
  let mask = tbl.cap - 1 # assumes cap is a power of two, as in findEntry
  var index = hash.bitand(mask)
  while true:
    # take the address instead of copying the whole entry on every probe
    let entry = tbl.entries[index].addr
    if entry.entryStatus == esNil:
      return nil
    if entry.entryStatus == esAlive:
      let candidate = entry.key
      # cheap length and hash checks first, byte comparison last
      if candidate.len.int == len and candidate.hash.int == hash and
          equalMem(chars, candidate.chars[0].unsafeAddr, len):
        return candidate
    index = (index + 1).bitand(mask)
proc tableDelete*[U, V](tbl: Table[U, V], key: U): bool =
  ## Removes `key` from the table by turning its slot into a tombstone.
  ##
  ## Returns false when the table is empty or no live entry holds `key`,
  ## true when an entry was removed.
  if tbl.count == 0:
    return false

  let entry = findEntry(tbl.entries, tbl.cap, key)
  # `not a == b` parses as `(not a) == b`, so the status is compared with `!=`
  if entry[].entryStatus != esAlive:
    return false

  # the slot becomes a tombstone rather than esNil so that probe chains
  # running through it are not cut short (findEntry stops at esNil)
  entry[].entryStatus = esTombstone
  # NOTE(review): the visible source ended without a success result, which
  # would leave the default `false`; confirm against the full file.
  return true

View File

@ -1,29 +1,21 @@
type
  NdString* = ptr object
    ## Pointer to a manually allocated string record: a length header
    ## followed directly by the character data.
    len: uint32 ## number of characters stored in `chars`
    chars: UncheckedArray[char] ## character data, not bounds-checked
proc newString*(str: string): NdString =
  ## Allocates an NdString holding a copy of the characters of `str`.
  ##
  ## The block is obtained with `alloc`, so it has to be released with
  ## `free`. An empty `str` yields a record with `len == 0`.
  let strlen = str.len()
  # header (the uint32 length field) followed by the characters
  let len = sizeof(uint32) + strlen
  result = cast[NdString](alloc(len))
  result.len = strlen.uint32
  # str[0] raises an index error for "", and there is nothing to copy anyway
  if strlen > 0:
    copyMem(result.chars[0].unsafeAddr, str[0].unsafeAddr, strlen)
proc `$`*(ndStr: NdString): string =
  ## Converts `ndStr` into a Nim string containing the same characters.
  let count = ndStr.len.int
  result = newString(count)
  # result[0] raises an index error on an empty string, so skip the copy
  if count > 0:
    copyMem(result[0].addr, ndStr.chars[0].unsafeAddr, count)
proc `&`*(left, right: NdString): NdString =
  ## Concatenates two NdStrings by converting both to Nim strings, joining
  ## them and building a new NdString from the result.
  # TODO optimize this later when strings will be benchmarked
  let joined = $left & $right
  newString(joined)
len*: uint32
hash*: uint32
chars*: UncheckedArray[char]
proc free*(ndStr: var NdString) =
  ## Releases the memory block `ndStr` points to.
  ## The variable itself is not reset and keeps the stale address.
  ndStr.dealloc()
proc hash*(ndStr: NdString): int =
# for hashtable:
proc fnv1a*(ndStr: NdString): int =
  ## Computes the 32-bit FNV-1a hash over the `len` characters of `ndStr`
  ## and returns it widened to `int`.
  var acc = 2166136261'u32 # FNV-1a 32-bit offset basis
  for i in 0 ..< ndStr.len.int:
    # xor the byte in first, then multiply by the FNV prime;
    # uint32 arithmetic wraps around instead of overflowing
    acc = (acc xor ndStr.chars[i].uint32) * 16777619'u32
  return acc.int
proc equal*(left, right: NdString): bool =
  ## Compares the two handles with `==`; the characters are not inspected.
  result = left == right

36
types/stringutils.nim Normal file
View File

@ -0,0 +1,36 @@
import hashtable
import ndstring
# FNV-1a for Nim's built-in string type. NOTE: keep this identical to the NdString implementation in ndstring.nim.
func fnv1a*(str: string): int =
  ## Computes the 32-bit FNV-1a hash of `str` and returns it widened to `int`.
  ##
  ## The empty string hashes to the FNV offset basis (2166136261).
  var acc = 2166136261'u32 # FNV-1a 32-bit offset basis
  for ch in str:
    # xor the byte in first, then multiply by the FNV prime;
    # uint32 arithmetic wraps around instead of overflowing
    acc = (acc xor ch.uint32) * 16777619'u32
  acc.int
# Intern pool: newString looks strings up here and stores every new string
# as a key with a nil value, so the table is effectively used as a set.
var ndStrings = newTable[NdString, NdString]()
proc newString*(str: string): NdString =
  ## Returns the interned NdString with the same characters as `str`.
  ##
  ## If an equal string is already in the intern pool, that instance is
  ## returned. Otherwise a new record is allocated with `alloc`, filled in
  ## (length, hash, characters) and added to the pool.
  let strlen = str.len()
  let hash = str.fnv1a()
  # the cstring view is valid even for "", where str[0] would raise
  let src = cast[ptr char](str.cstring)
  let interned = ndStrings.tableFindString(src, strlen, hash)
  if interned != nil:
    return interned
  # the header is two uint32 fields (len and hash); allocating only
  # 4 + strlen bytes would let the character copy overrun the block
  let len = 2 * sizeof(uint32) + strlen
  result = cast[NdString](alloc(len))
  result.len = strlen.uint32
  result.hash = hash.uint32
  if strlen > 0:
    copyMem(result.chars[0].unsafeAddr, src, strlen)
  discard ndStrings.tableSet(result, nil)
proc `$`*(ndStr: NdString): string =
  ## Converts `ndStr` into a Nim string containing the same characters.
  let count = ndStr.len.int
  result = newString(count)
  # result[0] raises an index error on an empty string, so skip the copy
  if count > 0:
    copyMem(result[0].addr, ndStr.chars[0].unsafeAddr, count)
proc `&`*(left, right: NdString): NdString =
  ## Concatenates two NdStrings by converting both to Nim strings, joining
  ## them and passing the result to `newString`.
  # TODO optimize this later when strings will be benchmarked
  let joined = $left & $right
  newString(joined)

View File

@ -1,5 +1,6 @@
import strformat
import types/ndstring
import types/stringutils
type
NdType* = enum

1
vm.nim
View File

@ -8,6 +8,7 @@ import pointerutils
import types/stack
import types/ndstring
import types/stringutils
when profileInstructions:
import times