string interning
This commit is contained in:
parent
9d6576aaeb
commit
6961c91468
|
@ -1,14 +1,8 @@
|
|||
import ../types/hashtable
|
||||
import ../types/ndstring
|
||||
|
||||
import strformat
|
||||
|
||||
proc hash*(str: string): int =
  ## 32-bit FNV-1a hash of the bytes of `str`, returned as an `int`.
  const
    offsetBasis = 2166136261'u32
    prime = 16777619'u32
  var acc = offsetBasis
  for ch in str:
    # xor the byte in first, then multiply; uint32 arithmetic wraps around
    acc = (acc xor ch.uint32) * prime
  result = acc.int
|
||||
|
||||
proc testHashtables* =
|
||||
var tbl = newTable[string, int]()
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# The hash table implementation for string interning
|
||||
|
||||
import strformat
|
||||
import ndstring
|
||||
|
||||
import bitops
|
||||
|
||||
# Presumably the maximum load factor (count / cap) tolerated before the table grows — TODO confirm against the growth code.
const tableMaxLoad = 0.75
|
||||
# Presumably the capacity used for the first allocation — TODO confirm. `findEntry` masks indices with `cap - 1`, which only works for power-of-two capacities.
const tableInitSize = 8
|
||||
|
@ -27,28 +29,21 @@ proc free*[U, V](tbl: var Table[U, V]) =
|
|||
if tbl.entries != nil:
|
||||
dealloc(tbl.entries)
|
||||
|
||||
proc isNil[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esNil`.
  ## This checks the slot's status, not whether the pointer itself is nil.
  result = entry.entryStatus == esNil
|
||||
|
||||
proc isTombstone[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esTombstone`.
  result = entry.entryStatus == esTombstone
|
||||
|
||||
proc isAlive[U, V](entry: ptr Entry[U, V]): bool {.inline.} =
  ## True when the slot behind `entry` has status `esAlive`.
  result = entry.entryStatus == esAlive
|
||||
|
||||
proc findEntry[U, V](entries: ptr UncheckedArray[Entry[U, V]], cap: int, key: U): ptr Entry[U, V] =
  ## Linear-probes `entries` for `key`, starting at `key.fnv1a()` masked to
  ## the table size.
  ##
  ## Returns the live entry whose key is `equal` to `key` if one is found.
  ## Otherwise returns the slot an insert should use: the first tombstone
  ## passed during the probe, or the `esNil` slot that ended it.
  ##
  ## `cap` must be a power of two because indices are wrapped with
  ## `bitand(cap - 1)`. The probe only stops on a match or an `esNil` slot,
  ## so the table must always keep at least one `esNil` slot.
  let mask = cap - 1
  var index = key.fnv1a().bitand(mask)
  var tombstone: ptr Entry[U, V] = nil
  while true:
    let entry: ptr Entry[U, V] = entries[index].addr # TODO: check the performance impact of this line
    case entry[].entryStatus:
    of esNil:
      # end of the probe sequence: prefer recycling an earlier tombstone
      return if tombstone != nil: tombstone else: entry
    of esTombstone:
      # remember only the first tombstone, then keep probing
      if tombstone == nil:
        tombstone = entry
    of esAlive:
      if entry[].key.equal(key):
        return entry
    index = (index + 1).bitand(mask)
|
||||
|
||||
proc grow[U, V](tbl: var Table[U, V]): int {.inline.} =
|
||||
## Calculates the new capacity
|
||||
|
@ -99,20 +94,31 @@ proc tableGet*[U, V](tbl: Table[U, V], key: U, val: var V): bool =
|
|||
return false
|
||||
|
||||
let entry = findEntry(tbl.entries, tbl.cap, key)
|
||||
if not entry.isAlive():
|
||||
if not entry[].entryStatus == esAlive:
|
||||
return false
|
||||
|
||||
val = entry[].value
|
||||
return true
|
||||
|
||||
|
||||
proc tableFindString*(tbl: Table[NdString, NdString], chars: ptr char, len: int, hash: int): NdString =
  ## Looks up an already interned string by content.
  ##
  ## `chars`/`len` describe the raw bytes being searched for and `hash` is
  ## their precomputed hash. Returns the stored key whose length, hash and
  ## bytes all match, or nil when there is none.
  if tbl.count == 0:
    return nil

  let mask = tbl.cap - 1
  var index = hash.bitand(mask)
  # Visiting `cap` consecutive masked indices covers every slot once, so the
  # probe terminates even if no `esNil` slot is left.
  for _ in 0 ..< tbl.cap:
    let entry = tbl.entries[index]
    case entry.entryStatus
    of esNil:
      return nil
    of esTombstone:
      # Deleted slot: its key must not be compared or handed out again.
      discard
    of esAlive:
      let candidate = entry.key
      if candidate.len.int == len and candidate.hash.int == hash and
          equalMem(chars, candidate.chars[0].unsafeAddr, len):
        return candidate
    index = (index + 1).bitand(mask)

  return nil
|
||||
|
||||
proc tableDelete*[U, V](tbl: Table[U, V], key: U): bool =
|
||||
if tbl.count == 0:
|
||||
return false
|
||||
|
||||
let entry = findEntry(tbl.entries, tbl.cap, key)
|
||||
if not entry.isAlive():
|
||||
if not entry[].entryStatus == esAlive:
|
||||
return false
|
||||
|
||||
entry[].entryStatus = esTombstone
|
||||
|
|
|
@ -1,29 +1,21 @@
|
|||
|
||||
# Heap string handle: a raw pointer to a length header followed by the character bytes.
type
|
||||
NdString* = ptr object
|
||||
# number of bytes stored in `chars`
len: uint32
|
||||
# character data; an unchecked array, so indexing is not bounds-checked
chars: UncheckedArray[char]
|
||||
|
||||
proc newString*(str: string): NdString =
  ## Allocates a new `NdString` with `alloc` and copies the bytes of `str`
  ## into it. The caller owns the result.
  let strlen = str.len()
  # Size the header from the object itself rather than a hard-coded 4, so
  # the allocation stays correct if fields are added in front of `chars`.
  let len = sizeof(result[]) + strlen
  result = cast[NdString](alloc(len))
  result.len = strlen.uint32
  # `str[0]` raises on an empty string, so copy only when there is data.
  if strlen > 0:
    copyMem(result.chars[0].unsafeAddr, str[0].unsafeAddr, strlen)
|
||||
|
||||
proc `$`*(ndStr: NdString): string =
  ## Copies the `len` bytes of `ndStr` into a fresh Nim string.
  result = newString(ndStr.len.int)
  # Indexing `result[0]` on an empty string raises, so skip the copy then.
  if ndStr.len > 0:
    copyMem(result[0].unsafeAddr, ndStr.chars[0].unsafeAddr, ndStr.len.int)
|
||||
|
||||
proc `&`*(left, right: NdString): NdString =
  ## Concatenates two strings by converting both to Nim strings, joining
  ## them and building a new `NdString` from the result.
  # TODO optimize this later, once strings are benchmarked
  let joined = $left & $right
  result = newString(joined)
|
||||
len*: uint32
|
||||
hash*: uint32
|
||||
chars*: UncheckedArray[char]
|
||||
|
||||
proc free*(ndStr: var NdString) =
  ## Releases the memory behind `ndStr` and resets the handle to nil, so a
  ## second call is a no-op and the caller is not left with a dangling
  ## pointer.
  ## NOTE(review): this does not remove the string from any table that still
  ## references it — confirm callers handle that.
  if ndStr != nil:
    dealloc(ndStr)
    ndStr = nil
|
||||
|
||||
# for hashtable:

proc fnv1a*(ndStr: NdString): int =
  ## 32-bit FNV-1a hash over the `len` bytes stored in `chars`.
  var hash = 2166136261'u32
  for i in countup(0, ndStr.len.int - 1):
    hash = hash xor (ndStr.chars[i]).uint32
    hash *= 16777619 # uint32 multiplication wraps around
  return hash.int

proc hash*(ndStr: NdString): int =
  ## Previous name of this hash, kept so existing `key.hash()` callers keep
  ## working; returns the same value as `fnv1a`.
  fnv1a(ndStr)
|
||||
|
||||
proc equal*(left, right: NdString): bool =
  ## Compares the two handles themselves (pointer comparison); the character
  ## data is not inspected.
  result = left == right
|
|
@ -0,0 +1,36 @@
|
|||
import hashtable
|
||||
import ndstring
|
||||
|
||||
# string extension, NOTE identical implementation in ndstring.nim
|
||||
proc fnv1a*(str: string): int =
  ## 32-bit FNV-1a hash of the bytes of `str`, returned as an `int`.
  const
    offsetBasis = 2166136261'u32
    prime = 16777619'u32
  var acc = offsetBasis
  for ch in str:
    # xor the byte in first, then multiply; uint32 arithmetic wraps around
    acc = (acc xor ch.uint32) * prime
  result = acc.int
|
||||
|
||||
# Table of interned strings: `newString` looks candidates up here and registers each new string as a key (with a nil value).
var ndStrings = newTable[NdString, NdString]()
|
||||
|
||||
proc newString*(str: string): NdString =
  ## Returns the interned `NdString` for the bytes of `str`.
  ##
  ## If a string with the same length, hash and bytes is already registered
  ## in `ndStrings`, that instance is returned. Otherwise a new one is
  ## allocated with `alloc`, filled in and registered.
  let strlen = str.len()
  let hash = str.fnv1a()

  # `str[0]` raises on an empty string; the cstring view is a valid pointer
  # for every string, including "".
  let src = cast[ptr char](str.cstring)

  let interned = ndStrings.tableFindString(src, strlen, hash)
  if interned != nil:
    return interned

  # The header is everything in front of `chars` (`len` and `hash`). A
  # hard-coded 4 only covers `len` and under-allocates by the size of
  # `hash`, so take the size from the object itself.
  let len = sizeof(result[]) + strlen
  result = cast[NdString](alloc(len))
  result.len = strlen.uint32
  result.hash = hash.uint32
  if strlen > 0:
    copyMem(result.chars[0].unsafeAddr, src, strlen)

  # Register the new string; only the key matters, the value stays nil.
  discard ndStrings.tableSet(result, nil)
|
||||
|
||||
proc `$`*(ndStr: NdString): string =
  ## Copies the `len` bytes of `ndStr` into a fresh Nim string.
  result = newString(ndStr.len.int)
  # Indexing `result[0]` on an empty string raises, so skip the copy then.
  if ndStr.len > 0:
    copyMem(result[0].unsafeAddr, ndStr.chars[0].unsafeAddr, ndStr.len.int)
|
||||
|
||||
proc `&`*(left, right: NdString): NdString =
  ## Concatenates two strings by converting both to Nim strings, joining
  ## them and passing the result to `newString`.
  # TODO optimize this later, once strings are benchmarked
  let joined = $left & $right
  result = newString(joined)
|
|
@ -1,5 +1,6 @@
|
|||
import strformat
|
||||
import types/ndstring
|
||||
import types/stringutils
|
||||
|
||||
type
|
||||
NdType* = enum
|
||||
|
|
Loading…
Reference in New Issue