读代码:Dict的实现
本文以 PR #37568之后的代码为基准
首先看一下 mutable struct Dict
部分
mutable struct Dict{K,V} <: AbstractDict{K,V}
# Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
slots::Vector{UInt8}
keys::Array{K,1}
vals::Array{V,1}
ndel::Int
count::Int
age::UInt
idxfloor::Int # an index <= the indices of all used slots
maxprobe::Int
# ...
end
其中基本的构造函数
function Dict{K,V}() where V where K
n = 16
new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, 1, 0)
end
这意味着初始化时默认预分配长度为16的空间
其它的构造函数阅读即可,继续往下翻到第一个 Dict
相关代码
@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x00
@propagate_inbounds isslotfilled(h::Dict, i::Int) = (h.slots[i] & 0x80) != 0
@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x7f
这表明 slots
字段用于表示指定位置的状态
先跳过几个函数,阅读 ht_keyindex
# get the index where a key is stored, or -1 if not present
function ht_keyindex(h::Dict{K,V}, key) where V where K
isempty(h) && return -1
sz = length(h.keys)
iter = 0
maxprobe = h.maxprobe
index, sh = hashindex(key, sz)
keys = h.keys
@inbounds while true
isslotempty(h,index) && return -1
if h.slots[index] == sh
k = keys[index]
if (key === k || isequal(key, k))
return index
end
end
index = (index & (sz-1)) + 1
(iter += 1) > maxprobe && return -1
end
# This line is unreachable
end
其中调用了一次 hashindex
# Gets 7 most significant bits from the hash (hsh), first bit is 1
_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80
_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80
# hashindex (key, sz) - computes optimal position and shorthash7
# idx - optimal position in the hash table
# sh::UInt8 - short hash (7 highest hash bits)
function hashindex(key, sz)
hsh = hash(key)::UInt
idx = (((hsh % Int) & (sz-1)) + 1)::Int
return idx, _shorthash7(hsh)
end
我们知道,hash
用于将数据映射到整数,并保证 x==y
=> hash(x) == hash(y)
。 通过分析,可以得出:这里 hashindex
得到的第一个值限制在 [1, sz]
之间,且 x==y
=> hashindex(x) == hashindex(y)
,而第二个值取出高七位与slots
中数据比较
那么这里带来了一个问题:对于 x !=y
,hashindex(x) == hashindex(y)
仍然有可能发生,这意味着不能直接返回 index
,而需在判断失败后继续尝试 (index & (sz-1)) + 1
为防止无限循环,有专门字段 maxprobe
表示最大尝试次数
ht_keyindex2_shorthash!
是一个更高级的函数,在未找到时自动得出插入位置
# get (index, sh) for the key
# index - where a key is stored, or -pos if not present
# and the key would be inserted at pos
# sh::UInt8 - short hash (7 highest hash bits)
# This version is for use by setindex! and get!
function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
sz = length(h.keys)
iter = 0
maxprobe = h.maxprobe
index, sh = hashindex(key, sz)
avail = 0
keys = h.keys
@inbounds while true
if isslotempty(h,index)
return (avail < 0 ? avail : -index), sh
end
if isslotmissing(h,index)
if avail == 0
# found an available slot, but need to keep scanning
# in case "key" already exists in a later collided slot.
avail = -index
end
elseif h.slots[index] == sh
k = keys[index]
if key === k || isequal(key, k)
return index, sh
end
end
index = (index & (sz-1)) + 1
iter += 1
iter > maxprobe && break
end
avail < 0 && return avail, sh
maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
# Check if key is not present, may need to keep searching to find slot
@inbounds while iter < maxallowed
if !isslotfilled(h,index)
h.maxprobe = iter
return -index, sh
end
index = (index & (sz-1)) + 1
iter += 1
end
rehash!(h, h.count > 64000 ? sz*2 : sz*4)
return ht_keyindex2_shorthash!(h, key)
end
其中使用了全局常量
const global maxallowedprobe = 16
const global maxprobeshift = 6