读代码:Dict的实现

Note

本文以 PR #37568之后的代码为基准

首先看一下 mutable struct Dict 部分

mutable struct Dict{K,V} <: AbstractDict{K,V}
	# Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
	slots::Vector{UInt8}
	keys::Array{K,1}
	vals::Array{V,1}
	ndel::Int
	count::Int
	age::UInt
	idxfloor::Int  # an index <= the indices of all used slots
	maxprobe::Int
	# ...
end

其中基本的构造函数

function Dict{K,V}() where V where K
	n = 16
	new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, 1, 0)
end

这意味着初始化时默认预分配长度为16的空间

其它的构造函数阅读即可,继续往下翻到第一个 Dict 相关代码

@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x00
@propagate_inbounds isslotfilled(h::Dict, i::Int) = (h.slots[i] & 0x80) != 0
@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x7f

这表明 slots 字段用于表示指定位置的状态

先跳过几个函数,阅读 ht_keyindex

# get the index where a key is stored, or -1 if not present
function ht_keyindex(h::Dict{K,V}, key) where V where K
	isempty(h) && return -1
	sz = length(h.keys)
	iter = 0
	maxprobe = h.maxprobe
	index, sh = hashindex(key, sz)
	keys = h.keys

	@inbounds while true
		isslotempty(h,index) && return -1
		if h.slots[index] == sh
			k = keys[index]
			if (key ===  k || isequal(key, k))
				return index
			end
		end

		index = (index & (sz-1)) + 1
		(iter += 1) > maxprobe && return -1
	end
	# This line is unreachable
end

其中调用了一次 hashindex

# Gets 7 most significant bits from the hash (hsh), first bit is 1
_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80
_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80

# hashindex (key, sz) - computes optimal position and shorthash7
#    idx - optimal position in the hash table
#    sh::UInt8 - short hash (7 highest hash bits)
function hashindex(key, sz)
	hsh = hash(key)::UInt
	idx = (((hsh % Int) & (sz-1)) + 1)::Int
	return idx, _shorthash7(hsh)
end

我们知道,hash 用于将数据映射到整数,并保证 x==y => hash(x) == hash(y)。 通过分析,可以得出:这里 hashindex 得到的第一个值限制在 [1, sz] 之间,且 x==y => hashindex(x) == hashindex(y),而第二个值取出高七位与slots中数据比较

那么这里带来了一个问题:对于 x !=y hashindex(x) == hashindex(y) 仍然有可能发生,这意味着不能直接返回 index,而需在判断失败后继续尝试 (index & (sz-1)) + 1

为防止无限循环,有专门字段 maxprobe 表示最大尝试次数

ht_keyindex2_shorthash! 是一个更高级的函数,在未找到时自动得出插入位置

# get (index, sh) for the key
#    index - where a key is stored, or -pos if not present
#            and the key would be inserted at pos
#    sh::UInt8 - short hash (7 highest hash bits)
# This version is for use by setindex! and get!
function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
	sz = length(h.keys)
	iter = 0
	maxprobe = h.maxprobe
	index, sh = hashindex(key, sz)
	avail = 0
	keys = h.keys

	@inbounds while true
		if isslotempty(h,index)
			return (avail < 0 ? avail : -index), sh
		end

		if isslotmissing(h,index)
			if avail == 0
				# found an available slot, but need to keep scanning
				# in case "key" already exists in a later collided slot.
				avail = -index
			end
		elseif h.slots[index] == sh
			k = keys[index]
			if key === k || isequal(key, k)
				return index, sh
			end
		end

		index = (index & (sz-1)) + 1
		iter += 1
		iter > maxprobe && break
	end

	avail < 0 && return avail, sh

	maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
	# Check if key is not present, may need to keep searching to find slot
	@inbounds while iter < maxallowed
		if !isslotfilled(h,index)
			h.maxprobe = iter
			return -index, sh
		end
		index = (index & (sz-1)) + 1
		iter += 1
	end

	rehash!(h, h.count > 64000 ? sz*2 : sz*4)

	return ht_keyindex2_shorthash!(h, key)
end

其中使用了全局常量

const global maxallowedprobe = 16
const global maxprobeshift   = 6