Skip to content

Commit 6dd0355

Browse files
committed
bloom
1 parent f6e2483 commit 6dd0355

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

lazy_iterate.jl

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
using BitIntegers
2+
3+
using Base: count_ones
4+
import Base: in!, iterate, IteratorSize
5+
6+
"""
    _hash_bloom(x, seeds, T)

Build the Bloom-filter bit mask of `x` as a value of the unsigned integer type `T`.

For every seed in `seeds`, `x` is hashed with that seed and the hash is reduced
modulo the bit width of `T` (`8 * sizeof(T)`) to select one bit; the selected bits
are OR-ed together. The result therefore has at most `length(seeds)` bits set
(fewer when two seeds select the same bit) and is `zero(T)` when `seeds` is empty.

`seeds` may hold any integer type: each seed is converted with `% UInt` because
`hash(x, h)` only accepts `h::UInt`.
"""
function _hash_bloom(x, seeds, T)
    nbits = 8 * sizeof(T)  # loop-invariant bit width of the mask type
    out = zero(T)
    for seed in seeds
        # `% UInt` is a no-op for `UInt` seeds and wraps every other integer type.
        out |= one(T) << (hash(x, seed % UInt) % nbits)
    end
    return out
end
13+
14+
"""
    BloomFilter{T,K}(p, seeds)

Bloom filter whose whole bit field is a single unsigned integer of type `T`,
addressed through `K` hash seeds.

Only `memory` can be reassigned after construction; `seeds` and `p` are `const`
fields. The inner constructor requires `T <: Unsigned` and starts from an empty
bit field (`zero(T)`).
"""
mutable struct BloomFilter{T,K}
    # Bit field; starts at zero(T).
    memory::T
    # One hash seed per probe.
    const seeds::NTuple{K,UInt64}
    # Max tolerable false positive rate.
    const p::Float64

    BloomFilter{T,K}(p, seeds) where {T<:Unsigned,K} = new{T,K}(zero(T), seeds, p)
end
22+
23+
"""
    BloomFilter(; k=5, p=1/16)

Create an empty `BloomFilter{UInt64,k}` with `k` seeds, each drawn with
`rand(UInt)`. `p` is passed through to the filter unchanged.
"""
function BloomFilter(; k=5, p=1/16)
    seeds = ntuple(k) do _
        rand(UInt)
    end
    return BloomFilter{UInt64,k}(p, seeds)
end
26+
27+
"""
    in!(x, f::BloomFilter)

Test whether `x` may already be in `f`, then record it.

Returns `true` when every bit of `x`'s mask was already set in `f.memory`
(a possible false positive), and `false` when at least one bit was missing.
In both cases the mask is OR-ed into `f.memory` before returning.
"""
function in!(x, f::BloomFilter{T}) where {T}
    mask = _hash_bloom(x, f.seeds, T)
    # No mask bit is missing from memory  <=>  (memory & mask) == mask.
    already_set = iszero(mask & ~f.memory)
    f.memory = f.memory | mask
    return already_set
end
33+
34+
"""
    false_positive_rate(f::BloomFilter)

Return `(set_bits / total_bits)^K` for the current state of `f`, where
`set_bits` is the number of one-bits in `f.memory`, `total_bits` is the bit
width of the memory type and `K` is the filter's number of seeds.
"""
function false_positive_rate(f::BloomFilter{T,K}) where {T,K}
    fill_ratio = count_ones(f.memory) / (8 * sizeof(T))
    return fill_ratio^K
end
39+
40+
"""
    Unique(f::AbstractSet, g)

Pair an iterable `g` with the set `f` that this file's `iterate` methods use to
record the elements they have already produced from `g`.

Both fields are type parameters so that accesses to `u.f` and `u.g` are
concretely typed. An `AbstractSet` that is not a `Set` is copied into a `Set`.
"""
struct Unique{S<:Set,G}
    f::S  # elements recorded so far
    g::G  # wrapped iterable
end

# Keep accepting any AbstractSet, converting it to a Set as the untyped-field
# version of this struct did.
Unique(f::AbstractSet, g) = Unique(Set(f), g)
44+
45+
"""
    iterate(u::Unique)

Start iterating `u`: fetch the first element of `u.g`, record it in `u.f` and
return it together with the state of `u.g`.

Returns `nothing` when `u.g` is empty. The first element is always produced,
whatever `u.f` already contains.
"""
function iterate(u::Unique)
    it = iterate(u.g)
    # An empty source yields `nothing`, which cannot be destructured.
    isnothing(it) && return nothing
    x, next = it
    push!(u.f, x)
    return (x, next)
end
50+
51+
"""
    iterate(u::Unique, state)

Advance `u.g` from `state` and return the next element that `in!` reports as
not yet present in `u.f`, together with the following state of `u.g`.
Elements reported as present are skipped. Returns `nothing` once `u.g` is
exhausted.
"""
function iterate(u::Unique, state)
    it = iterate(u.g, state)
    # Loop rather than recurse: a long run of already-seen elements must not
    # add one stack frame per skipped element.
    while !isnothing(it)
        x, next = it
        # `in!` reports prior membership and records `x` in the same call.
        in!(x, u.f) || return (x, next)  # definitely unique
        it = iterate(u.g, next)
    end
    return nothing
end
65+
66+
# The number of elements a `Unique` yields is not known up front. The trait is
# declared on the type, as the iteration interface specifies; Base's fallback
# for instances forwards to `IteratorSize(typeof(x))`, so querying it with a
# `Unique` value still answers `SizeUnknown()`.
IteratorSize(::Type{<:Unique}) = Base.SizeUnknown()

0 commit comments

Comments
 (0)