-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbloom_filter.rb
78 lines (67 loc) · 1.96 KB
/
bloom_filter.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
require 'compsci/bloom_filter/openssl'
include CompSci
# simulate a db query that returns data for roughly 10% of queries
# a non-nil result means additional (hypothetical) processing is required
def db_query(num, mod: 10, elapsed: 0.01)
sleep elapsed
Zlib.crc32(num.to_s) if num % mod == 0
end
# check ARGV for config directives
klass = BloomFilter
ARGV.each { |arg|
if arg.match(/digest/i)
klass = BloomFilter::Digest
elsif arg.match(/openssl/i)
klass = BloomFilter::OpenSSL
end
}
bf = klass.new(bits: 1024)
puts klass.name
puts bf
puts
# now we want to preload the filter to match the db
# if the db returns a record, we will take hypothetical further action
# if the db returns nil, no action necessary
# so we want to load the filter with queries for which the db has records
# if the filter returns true, we still check the db
# the db will probably return true but maybe false (BF false positive)
# if the filter returns false, we're done, without checking the db
# first dump all known records into the filter
value_space = 999 # possible queries
found = 0
value_space.times { |i|
# since we're simulating a dump, these queries are free (elapsed: 0)
if db_query(i, elapsed: 0)
# a record was found; add it to the filter
found += 1
bf.add(i.to_s)
end
}
puts "Checked #{value_space} values to load the filter with #{found} items"
puts bf
puts
#################
puts "Running queries straight to db..."
t = Time.new
counts = Hash.new(0)
iters = 99
iters.times { |i|
puts format("%i: %s", i, db_query(i) || 'not found')
}
elapsed = Time.new - t
puts
puts format("%i queries direct to db: %.2f s elapsed", iters, elapsed)
puts
sleep 0.5
#################
puts "Running queries filtered by bloom filter..."
t = Time.new
counts = Hash.new(0)
iters = 99
iters.times { |i|
puts format("%i: %s", i, bf.include?(i.to_s) ? db_query(i) : 'filtered')
}
elapsed = Time.new - t
puts
puts format("%i filtered queries: %.2f s elapsed", iters, elapsed)
puts