Skip to content

Commit b3f1dfe

Browse files
committed
✨ Add SequenceSet#normalized?
Returns whether `#string` is fully normalized: entries have been sorted, deduplicated, and coalesced, and all entries are in normal form. In other words, `#normalized?` returns `true` if (and only if) `#string` is equal to `#normalized_string`. (`#entries` and `#elements` can be identical for non-normalized strings.) Because this is a new method, the benchmarks monkey-patch a naive implementation of the method for prior versions of net-imap without it.
1 parent 6af9120 commit b3f1dfe

File tree

3 files changed

+178
-5
lines changed

3 files changed

+178
-5
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
SAMPLES = Integer ENV.fetch("BENCHMARK_SAMPLES", 100)
7+
INPUT_COUNT = Integer ENV.fetch("BENCHMARK_INPUT_COUNT", 1000)
8+
MAX_INPUT = Integer ENV.fetch("BENCHMARK_MAX_INPUT", 1400)
9+
WARMUP_RUNS = Integer ENV.fetch("BENCHMARK_WARMUP_RUNS", 200)
10+
SHUFFLE_PCT = Float ENV.fetch("BENCHMARK_SHUFFLE_PCT", 0.2)
11+
ABNORMAL_PCT = Float ENV.fetch("BENCHMARK_ABNORMAL_PCT", 0.2)
12+
13+
# Builds +count+ sequence sets, each created from +set_size+ random numbers
# drawn from 1..max.
def init_sets(count: 100, set_size: INPUT_COUNT, max: MAX_INPUT)
  Array.new(count) do
    numbers = Array.new(set_size) { rand(1..max) }
    Net::IMAP::SequenceSet.new(numbers)
  end
end
18+
19+
# Sets exactly as init_sets builds them; all arguments are forwarded.
def init_normal_sets(...) = init_sets(...)
22+
23+
# Same sets as init_sets (arguments forwarded), with every set frozen.
def init_frozen_normal_sets(...)
  sets = init_sets(...)
  sets.map { |seqset| seqset.freeze }
end
27+
28+
# Returns the entries of +seqset+ with roughly SHUFFLE_PCT of them moved to
# random positions.
#
# * SHUFFLE_PCT >= 1.0: all entries are shuffled.
# * SHUFFLE_PCT <  0.0: raises RangeError.
# * otherwise: each entry is selected independently with probability
#   SHUFFLE_PCT and re-inserted at a random index; the entries that were
#   not selected keep their relative order.  A value of 0.0 therefore
#   returns the entries in their original order.
def shuffle_entries(seqset)
  return seqset.entries.shuffle if SHUFFLE_PCT >= 1.0

  # Zero is accepted (nothing gets moved), so only negative values are
  # rejected -- the message says exactly that.
  raise RangeError, "SHUFFLE_PCT must not be negative" if SHUFFLE_PCT < 0.0

  moved, kept = seqset.entries.partition { rand < SHUFFLE_PCT }
  moved.each { |entry| kept.insert(rand(0..kept.size), entry) }
  kept
end
40+
41+
# Builds sets via init_sets (arguments forwarded), then rebuilds each one by
# clearing it and appending its entries in the order given by
# shuffle_entries.
def init_unsorted_sets(...)
  sets = init_sets(...)
  sets.each do |seqset|
    reordered = shuffle_entries(seqset)
    seqset.clear
    reordered.each { |entry| seqset.append(entry) }
  end
end
51+
52+
# Renders the entries of +seqset+ as a comma-separated string in which
# roughly ABNORMAL_PCT of the entries are written in a non-normal form:
# ranges are written backwards ("end:begin") and single numbers are written
# as a one-element range ("n:n").
def abnormal_form(seqset)
  parts = seqset.entries.map do |entry|
    keep_normal = ABNORMAL_PCT < rand
    if entry.is_a?(Range)
      # a range without an end is written with "*"
      last = entry.end || "*"
      keep_normal ? "#{entry.begin}:#{last}" : "#{last}:#{entry.begin}"
    else
      keep_normal ? entry : "#{entry}:#{entry}"
    end
  end
  parts.join(",")
end
65+
66+
# Builds sets via init_sets (arguments forwarded), then replaces each set's
# string with the output of abnormal_form.
def init_abnormal_sets(...)
  sets = init_sets(...)
  sets.each { |seqset| seqset.string = abnormal_form(seqset) }
end
72+
73+
# Benchmark against a naive version that could be used in earlier releases
74+
# Only define the fallback when the loaded net-imap has no #normalized?.
unless Net::IMAP::SequenceSet.method_defined?(:normalized?)
  class Net::IMAP::SequenceSet
    # Naive implementation: compare the stored string against a freshly
    # built normalized string.
    def normalized?
      return true if @string.nil?

      @string == normalized_string
    end
  end
end
81+
82+
# warmup (esp. for JIT)
83+
# Call #normalized? on small throwaway sets, WARMUP_RUNS times over.
WARMUP_RUNS.times do
  warmup_sets = init_sets(count: 20, set_size: 100, max: 120)
  warmup_sets.each(&:normalized?)
end
88+
89+
benchmark:
90+
- name: "normal (#string not called)"
91+
prelude: $sets = init_normal_sets
92+
script: $sets.sample.normalized?
93+
- name: "normal (#string called)"
94+
prelude: $sets = init_normal_sets.tap do _1.each(&:string) end
95+
script: $sets.sample.normalized?
96+
- name: "frozen and normal"
97+
prelude: $sets = init_frozen_normal_sets
98+
script: $sets.sample.normalized?
99+
- name: "unsorted"
100+
prelude: $sets = init_unsorted_sets
101+
script: $sets.sample.normalized?
102+
- name: "abnormal"
103+
prelude: $sets = init_abnormal_sets
104+
script: $sets.sample.normalized?
105+
106+
contexts:
107+
# n.b: can't use anything newer as the baseline: it's over 500x faster!
108+
- name: v0.5.12
109+
gems:
110+
net-imap: 0.5.12
111+
require: false
112+
- name: local
113+
prelude: |
114+
$LOAD_PATH.unshift "./lib"
115+
$allowed_to_profile = true # only profile local code
116+
require: false

lib/net/imap/sequence_set.rb

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ class IMAP
284284
# <tt>*</tt>.
285285
#
286286
# <i>Denormalized properties:</i>
287+
# - #normalized?: Returns whether #entries are sorted, deduplicated, and
288+
# coalesced, and all #string entries are in normalized form.
287289
# - #has_duplicates?: Returns whether the ordered entries repeat any
288290
# numbers.
289291
# - #count_duplicates: Returns the count of repeated numbers in the ordered
@@ -1696,6 +1698,53 @@ def xor!(other)
16961698
merge(other).subtract(both)
16971699
end
16981700

1701+
# Returns whether #string is fully normalized: entries have been sorted,
1702+
# deduplicated, and coalesced, and all entries are in normal form. See
1703+
# SequenceSet@Ordered+and+Normalized+sets.
1704+
#
1705+
# Net::IMAP::SequenceSet["1,3,5"].normalized? #=> true
1706+
# Net::IMAP::SequenceSet["20:30"].normalized? #=> true
1707+
#
1708+
# Net::IMAP::SequenceSet["3,5,1"].normalized? #=> false, not sorted
1709+
# Net::IMAP::SequenceSet["1,2,3"].normalized? #=> false, not coalesced
1710+
# Net::IMAP::SequenceSet["1:5,2"].normalized? #=> false, repeated number
1711+
#
1712+
# Net::IMAP::SequenceSet["1:1"].normalized? #=> false, number as range
1713+
# Net::IMAP::SequenceSet["5:1"].normalized? #=> false, backwards range
1714+
#
1715+
# Returns +true+ if (and only if) #string is equal to #normalized_string:
1716+
# seqset = Net::IMAP::SequenceSet["1:3,5"]
1717+
# seqset.string #=> "1:3,5"
1718+
# seqset.normalized_string #=> "1:3,5"
1719+
# seqset.entries #=> [1..3, 5]
1720+
# seqset.elements #=> [1..3, 5]
1721+
# seqset.normalized? #=> true
1722+
#
1723+
# seqset = Net::IMAP::SequenceSet["3,1,2"]
1724+
# seqset.string #=> "3,1,2"
1725+
# seqset.normalized_string #=> "1:3"
1726+
# seqset.entries #=> [3, 1, 2]
1727+
# seqset.elements #=> [1..3]
1728+
# seqset.normalized? #=> false
1729+
#
1730+
# Can return +false+ even when #entries and #elements are the same:
1731+
# seqset = Net::IMAP::SequenceSet["5:1"]
1732+
# seqset.string #=> "5:1"
1733+
# seqset.normalized_string #=> "1:5"
1734+
# seqset.entries #=> [1..5]
1735+
# seqset.elements #=> [1..5]
1736+
# seqset.normalized? #=> false
1737+
#
1738+
# Note that empty sets are normalized, even though they are not #valid?:
1739+
# seqset = Net::IMAP::SequenceSet.empty
1740+
# seqset.normalized? #=> true
1741+
# seqset.valid? #=> false
1742+
#
1743+
# Related: #normalize, #normalize!, #normalized_string
1744+
def normalized?
  # With no stored string there is nothing to check.
  return true if @string.nil?

  normal_string?(@string)
end
1747+
16991748
# Returns a SequenceSet with a normalized string representation: entries
17001749
# have been sorted, deduplicated, and coalesced, and all entries
17011750
# are in normal form. Returns +self+ for frozen normalized sets, and a
@@ -1706,16 +1755,15 @@ def xor!(other)
17061755
# Net::IMAP::SequenceSet["1:5,3:7,10:9,10:11"].normalize
17071756
# #=> Net::IMAP::SequenceSet["1:7,9:11"]
17081757
#
1709-
# Related: #normalize!, #normalized_string
1758+
# Related: #normalize!, #normalized_string, #normalized?
17101759
def normalize
1711-
return self if frozen? && (@string.nil? || normal_string?(@string))
1712-
remain_frozen dup.normalize!
1760+
frozen? && normalized? ? self : remain_frozen(dup.normalize!)
17131761
end
17141762

17151763
# Resets #string to be sorted, deduplicated, and coalesced. Returns
17161764
# +self+. See SequenceSet@Ordered+and+Normalized+sets.
17171765
#
1718-
# Related: #normalize, #normalized_string
1766+
# Related: #normalize, #normalized_string, #normalized?
17191767
def normalize!
17201768
modifying! # redundant check, to normalize the error message for JRuby
17211769
@string = nil
@@ -1731,7 +1779,7 @@ def normalize!
17311779
#
17321780
# Returns +nil+ when the set is empty.
17331781
#
1734-
# Related: #normalize!, #normalize, #string, #to_s
1782+
# Related: #normalize!, #normalize, #string, #to_s, #normalized?
17351783
def normalized_string
  return nil if @tuples.empty?

  # String#-@ returns a frozen, possibly deduplicated copy of the joined string
  -@tuples.map { |tuple| tuple_to_str(tuple) }.join(",")
end

test/net/imap/test_sequence_set.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,6 +1299,15 @@ def assert_seqset_enum(expected, seqset, enum)
12991299
assert_equal data[:normalize], set.normalized_string
13001300
end
13011301

1302+
test "#normalized?" do |data|
  set = SequenceSet.new(data[:input])
  # #normalized? must agree with a direct comparison of the two strings
  normalized = set.normalized_string
  string     = set.string
  expected   = normalized == string
  msg = format("expect string=%p and normalized_string=%p %s normal",
               string, normalized, expected ? "to be" : "not to be")
  assert set.normalized? == expected, msg
end
1310+
13021311
test "#normalize" do |data|
13031312
set = SequenceSet.new(data[:input])
13041313
assert_equal data[:normalize], set.normalize.string

0 commit comments

Comments
 (0)