Skip to content

Commit 56ed006

Browse files
testing different circuit breaking scenarios (#794)
* testing different circuit breaking scenarios * adding concurrency * adds more puts to get further information during phases * Fixing concurrency, unprotected ping, extras * update classic sustained test * cleaning up outdated tests, and testing without ping rate * modify ki instead of dividing by window size --------- Co-authored-by: Abdulrahman Alhamali <[email protected]>
1 parent 86a733e commit 56ed006

File tree

7 files changed

+526
-13
lines changed

7 files changed

+526
-13
lines changed

experiments/experimental_resource.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,14 @@ def current_latency_degradation
172172
end
173173
end
174174

175+
def unprotected_ping
176+
if should_fail?(current_error_rate)
177+
raise RequestError, "Ping failed"
178+
else
179+
true
180+
end
181+
end
182+
175183
# Get current error rate (accounting for ramp-up)
176184
def current_error_rate
177185
return @error_rate_degradation[:rate] unless @error_rate_degradation[:ramp_start] && @error_rate_degradation[:ramp_duration] > 0

experiments/sustained_load.png

244 KB
Loading
256 KB
Loading

experiments/test_sustained_load.rb

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
# frozen_string_literal: true
2+
3+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
4+
5+
require "semian"
6+
require_relative "experimental_resource"
7+
8+
puts "Creating experimental resource configuration..."
9+
# Resource configuration (shared across all thread-local instances)
10+
resource_config = {
11+
name: "protected_service",
12+
endpoints_count: 50,
13+
min_latency: 0.01,
14+
max_latency: 0.3,
15+
distribution: {
16+
type: :log_normal,
17+
mean: 0.15,
18+
std_dev: 0.05,
19+
},
20+
error_rate: 0.01, # Starting at 1% error rate
21+
timeout: 5, # 5 seconds timeout
22+
semian: {
23+
success_threshold: 2,
24+
error_threshold: 3,
25+
error_threshold_timeout: 20,
26+
error_timeout: 15,
27+
bulkhead: false,
28+
},
29+
}
30+
31+
# Initialize Semian resource before threading to avoid race conditions
32+
puts "Initializing Semian resource..."
33+
begin
34+
init_resource = Semian::Experiments::ExperimentalResource.new(**resource_config)
35+
init_resource.request(0) # Make one request to trigger registration
36+
rescue
37+
# Ignore any error, we just needed to trigger registration
38+
end
39+
puts "Resource initialized successfully.\n"
40+
41+
outcomes = {}
42+
done = false
43+
outcomes_mutex = Mutex.new
44+
45+
num_threads = 60
46+
puts "Starting #{num_threads} concurrent request threads (50 requests/second each = 3000 rps total)..."
47+
puts "Each thread will have its own resource instance (matching production behavior)...\n"
48+
49+
request_threads = []
50+
thread_resources = []
51+
num_threads.times do |_|
52+
request_threads << Thread.new do
53+
# Each thread creates its own resource instance (like production)
54+
# They share the same Semian circuit breaker via the name
55+
thread_resource = Semian::Experiments::ExperimentalResource.new(**resource_config)
56+
thread_resources << thread_resource
57+
58+
until done
59+
sleep(0.02) # Each thread: 50 requests per second
60+
61+
begin
62+
thread_resource.request(rand(thread_resource.endpoints_count))
63+
64+
outcomes_mutex.synchronize do
65+
current_sec = outcomes[Time.now.to_i] ||= {
66+
success: 0,
67+
circuit_open: 0,
68+
error: 0,
69+
}
70+
print("✓")
71+
current_sec[:success] += 1
72+
end
73+
rescue Semian::Experiments::ExperimentalResource::CircuitOpenError => e
74+
outcomes_mutex.synchronize do
75+
current_sec = outcomes[Time.now.to_i] ||= {
76+
success: 0,
77+
circuit_open: 0,
78+
error: 0,
79+
}
80+
print("⚡")
81+
current_sec[:circuit_open] += 1
82+
end
83+
rescue Semian::Experiments::ExperimentalResource::RequestError, Semian::Experiments::ExperimentalResource::TimeoutError => e
84+
outcomes_mutex.synchronize do
85+
current_sec = outcomes[Time.now.to_i] ||= {
86+
success: 0,
87+
circuit_open: 0,
88+
error: 0,
89+
}
90+
print("✗")
91+
current_sec[:error] += 1
92+
end
93+
end
94+
end
95+
end
96+
end
97+
98+
test_duration = 540 # 9 minutes total
99+
100+
puts "\n=== Sustained Load Test (CLASSIC) ==="
101+
puts "Phase 1: Baseline 1% error rate (2 minutes)"
102+
puts "Phase 2: High 20% error rate (5 minutes)"
103+
puts "Phase 3: Return to baseline 1% error rate (2 minutes)"
104+
puts "Total Duration: #{test_duration} seconds"
105+
puts "Starting test...\n"
106+
107+
start_time = Time.now
108+
sleep 120
109+
110+
thread_resources.each do |thread_resource|
111+
thread_resource.set_error_rate(0.20)
112+
end
113+
114+
sleep 300
115+
116+
thread_resources.each do |thread_resource|
117+
thread_resource.set_error_rate(0.01)
118+
end
119+
120+
sleep 120
121+
122+
done = true
123+
puts "\nWaiting for all request threads to finish..."
124+
request_threads.each(&:join)
125+
end_time = Time.now
126+
127+
puts "\n\n=== Test Complete ==="
128+
puts "Actual duration: #{(end_time - start_time).round(2)} seconds"
129+
puts "\nGenerating analysis..."
130+
131+
# Calculate summary statistics
132+
total_success = outcomes.values.sum { |data| data[:success] }
133+
total_circuit_open = outcomes.values.sum { |data| data[:circuit_open] }
134+
total_error = outcomes.values.sum { |data| data[:error] }
135+
total_requests = total_success + total_circuit_open + total_error
136+
137+
puts "\n=== Summary Statistics ==="
138+
puts "Total Requests: #{total_requests}"
139+
puts " Successes: #{total_success} (#{(total_success.to_f / total_requests * 100).round(2)}%)"
140+
puts " Circuit Open: #{total_circuit_open} (#{(total_circuit_open.to_f / total_requests * 100).round(2)}%)"
141+
puts " Errors: #{total_error} (#{(total_error.to_f / total_requests * 100).round(2)}%)"
142+
puts "\nExpected errors at 20% rate: #{(total_requests * 0.20).round(0)}"
143+
puts "Actual errors: #{total_error}"
144+
puts "Difference: #{total_error - (total_requests * 0.20).round(0)}"
145+
146+
# Time-based analysis (30-second buckets)
147+
bucket_size = 30 # seconds
148+
num_buckets = (test_duration / bucket_size.to_f).ceil
149+
150+
puts "\n=== Time-Based Analysis (#{bucket_size}-second buckets) ==="
151+
(0...num_buckets).each do |bucket_idx|
152+
bucket_start = outcomes.keys[0] + (bucket_idx * bucket_size)
153+
bucket_data = outcomes.select { |time, _| time >= bucket_start && time < bucket_start + bucket_size }
154+
155+
bucket_success = bucket_data.values.sum { |d| d[:success] }
156+
bucket_errors = bucket_data.values.sum { |d| d[:error] }
157+
bucket_circuit = bucket_data.values.sum { |d| d[:circuit_open] }
158+
bucket_total = bucket_success + bucket_errors + bucket_circuit
159+
160+
bucket_time_range = "#{bucket_idx * bucket_size}-#{(bucket_idx + 1) * bucket_size}s"
161+
circuit_pct = bucket_total > 0 ? ((bucket_circuit.to_f / bucket_total) * 100).round(2) : 0
162+
error_pct = bucket_total > 0 ? ((bucket_errors.to_f / bucket_total) * 100).round(2) : 0
163+
status = bucket_circuit > 0 ? "⚡" : "✓"
164+
165+
puts "#{status} #{bucket_time_range}: #{bucket_total} requests | Success: #{bucket_success} | Errors: #{bucket_errors} (#{error_pct}%) | Circuit Open: #{bucket_circuit} (#{circuit_pct}%)"
166+
end
167+
168+
# Calculate circuit breaker efficiency
169+
expected_errors = (total_requests * 0.20).round(0)
170+
actual_errors = total_error
171+
error_difference = actual_errors - expected_errors
172+
173+
puts "\n=== Classic Circuit Breaker Impact ==="
174+
puts "Expected errors without circuit breaker: #{expected_errors}"
175+
puts "Actual errors with circuit breaker: #{actual_errors}"
176+
if error_difference > 0
177+
puts "Extra errors allowed through: #{error_difference}"
178+
puts "Rejection efficiency: #{((1 - error_difference.to_f / expected_errors) * 100).round(2)}%"
179+
else
180+
puts "Errors prevented: #{-error_difference}"
181+
puts "Protection efficiency: #{((-error_difference.to_f / expected_errors) * 100).round(2)}%"
182+
end
183+
184+
puts "\nGenerating visualization..."
185+
186+
require "gruff"
187+
188+
# Create line graph showing requests per 10-second bucket
189+
graph = Gruff::Line.new(1400)
190+
graph.title = "Classic Circuit Breaker: Sustained 20% Error Load"
191+
graph.x_axis_label = "Time (10-second intervals)"
192+
graph.y_axis_label = "Requests per Interval"
193+
194+
graph.hide_dots = false
195+
graph.line_width = 3
196+
197+
# Aggregate data into 10-second buckets for detailed visualization
198+
small_bucket_size = 10
199+
num_small_buckets = (test_duration / small_bucket_size.to_f).ceil
200+
201+
bucketed_data = []
202+
(0...num_small_buckets).each do |bucket_idx|
203+
bucket_start = outcomes.keys[0] + (bucket_idx * small_bucket_size)
204+
bucket_data = outcomes.select { |time, _| time >= bucket_start && time < bucket_start + small_bucket_size }
205+
206+
bucketed_data << {
207+
success: bucket_data.values.sum { |d| d[:success] },
208+
circuit_open: bucket_data.values.sum { |d| d[:circuit_open] },
209+
error: bucket_data.values.sum { |d| d[:error] },
210+
}
211+
end
212+
213+
# Set x-axis labels (show every 30 seconds for clarity)
214+
labels = {}
215+
(0...num_small_buckets).each do |i|
216+
time_sec = i * small_bucket_size
217+
labels[i] = "#{time_sec}s" if time_sec % 30 == 0
218+
end
219+
graph.labels = labels
220+
221+
graph.data("Success", bucketed_data.map { |d| d[:success] })
222+
graph.data("Circuit Open", bucketed_data.map { |d| d[:circuit_open] })
223+
graph.data("Error", bucketed_data.map { |d| d[:error] })
224+
225+
graph.write("sustained_load.png")
226+
227+
puts "Graph saved to sustained_load.png"

0 commit comments

Comments
 (0)