Skip to content

Commit 2076cbd

Browse files
committed
Merge branch 'feat/DEX-2291/add-option-max-error-count' into 'master'
[DEX-2291] feat: add option max_error_count Closes DEX-2291 See merge request nstmrt/rubygems/sbmt-kafka_consumer!55
2 parents f3d0c62 + 82feac8 commit 2076cbd

File tree

9 files changed

+89
-8
lines changed

9 files changed

+89
-8
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1313

1414
### Fixed
1515

16+
## [2.5.0] - 2024-06-24
17+
18+
### Added
19+
20+
- Added option `max_error_count` for liveness probes: the probe fails once the number of `librdkafka.error` events reaches this limit
21+
1622
## [2.4.1] - 2024-06-15
1723

1824
### Fixed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ default: &default
9292
enabled: true
9393
path: "/liveness"
9494
timeout: 15
95+
max_error_count: 15 # default 10
9596
metrics: # optional section
9697
port: 9090
9798
path: "/metrics"

lib/sbmt/kafka_consumer/config/probes/liveness_probe.rb

+1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ class Sbmt::KafkaConsumer::Config::Probes::LivenessProbe < Dry::Struct
88
.optional
99
.default("/liveness")
1010
attribute :timeout, Sbmt::KafkaConsumer::Types::Coercible::Integer.optional.default(10)
11+
attribute :max_error_count, Sbmt::KafkaConsumer::Types::Coercible::Integer.optional.default(10)
1112
end

lib/sbmt/kafka_consumer/instrumentation/liveness_listener.rb

+23-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@ class LivenessListener
77
include ListenerHelper
88
include KafkaConsumer::Probes::Probe
99

10-
def initialize(timeout_sec: 10)
10+
ERROR_TYPE = "Liveness probe error"
11+
12+
def initialize(timeout_sec: 10, max_error_count: 10)
1113
@consumer_groups = Karafka::App.routes.map(&:name)
1214
@timeout_sec = timeout_sec
15+
@max_error_count = max_error_count
16+
@error_count = 0
17+
@error_backtrace = nil
1318
@polls = {}
1419

1520
setup_subscription
@@ -18,16 +23,31 @@ def initialize(timeout_sec: 10)
1823
def probe(_env)
1924
now = current_time
2025
timed_out_polls = select_timed_out_polls(now)
21-
return probe_ok groups: meta_from_polls(polls, now) if timed_out_polls.empty?
2226

23-
probe_error failed_groups: meta_from_polls(timed_out_polls, now)
27+
if timed_out_polls.empty? && @error_count < @max_error_count
28+
probe_ok groups: meta_from_polls(polls, now) if timed_out_polls.empty?
29+
elsif @error_count >= @max_error_count
30+
probe_error error_type: ERROR_TYPE, failed_librdkafka: {error_count: @error_count, error_backtrace: @error_backtrace}
31+
else
32+
probe_error error_type: ERROR_TYPE, failed_groups: meta_from_polls(timed_out_polls, now)
33+
end
2434
end
2535

2636
def on_connection_listener_fetch_loop(event)
2737
consumer_group = event.payload[:subscription_group].consumer_group
2838
polls[consumer_group.name] = current_time
2939
end
3040

41+
def on_error_occurred(event)
42+
type = event[:type]
43+
44+
return unless type == "librdkafka.error"
45+
error = event[:error]
46+
47+
@error_backtrace ||= (error.backtrace || []).join("\n")
48+
@error_count += 1
49+
end
50+
3151
private
3252

3353
attr_reader :polls, :timeout_sec, :consumer_groups

lib/sbmt/kafka_consumer/probes/host.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def health_check_app(config)
2323
liveness = config[:liveness]
2424
if liveness[:enabled]
2525
c.probe liveness[:path], Sbmt::KafkaConsumer::Instrumentation::LivenessListener.new(
26-
timeout_sec: liveness[:timeout]
26+
timeout_sec: liveness[:timeout], max_error_count: liveness[:max_error_count]
2727
)
2828
end
2929

lib/sbmt/kafka_consumer/probes/probe.rb

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def probe_ok(extra_meta = {})
1919
end
2020

2121
def probe_error(extra_meta = {})
22+
KafkaConsumer.logger.error("probe error meta: #{meta.merge(extra_meta).inspect}")
2223
[500, HEADERS, [meta.merge(extra_meta).to_json]]
2324
end
2425

lib/sbmt/kafka_consumer/version.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
module Sbmt
44
module KafkaConsumer
5-
VERSION = "2.4.1"
5+
VERSION = "2.5.0"
66
end
77
end

spec/sbmt/kafka_consumer/instrumentation/liveness_listener_spec.rb

+44-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,14 @@
1919
expect(probe).to eq [
2020
500,
2121
{"Content-Type" => "application/json"},
22-
[{failed_groups: {"CONSUMER_GROUP" => {had_poll: false}}}.to_json]
22+
[
23+
{
24+
error_type: Sbmt::KafkaConsumer::Instrumentation::LivenessListener::ERROR_TYPE,
25+
failed_groups:
26+
{"CONSUMER_GROUP" =>
27+
{had_poll: false}}
28+
}.to_json
29+
]
2330
]
2431
end
2532
end
@@ -63,6 +70,7 @@
6370
{"Content-Type" => "application/json"},
6471
[
6572
{
73+
error_type: Sbmt::KafkaConsumer::Instrumentation::LivenessListener::ERROR_TYPE,
6674
failed_groups: {
6775
"CONSUMER_GROUP" => {
6876
had_poll: true,
@@ -76,4 +84,39 @@
7684
end
7785
end
7886
end
87+
88+
context "with librdkafka errors" do
89+
let(:error_event) { {type: "librdkafka.error", error: StandardError.new("Test error")} }
90+
91+
before do
92+
allow(error_event[:error]).to receive(:backtrace).and_return(["line 1", "line 2"])
93+
end
94+
95+
it "increments error count and stores backtrace" do
96+
expect { service.on_error_occurred(error_event) }.to change { service.instance_variable_get(:@error_count) }.by(1)
97+
expect(service.instance_variable_get(:@error_backtrace)).to eq("line 1\nline 2")
98+
end
99+
100+
context "when error count exceeds max_error_count" do
101+
before do
102+
10.times { service.on_error_occurred(error_event) }
103+
end
104+
105+
it "returns error with error count and backtrace" do
106+
expect(probe).to eq [
107+
500,
108+
{"Content-Type" => "application/json"},
109+
[
110+
{
111+
error_type: Sbmt::KafkaConsumer::Instrumentation::LivenessListener::ERROR_TYPE,
112+
failed_librdkafka: {
113+
error_count: 10,
114+
error_backtrace: "line 1\nline 2"
115+
}
116+
}.to_json
117+
]
118+
]
119+
end
120+
end
121+
end
79122
end

spec/sbmt/kafka_consumer/probes/probe_spec.rb

+11-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ def probe(_env); end
1313

1414
let(:env) { double(:env) }
1515
let(:service) { subject_klass.new }
16+
let(:logger) { instance_double(Logger) }
17+
18+
before do
19+
allow(Sbmt::KafkaConsumer).to receive(:logger).and_return(logger)
20+
allow(logger).to receive(:error)
21+
end
1622

1723
describe ".call" do
1824
it "calls probe with env" do
@@ -36,8 +42,11 @@ def probe(_env); end
3642
end
3743

3844
describe ".probe_error" do
39-
it "returns 500 with meta" do
40-
expect(service.probe_error).to eq [500, {"Content-Type" => "application/json"}, ["{}"]]
45+
it "logs the error message and returns 500 with meta" do
46+
error_meta = {foo: "bar"}
47+
expect(service.probe_error(error_meta)).to eq [500, {"Content-Type" => "application/json"}, [error_meta.to_json]]
48+
49+
expect(logger).to have_received(:error).with("probe error meta: #{error_meta.inspect}")
4150
end
4251
end
4352
end

0 commit comments

Comments
 (0)