From 4c2090afb30f52416486572824fb85e8f4ba5ca1 Mon Sep 17 00:00:00 2001 From: Jay Nelson Date: Thu, 23 Oct 2014 18:39:51 -0700 Subject: [PATCH 01/72] Move to epocxy 0.9.8 to sync with ttserver/uffda While moving ttserver to 0.9.8 so that it could incorporate Uffda, I discovered that kafkerl needs to change at the same time for compatibility. This merge has to be coordinated, or should be made with a new version tag. --- rebar.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 3a16831..7edf443 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.7", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.7"}}}, + {epocxy, "0.9.8", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.8"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", - "master"}}]}. \ No newline at end of file + "master"}}]}. From 7a4b5c077b3e91232a19ff451c96285fd28faee2 Mon Sep 17 00:00:00 2001 From: georgeye Date: Tue, 11 Nov 2014 16:30:55 -0800 Subject: [PATCH 02/72] update epocxy --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 7edf443..944c09e 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.8"}}}, + {epocxy, "0.9.9", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.9"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From 8c147dd2a33f07f3b790c371c04ce64d168caa97 Mon Sep 17 00:00:00 2001 From: zvoykish Date: Thu, 4 Dec 2014 21:18:17 +0200 Subject: [PATCH 03/72] updated epocxy tag to 0.9.8b --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 944c09e..4483fd7 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.9", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.9"}}}, + {epocxy, "0.9.8b", {git, "git@github.com:duomark/epocxy.git", {tag, "0.9.8b"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From f18340f64faaa732530f9434d4bd137db8076c91 Mon Sep 17 00:00:00 2001 From: zvoykish Date: Thu, 4 Dec 2014 21:28:25 +0200 Subject: [PATCH 04/72] updated rebar.config to use tigertext/epocxy repo --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 4483fd7..4eb8759 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8b", {git, "git@github.com:duomark/epocxy.git", {tag, "0.9.8b"}}}, + {epocxy, "0.9.8b", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8b"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. 
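Patches 01-04 above only move the epocxy pin between tags and repositories. For reference, the rebar (rebar2) dependency entries they edit all share the same three-element shape, sketched below with one of the tags from the diffs; the middle element is a version regex that rebar checks against the checked-out dependency's .app vsn, while the {tag, ...} part pins the git checkout, so the two should be kept in step:

    %% Illustrative rebar2 dependency entry (same shape as the epocxy lines above):
    %% {AppName, VsnRegex, {git, Url, {tag, Tag}}}
    {epocxy, "0.9.8", {git, "git@github.com:duomark/dk_cxy.git", {tag, "0.9.8"}}}
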
From 247c9dd902a4ebddc5bcf5b9f3e5ce5af9db4110 Mon Sep 17 00:00:00 2001 From: Martin Hald Date: Tue, 6 Jan 2015 15:54:10 -0800 Subject: [PATCH 05/72] Update rebar.config --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 4eb8759..d2167e5 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8b", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8b"}}}, + {epocxy, "0.9.8c", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8c"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From f64b0d29b175ade24f35f1e2f622447edba0f5c1 Mon Sep 17 00:00:00 2001 From: Jay Nelson Date: Wed, 18 Feb 2015 18:00:28 -0800 Subject: [PATCH 06/72] Use epocxy 0.9.8e --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index d2167e5..fde990d 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8c", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8c"}}}, + {epocxy, "0.9.8e", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8e"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From 64f4bbfe12ecdf843c38d7e74d204c2a4f456796 Mon Sep 17 00:00:00 2001 From: Jay Nelson Date: Tue, 3 Mar 2015 13:50:17 -0800 Subject: [PATCH 07/72] Update epocxy to 0.9.8-tt-b --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index fde990d..57365a6 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8e", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8e"}}}, + {epocxy, "0.9.8e", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-b"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From 32196116e7f3c3c69d3a144cad66c048ff50b5da Mon Sep 17 00:00:00 2001 From: Martin Kristiansen Date: Fri, 12 Jun 2015 18:09:05 -0700 Subject: [PATCH 08/72] update to avoid busy wait loop --- src/kafkerl_connector.erl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 5cb6c40..c5ee8bb 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -266,14 +266,16 @@ handle_request_metadata(State, NewTopics, _) -> Now = get_timestamp(), LastRequest = State#state.last_metadata_request, Cooldown = State#state.metadata_request_cd, - _ = case Cooldown - (Now - LastRequest) of + LastMetadataUpdate = case Cooldown - (Now - LastRequest) of Negative when Negative =< 0 -> - _ = make_metadata_request(State); + _ = make_metadata_request(State), + Now; Time -> - _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]) + _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), + LastRequest end, State#state{broker_mapping = void, known_topics = NewKnownTopics, - last_metadata_request = Now}. + last_metadata_request = LastMetadataUpdate}. 
%%============================================================================== %% Utils @@ -488,4 +490,4 @@ get_timestamp() -> %%============================================================================== warn_metadata_request(Host, Port, Reason) -> lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). \ No newline at end of file + [Host, Port, Reason]). From 76a860c0fad34fd3154fdcbddcd6fc26e5dc20d0 Mon Sep 17 00:00:00 2001 From: Jay Nelson Date: Tue, 16 Jun 2015 14:04:39 -0700 Subject: [PATCH 09/72] Bump epocxy to 0.9.8g --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 57365a6..95d46e6 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, - {epocxy, "0.9.8e", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-b"}}}, + {epocxy, "0.9.8g", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-d"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From cb40e4cb0391d5437ab74eaad0871d9f5f97e4a7 Mon Sep 17 00:00:00 2001 From: georgeye Date: Fri, 26 Jun 2015 09:32:19 -0700 Subject: [PATCH 10/72] update to use tt repo --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 95d46e6..91f7088 100644 --- a/rebar.config +++ b/rebar.config @@ -17,7 +17,7 @@ {i, "include"}]}. {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, - {lager, ".*", {git, "git@github.com:basho/lager.git", "master"}}, + {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "0.9.8g", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-d"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From a79f5e168438b1dd49e7983f17781ef7d7a9d8c4 Mon Sep 17 00:00:00 2001 From: Jay Nelson Date: Mon, 14 Sep 2015 21:09:12 -0700 Subject: [PATCH 11/72] Bump epocxy to 0.9.8h --- rebar.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rebar.config b/rebar.config index 91f7088..be48bd4 100644 --- a/rebar.config +++ b/rebar.config @@ -16,8 +16,8 @@ warn_untyped_record, {i, "include"}]}. {deps, - [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, - {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, - {epocxy, "0.9.8g", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-d"}}}, - {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", + [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, + {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, + {epocxy, "0.9.8h", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-h"}}}, + {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. 
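Patch 08 above replaces the unconditional metadata re-request with a cooldown check, so the connector no longer spins when the broker mapping is void. A minimal sketch of that scheduling rule, pulled out of kafkerl_connector:handle_request_metadata/3 with illustrative names (maybe_request_metadata/4 is not a real function in the module):

    %% Sketch of the throttle introduced in PATCH 08; timestamps are in
    %% milliseconds and CooldownMs comes from the metadata_request_cd field.
    maybe_request_metadata(Now, LastRequest, CooldownMs, State) ->
        case CooldownMs - (Now - LastRequest) of
            Wait when Wait =< 0 ->
                %% Cooldown already elapsed: request now, remember this time.
                _ = make_metadata_request(State),
                Now;
            Wait ->
                %% Still inside the cooldown: schedule one delayed retry
                %% instead of requesting again, and keep the old timestamp.
                _ = timer:apply_after(Wait, ?MODULE, request_metadata,
                                      [self(), true]),
                LastRequest
        end.

The returned value is stored back into last_metadata_request, which is what keeps repeated request_metadata calls from collapsing into a busy-wait loop.
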
From 2c13f04b37110238289cf70b98893091ac30de3a Mon Sep 17 00:00:00 2001 From: zvoykish Date: Thu, 11 Feb 2016 02:10:43 +0200 Subject: [PATCH 12/72] Bump epocxy to 1.0.0 --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index be48bd4..fca5ead 100644 --- a/rebar.config +++ b/rebar.config @@ -18,6 +18,6 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, - {epocxy, "0.9.8h", {git, "git@github.com:tigertext/epocxy.git", {tag, "0.9.8-tt-h"}}}, + {epocxy, "1.0.0", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.0"}}}, {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", "master"}}]}. From 6060ba11c186b5342d43472711df3614d6d3f0f9 Mon Sep 17 00:00:00 2001 From: Martin Hald Date: Thu, 10 Mar 2016 15:48:22 -0800 Subject: [PATCH 13/72] [OK-125] kafkerl improvements --- src/kafkerl_connector.erl | 152 ++++----------------- src/kafkerl_metadata_requester.erl | 207 +++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+), 126 deletions(-) create mode 100644 src/kafkerl_metadata_requester.erl diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index c5ee8bb..3740f78 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -7,7 +7,7 @@ -export([send/3, request_metadata/1, request_metadata/2, request_metadata/3, subscribe/2, subscribe/3, get_partitions/1, unsubscribe/2]). % Only for internal use --export([do_request_metadata/6, make_metadata_request/1]). +-export([make_metadata_request/1]). % Only for broker connections -export([produce_succeeded/2]). % Supervisors @@ -260,21 +260,33 @@ handle_request_metadata(State, Topics) -> % Ignore it if the topic mapping is void, we are already requesting the metadata handle_request_metadata(State = #state{broker_mapping = void}, _, false) -> State; -handle_request_metadata(State, NewTopics, _) -> +handle_request_metadata(State = #state{brokers = Brokers, + known_topics = Topics, + max_metadata_retries = MaxMetadataRetries, + retry_interval = RetryInterval}, + NewTopics, _) -> SortedNewTopics = lists:sort(NewTopics), NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), - Now = get_timestamp(), - LastRequest = State#state.last_metadata_request, - Cooldown = State#state.metadata_request_cd, - LastMetadataUpdate = case Cooldown - (Now - LastRequest) of - Negative when Negative =< 0 -> - _ = make_metadata_request(State), - Now; - Time -> - _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), - LastRequest - end, - State#state{broker_mapping = void, known_topics = NewKnownTopics, +%% Now = get_timestamp(), +%% LastRequest = State#state.last_metadata_request, +%% Cooldown = State#state.metadata_request_cd, +%% LastMetadataUpdate = case Cooldown - (Now - LastRequest) of +%% Negative when Negative =< 0 -> +%% _ = make_metadata_request(State), +%% Now; +%% Time -> +%% _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), +%% LastRequest +%% end, + Request = metadata_request(State, Topics), + % Start requesting metadata + BrokerMapping = case kafkerl_metadata_requester:req_metadata(Brokers, get_metadata_tcp_options(), MaxMetadataRetries, + RetryInterval, Request) of + metadata_timeout -> void; + {error, all_down} -> void; + {metadata_updated, Mapping} -> Mapping + end, + State#state{broker_mapping = BrokerMapping, known_topics = NewKnownTopics, last_metadata_request = 
LastMetadataUpdate}. %%============================================================================== @@ -295,60 +307,7 @@ get_ets_dump_name({OldName, Counter}) -> get_metadata_tcp_options() -> kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). -do_request_metadata(Pid, _Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> - Pid ! metadata_timeout; -do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> - case do_request_metadata(Brokers, TCPOpts, Request) of - {ok, TopicMapping} -> - Pid ! {metadata_updated, TopicMapping}; - _Error -> - timer:sleep(RetryInterval), - NewRetries = case Retries of - -1 -> -1; - N -> N - 1 - end, - do_request_metadata(Pid, Brokers, TCPOpts, NewRetries, RetryInterval, - Request) - end. -do_request_metadata([], _TCPOpts, _Request) -> - {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> - lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, TCPOpts) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, TCPOpts, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, TCPOpts, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. send_event(Event, {all, Callback}) -> kafkerl_utils:send_event(Callback, Event); @@ -372,54 +331,6 @@ metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, AllTopics = lists:umerge(KnownTopics, NewTopics), kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). -%%============================================================================== -%% Topic/broker mapping -%%============================================================================== -get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). 
- -expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; -expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; -expand_topic({Error, Topic, _Partitions}) -> - lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. - -expand_partitions(Metadata) -> - expand_partitions(Metadata, []). - -expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; -expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> get_broker_mapping(TopicMapping, State, 0, []). @@ -475,19 +386,8 @@ make_metadata_request(State = #state{brokers = Brokers, known_topics = Topics, max_metadata_retries = MaxMetadataRetries, retry_interval = RetryInterval}) -> - Request = metadata_request(State, Topics), - % Start requesting metadata - Params = [self(), Brokers, get_metadata_tcp_options(), MaxMetadataRetries, - RetryInterval, Request], spawn_monitor(?MODULE, do_request_metadata, Params). get_timestamp() -> {A, B, C} = erlang:now(), (A * 1000000 + B) * 1000 + C div 1000. - -%%============================================================================== -%% Error handling -%%============================================================================== -warn_metadata_request(Host, Port, Reason) -> - lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). diff --git a/src/kafkerl_metadata_requester.erl b/src/kafkerl_metadata_requester.erl new file mode 100644 index 0000000..be783fc --- /dev/null +++ b/src/kafkerl_metadata_requester.erl @@ -0,0 +1,207 @@ +-module(kafkerl_metadata_requester). +-author("martin"). + +-behaviour(gen_server). + +%% API +-export([start_link/0, req_metadata/5]). + +%% gen_server callbacks +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-include("kafkerl.hrl"). +-include("kafkerl_consumers.hrl"). + +-define(SERVER, ?MODULE). + +-record(state, { + last_metadata_req_time :: undefined | erlang:timestamp() +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%%-------------------------------------------------------------------- +%% @doc +%% Starts the server +%% +%% @end +%%-------------------------------------------------------------------- +-spec(start_link() -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link() -> + gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). 
+ +req_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request) -> + gen_server:call(?MODULE, {do_req_metadata, Brokers, TCPOpts, Retries, RetryInterval, Request}). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +-spec(init(Args :: term()) -> + {ok, State :: #state{}} | {ok, State :: #state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([]) -> + {ok, #state{}}. + +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #state{}) -> + {reply, Reply :: term(), NewState :: #state{}} | + {reply, Reply :: term(), NewState :: #state{}, timeout() | hibernate} | + {noreply, NewState :: #state{}} | + {noreply, NewState :: #state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #state{}} | + {stop, Reason :: term(), NewState :: #state{}}). +handle_call({do_req_metadata, Brokers, TCPOpts, Retries, RetryInterval, Request}, _From, State) -> + Response = do_request_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request), + {reply, Response, State}; +handle_call(_Request, _From, State) -> + {reply, ok, State}. + +-spec(handle_cast(Request :: term(), State :: #state{}) -> + {noreply, NewState :: #state{}} | + {noreply, NewState :: #state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #state{}}). +handle_cast(_Request, State) -> + {noreply, State}. + +-spec(handle_info(Info :: timeout() | term(), State :: #state{}) -> + {noreply, NewState :: #state{}} | + {noreply, NewState :: #state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #state{}}). +handle_info(_Info, State) -> + {noreply, State}. + +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #state{}) -> term()). +terminate(_Reason, _State) -> + ok. + +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #state{}, + Extra :: term()) -> + {ok, NewState :: #state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +do_request_metadata(_Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> + metadata_timeout; +do_request_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request) -> + case do_request_metadata(Brokers, TCPOpts, Request) of + {ok, TopicMapping} -> + {metadata_updated, TopicMapping}; + _Error -> + timer:sleep(RetryInterval), + NewRetries = case Retries of + -1 -> -1; + N -> N - 1 + end, + do_request_metadata(Brokers, TCPOpts, NewRetries, RetryInterval, Request) + end. 
+ +do_request_metadata([], _TCPOpts, _Request) -> + {error, all_down}; +do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> + lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, TCPOpts) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, TCPOpts, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, TCPOpts, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, TCPOpts, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, TCPOpts, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. + +%%============================================================================== +%% Topic/broker mapping +%%============================================================================== +get_topic_mapping({BrokerMetadata, TopicMetadata}) -> + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). + +expand_topic({?NO_ERROR, Topic, Partitions}) -> + {true, {Topic, Partitions}}; +expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> + % Replica not available can be ignored, still, show a warning + lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; +expand_topic({Error, Topic, _Partitions}) -> + lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. + +expand_partitions(Metadata) -> + expand_partitions(Metadata, []). 
+ +expand_partitions({_Topic, []}, Acc) -> + {true, Acc}; +expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, + _, _} | T]}, Acc) -> + lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> + lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). + + + +%%============================================================================== +%% Error handling +%%============================================================================== +warn_metadata_request(Host, Port, Reason) -> + lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). From c622bcaa41a4cb3f10ce45887a611e1cff2c0d55 Mon Sep 17 00:00:00 2001 From: Martin Hald Date: Wed, 16 Mar 2016 11:15:02 -0700 Subject: [PATCH 14/72] [OK-125] kafkerl improvements --- src/kafkerl_connector.erl | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 3740f78..2057ce9 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -6,8 +6,6 @@ %% API -export([send/3, request_metadata/1, request_metadata/2, request_metadata/3, subscribe/2, subscribe/3, get_partitions/1, unsubscribe/2]). -% Only for internal use --export([make_metadata_request/1]). % Only for broker connections -export([produce_succeeded/2]). % Supervisors @@ -33,7 +31,6 @@ callbacks = [] :: [{filters(), callback()}], known_topics = [] :: [binary()], pending = [] :: [basic_message()], - last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}}). -type state() :: #state{}. 
@@ -203,7 +200,8 @@ init([Config]) -> autocreate_topics = AutocreateTopics, max_metadata_retries = MaxMetadataRetries, metadata_request_cd = MetadataRequestCooldown}, - {_Pid, _Ref} = make_metadata_request(State), + {ok, _Pid} = kafkerl_metadata_requester:start_link(), + erlang:send_after(0, self(), {metadata_updated, []}), {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> @@ -267,17 +265,6 @@ handle_request_metadata(State = #state{brokers = Brokers, NewTopics, _) -> SortedNewTopics = lists:sort(NewTopics), NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), -%% Now = get_timestamp(), -%% LastRequest = State#state.last_metadata_request, -%% Cooldown = State#state.metadata_request_cd, -%% LastMetadataUpdate = case Cooldown - (Now - LastRequest) of -%% Negative when Negative =< 0 -> -%% _ = make_metadata_request(State), -%% Now; -%% Time -> -%% _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), -%% LastRequest -%% end, Request = metadata_request(State, Topics), % Start requesting metadata BrokerMapping = case kafkerl_metadata_requester:req_metadata(Brokers, get_metadata_tcp_options(), MaxMetadataRetries, @@ -286,8 +273,7 @@ handle_request_metadata(State = #state{brokers = Brokers, {error, all_down} -> void; {metadata_updated, Mapping} -> Mapping end, - State#state{broker_mapping = BrokerMapping, known_topics = NewKnownTopics, - last_metadata_request = LastMetadataUpdate}. + State#state{broker_mapping = BrokerMapping, known_topics = NewKnownTopics}. %%============================================================================== %% Utils @@ -382,12 +368,3 @@ send_mapping_to(NewCallback, #state{broker_mapping = Mapping}) -> Partitions = get_partitions_from_mapping(Mapping), send_event({partition_update, Partitions}, NewCallback). -make_metadata_request(State = #state{brokers = Brokers, - known_topics = Topics, - max_metadata_retries = MaxMetadataRetries, - retry_interval = RetryInterval}) -> - spawn_monitor(?MODULE, do_request_metadata, Params). - -get_timestamp() -> - {A, B, C} = erlang:now(), - (A * 1000000 + B) * 1000 + C div 1000. 
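Patches 13 and 14 move the metadata round-trip out of a spawned helper and into the kafkerl_metadata_requester gen_server, which the connector now calls synchronously. The call site roughly reduces to the hypothetical helper below (refresh_broker_mapping/4 is illustrative; the real logic is inlined in handle_request_metadata/3), with the three result shapes mirroring the requester's return values:

    %% Illustrative only: synchronous metadata refresh via the new gen_server.
    refresh_broker_mapping(Brokers, Request, MaxRetries, RetryInterval) ->
        TCPOpts = kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]),
        case kafkerl_metadata_requester:req_metadata(Brokers, TCPOpts,
                                                     MaxRetries, RetryInterval,
                                                     Request) of
            metadata_timeout      -> void;  % retries exhausted
            {error, all_down}     -> void;  % no broker answered
            {metadata_updated, M} -> M      % topic/partition -> broker mapping
        end.

One consequence of this design is worth noting: req_metadata/5 is a gen_server:call, so the connector blocks for the whole retry loop, and with the default 5000 ms call timeout a slow broker can make the call itself exit; patch 16 below reverts to the asynchronous spawn_monitor approach.
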
From 4782aa7c68bf3532cd8007d274f4d8b02f1a236b Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 28 Mar 2016 19:40:12 -0300 Subject: [PATCH 15/72] some minor fixes --- src/kafkerl_connector.erl | 24 +++++++++++++++++++++--- src/kafkerl_metadata_requester.erl | 14 +++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 2057ce9..ec2bfb2 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -269,9 +269,27 @@ handle_request_metadata(State = #state{brokers = Brokers, % Start requesting metadata BrokerMapping = case kafkerl_metadata_requester:req_metadata(Brokers, get_metadata_tcp_options(), MaxMetadataRetries, RetryInterval, Request) of - metadata_timeout -> void; - {error, all_down} -> void; - {metadata_updated, Mapping} -> Mapping + metadata_timeout -> + void; + {error, all_down} -> + void; + {metadata_updated, Mapping} -> + NewBrokerMapping = get_broker_mapping(Mapping, State), + lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), + % Get the partition data to send to the subscribers and send it + PartitionData = get_partitions_from_mapping(NewBrokerMapping), + Callbacks = State#state.callbacks, + NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), + % Add to the list of known topics + Updated_Topics = lists:sort([T || {T, _P} <- PartitionData]), + NewKnownTopics = lists:umerge(Updated_Topics, State#state.known_topics), + lager:debug("Known topics: ~p", [NewKnownTopics]), + % Reverse the pending messages and try to send them again + RPending = lists:reverse(State#state.pending), + ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), + {noreply, State#state{broker_mapping = NewBrokerMapping, pending = [], + callbacks = NewCallbacks, + known_topics = NewKnownTopics}} end, State#state{broker_mapping = BrokerMapping, known_topics = NewKnownTopics}. diff --git a/src/kafkerl_metadata_requester.erl b/src/kafkerl_metadata_requester.erl index be783fc..ec52158 100644 --- a/src/kafkerl_metadata_requester.erl +++ b/src/kafkerl_metadata_requester.erl @@ -106,7 +106,7 @@ do_request_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request) -> -1 -> -1; N -> N - 1 end, - do_request_metadata(Brokers, TCPOpts, NewRetries, RetryInterval, Request) + req_metadata(Brokers, TCPOpts, NewRetries, RetryInterval, Request) end. do_request_metadata([], _TCPOpts, _Request) -> @@ -158,12 +158,12 @@ get_topic_mapping({BrokerMetadata, TopicMetadata}) -> Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), % Converts the BrokerIds from the previous array into socket addresses lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end end, Partitions). 
expand_topic({?NO_ERROR, Topic, Partitions}) -> From 58f18a6c0c7e4667f572bdd99374e4c9460d12e8 Mon Sep 17 00:00:00 2001 From: Martin Hald Date: Tue, 29 Mar 2016 12:57:38 -0700 Subject: [PATCH 16/72] Revert "Hernan.tt hotfix" --- src/kafkerl_connector.erl | 173 +++++++++++++++++++----- src/kafkerl_metadata_requester.erl | 207 ----------------------------- 2 files changed, 139 insertions(+), 241 deletions(-) delete mode 100644 src/kafkerl_metadata_requester.erl diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index ec2bfb2..c5ee8bb 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -6,6 +6,8 @@ %% API -export([send/3, request_metadata/1, request_metadata/2, request_metadata/3, subscribe/2, subscribe/3, get_partitions/1, unsubscribe/2]). +% Only for internal use +-export([do_request_metadata/6, make_metadata_request/1]). % Only for broker connections -export([produce_succeeded/2]). % Supervisors @@ -31,6 +33,7 @@ callbacks = [] :: [{filters(), callback()}], known_topics = [] :: [binary()], pending = [] :: [basic_message()], + last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}}). -type state() :: #state{}. @@ -200,8 +203,7 @@ init([Config]) -> autocreate_topics = AutocreateTopics, max_metadata_retries = MaxMetadataRetries, metadata_request_cd = MetadataRequestCooldown}, - {ok, _Pid} = kafkerl_metadata_requester:start_link(), - erlang:send_after(0, self(), {metadata_updated, []}), + {_Pid, _Ref} = make_metadata_request(State), {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> @@ -258,40 +260,22 @@ handle_request_metadata(State, Topics) -> % Ignore it if the topic mapping is void, we are already requesting the metadata handle_request_metadata(State = #state{broker_mapping = void}, _, false) -> State; -handle_request_metadata(State = #state{brokers = Brokers, - known_topics = Topics, - max_metadata_retries = MaxMetadataRetries, - retry_interval = RetryInterval}, - NewTopics, _) -> +handle_request_metadata(State, NewTopics, _) -> SortedNewTopics = lists:sort(NewTopics), NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), - Request = metadata_request(State, Topics), - % Start requesting metadata - BrokerMapping = case kafkerl_metadata_requester:req_metadata(Brokers, get_metadata_tcp_options(), MaxMetadataRetries, - RetryInterval, Request) of - metadata_timeout -> - void; - {error, all_down} -> - void; - {metadata_updated, Mapping} -> - NewBrokerMapping = get_broker_mapping(Mapping, State), - lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), - % Get the partition data to send to the subscribers and send it - PartitionData = get_partitions_from_mapping(NewBrokerMapping), - Callbacks = State#state.callbacks, - NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), - % Add to the list of known topics - Updated_Topics = lists:sort([T || {T, _P} <- PartitionData]), - NewKnownTopics = lists:umerge(Updated_Topics, State#state.known_topics), - lager:debug("Known topics: ~p", [NewKnownTopics]), - % Reverse the pending messages and try to send them again - RPending = lists:reverse(State#state.pending), - ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), - {noreply, State#state{broker_mapping = NewBrokerMapping, pending = [], - callbacks = NewCallbacks, - known_topics = NewKnownTopics}} - end, - State#state{broker_mapping = BrokerMapping, known_topics = NewKnownTopics}. 
+ Now = get_timestamp(), + LastRequest = State#state.last_metadata_request, + Cooldown = State#state.metadata_request_cd, + LastMetadataUpdate = case Cooldown - (Now - LastRequest) of + Negative when Negative =< 0 -> + _ = make_metadata_request(State), + Now; + Time -> + _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), + LastRequest + end, + State#state{broker_mapping = void, known_topics = NewKnownTopics, + last_metadata_request = LastMetadataUpdate}. %%============================================================================== %% Utils @@ -311,7 +295,60 @@ get_ets_dump_name({OldName, Counter}) -> get_metadata_tcp_options() -> kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). +do_request_metadata(Pid, _Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> + Pid ! metadata_timeout; +do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> + case do_request_metadata(Brokers, TCPOpts, Request) of + {ok, TopicMapping} -> + Pid ! {metadata_updated, TopicMapping}; + _Error -> + timer:sleep(RetryInterval), + NewRetries = case Retries of + -1 -> -1; + N -> N - 1 + end, + do_request_metadata(Pid, Brokers, TCPOpts, NewRetries, RetryInterval, + Request) + end. +do_request_metadata([], _TCPOpts, _Request) -> + {error, all_down}; +do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> + lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, TCPOpts) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, TCPOpts, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, TCPOpts, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, TCPOpts, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + warn_metadata_request(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, TCPOpts, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. send_event(Event, {all, Callback}) -> kafkerl_utils:send_event(Callback, Event); @@ -335,6 +372,54 @@ metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, AllTopics = lists:umerge(KnownTopics, NewTopics), kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). 
+%%============================================================================== +%% Topic/broker mapping +%%============================================================================== +get_topic_mapping({BrokerMetadata, TopicMetadata}) -> + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). + +expand_topic({?NO_ERROR, Topic, Partitions}) -> + {true, {Topic, Partitions}}; +expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> + % Replica not available can be ignored, still, show a warning + lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; +expand_topic({Error, Topic, _Partitions}) -> + lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. + +expand_partitions(Metadata) -> + expand_partitions(Metadata, []). + +expand_partitions({_Topic, []}, Acc) -> + {true, Acc}; +expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, + _, _} | T]}, Acc) -> + lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> + lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> get_broker_mapping(TopicMapping, State, 0, []). @@ -386,3 +471,23 @@ send_mapping_to(NewCallback, #state{broker_mapping = Mapping}) -> Partitions = get_partitions_from_mapping(Mapping), send_event({partition_update, Partitions}, NewCallback). +make_metadata_request(State = #state{brokers = Brokers, + known_topics = Topics, + max_metadata_retries = MaxMetadataRetries, + retry_interval = RetryInterval}) -> + Request = metadata_request(State, Topics), + % Start requesting metadata + Params = [self(), Brokers, get_metadata_tcp_options(), MaxMetadataRetries, + RetryInterval, Request], + spawn_monitor(?MODULE, do_request_metadata, Params). + +get_timestamp() -> + {A, B, C} = erlang:now(), + (A * 1000000 + B) * 1000 + C div 1000. + +%%============================================================================== +%% Error handling +%%============================================================================== +warn_metadata_request(Host, Port, Reason) -> + lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). 
diff --git a/src/kafkerl_metadata_requester.erl b/src/kafkerl_metadata_requester.erl deleted file mode 100644 index ec52158..0000000 --- a/src/kafkerl_metadata_requester.erl +++ /dev/null @@ -1,207 +0,0 @@ --module(kafkerl_metadata_requester). --author("martin"). - --behaviour(gen_server). - -%% API --export([start_link/0, req_metadata/5]). - -%% gen_server callbacks --export([init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2, - code_change/3]). - --include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). - --define(SERVER, ?MODULE). - --record(state, { - last_metadata_req_time :: undefined | erlang:timestamp() -}). - -%%%=================================================================== -%%% API -%%%=================================================================== - -%%-------------------------------------------------------------------- -%% @doc -%% Starts the server -%% -%% @end -%%-------------------------------------------------------------------- --spec(start_link() -> - {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). -start_link() -> - gen_server:start_link({local, ?SERVER}, ?MODULE, [], []). - -req_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request) -> - gen_server:call(?MODULE, {do_req_metadata, Brokers, TCPOpts, Retries, RetryInterval, Request}). - -%%%=================================================================== -%%% gen_server callbacks -%%%=================================================================== - --spec(init(Args :: term()) -> - {ok, State :: #state{}} | {ok, State :: #state{}, timeout() | hibernate} | - {stop, Reason :: term()} | ignore). -init([]) -> - {ok, #state{}}. - --spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, - State :: #state{}) -> - {reply, Reply :: term(), NewState :: #state{}} | - {reply, Reply :: term(), NewState :: #state{}, timeout() | hibernate} | - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), Reply :: term(), NewState :: #state{}} | - {stop, Reason :: term(), NewState :: #state{}}). -handle_call({do_req_metadata, Brokers, TCPOpts, Retries, RetryInterval, Request}, _From, State) -> - Response = do_request_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request), - {reply, Response, State}; -handle_call(_Request, _From, State) -> - {reply, ok, State}. - --spec(handle_cast(Request :: term(), State :: #state{}) -> - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), NewState :: #state{}}). -handle_cast(_Request, State) -> - {noreply, State}. - --spec(handle_info(Info :: timeout() | term(), State :: #state{}) -> - {noreply, NewState :: #state{}} | - {noreply, NewState :: #state{}, timeout() | hibernate} | - {stop, Reason :: term(), NewState :: #state{}}). -handle_info(_Info, State) -> - {noreply, State}. - --spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), - State :: #state{}) -> term()). -terminate(_Reason, _State) -> - ok. - --spec(code_change(OldVsn :: term() | {down, term()}, State :: #state{}, - Extra :: term()) -> - {ok, NewState :: #state{}} | {error, Reason :: term()}). -code_change(_OldVsn, State, _Extra) -> - {ok, State}. 
- -%%%=================================================================== -%%% Internal functions -%%%=================================================================== - -do_request_metadata(_Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> - metadata_timeout; -do_request_metadata(Brokers, TCPOpts, Retries, RetryInterval, Request) -> - case do_request_metadata(Brokers, TCPOpts, Request) of - {ok, TopicMapping} -> - {metadata_updated, TopicMapping}; - _Error -> - timer:sleep(RetryInterval), - NewRetries = case Retries of - -1 -> -1; - N -> N - 1 - end, - req_metadata(Brokers, TCPOpts, NewRetries, RetryInterval, Request) - end. - -do_request_metadata([], _TCPOpts, _Request) -> - {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> - lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, TCPOpts) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, TCPOpts, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, TCPOpts, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. - -%%============================================================================== -%% Topic/broker mapping -%%============================================================================== -get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). - -expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; -expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; -expand_topic({Error, Topic, _Partitions}) -> - lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. - -expand_partitions(Metadata) -> - expand_partitions(Metadata, []). 
- -expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; -expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). - - - -%%============================================================================== -%% Error handling -%%============================================================================== -warn_metadata_request(Host, Port, Reason) -> - lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). From 292956189bc1eecf3dceafe110adc757a0b60788 Mon Sep 17 00:00:00 2001 From: georgeye Date: Fri, 17 Jun 2016 12:59:22 -0700 Subject: [PATCH 17/72] update deps --- rebar.config | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rebar.config b/rebar.config index fca5ead..8134a63 100644 --- a/rebar.config +++ b/rebar.config @@ -16,8 +16,7 @@ warn_untyped_record, {i, "include"}]}. {deps, - [{parse_trans, ".*", {git, "git@github.com:uwiger/parse_trans.git", "master"}}, + [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "1.0.0", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.0"}}}, - {validerl, ".*", {git, "https://github.com/HernanRivasAcosta/validerl.git", - "master"}}]}. + {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. From 55aad620ed58709f513dba23234d3e34c7caea64 Mon Sep 17 00:00:00 2001 From: georgeye Date: Fri, 17 Jun 2016 13:30:19 -0700 Subject: [PATCH 18/72] update version --- src/kafkerl.app.src | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index d354ab2..c60cecf 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,8 +1,8 @@ {application, kafkerl, [{description, []}, - {vsn, "1"}, + {vsn, "1.0"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, {env, []}, - {lager, [{handlers, [{lager_console_backend, info}]}]}]}. \ No newline at end of file + {lager, [{handlers, [{lager_console_backend, info}]}]}]}. From c6ca0eda608a3305b90cab22075d5562f2c58a91 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Thu, 5 Mar 2015 16:22:46 -0300 Subject: [PATCH 19/72] updated the epocxy version --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index 8134a63..2154eed 100644 --- a/rebar.config +++ b/rebar.config @@ -19,4 +19,4 @@ [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "1.0.0", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.0"}}}, - {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. + {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. 
\ No newline at end of file From fa1cc97eae74621294287b509414ebf68ffe6780 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Wed, 29 Jul 2015 14:39:52 -0300 Subject: [PATCH 20/72] added the kafka consumer and simplified the producer API, bumped version to 2.0.0.a --- include/kafkerl.hrl | 7 +- rel/kafkerl.app.config | 5 +- src/kafkerl.erl | 154 ++++++++++++++---------------- src/kafkerl_broker_connection.erl | 98 +++++++++++++++++-- src/kafkerl_connector.erl | 81 +++++++++++----- src/kafkerl_protocol.erl | 33 +++++-- src/kafkerl_utils.erl | 20 ++++ 7 files changed, 271 insertions(+), 127 deletions(-) diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index b78cb13..64964c3 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -57,8 +57,9 @@ -type produce_response() :: {ok, correlation_id(), [produce_topic()]}. %% Fetch responses --type messages() :: [{topic(), [{{partition(), integer()}, [binary()]}]}]. --type fetch_state() :: {binary(), integer(), [any()]}. +-type messages() :: [{topic(), [{{partition(), integer()}, + [binary() | {binary(), binary()}]}]}]. +-type fetch_state() :: {binary(), integer(), [any()]} | void. -type fetch_response() :: {ok, integer(), messages()} | {incomplete, integer(), messages(), fetch_state()} | error(). @@ -91,4 +92,4 @@ -define(OFFSETS_LOAD_IN_PROGRESS_CODE, 14). -define(CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE, 15). -define(NOT_COORDINATOR_FOR_CONSUMER_CODE, 16). --define(UNKNOWN, -1). +-define(UNKNOWN, -1). \ No newline at end of file diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index a514299..2542515 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -13,7 +13,8 @@ {metadata_tcp_timeout, 1000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds - {metadata_request_cooldown, 1500} % In milliseconds - ]}, + {metadata_request_cooldown, 1500}, % In milliseconds + {consumer_min_bytes, 1}, + {consumer_max_wait, 1500}]}, {topics, [test1, test2, test3]}, {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index cbf7430..320aa23 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -2,24 +2,26 @@ -author('hernanrivasacosta@gmail.com'). -export([start/0, start/2]). --export([version/0, - produce/1, produce/2, produce_messages_from_file/1, - produce_messages_from_file/2, produce_messages_from_file/3, - get_partitions/0, get_partitions/1, - subscribe/1, subscribe/2, subscribe/3, - unsubscribe/1, unsubscribe/2, +-export([produce/3, produce/4, produce/5, + consume/2, consume/3, consume/4, request_metadata/0, request_metadata/1, request_metadata/2, - valid_message/1]). + partitions/0, partitions/1]). +-export([version/0]). -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). %% Types --type produce_option() :: {buffer_size, integer() | infinity} | - {dump_location, string()}. --type produce_options() :: [produce_option()]. +-type option() :: {buffer_size, integer() | infinity} | + {dump_location, string()} | + {consumer, callback()} | + {min_bytes, integer()} | + {max_wait, integer()} | + {offset, integer()}. +-type options() :: [option()]. +-type server_ref() :: atom() | pid(). --export_type([produce_options/0]). +-export_type([options/0, server_ref/0]). 
%%============================================================================== %% API @@ -34,79 +36,71 @@ start(_StartType, _StartArgs) -> %%============================================================================== %% Access API %%============================================================================== --spec version() -> {integer(), integer(), integer()}. -version() -> - {1, 1, 2}. - --spec produce(basic_message()) -> ok. -produce(Message) -> - produce(?MODULE, Message). --spec produce(atom(), basic_message()) -> ok; - (basic_message(), produce_options()) -> ok. -produce(Message, Options) when is_tuple(Message) -> - produce(?MODULE, Message, Options); -produce(Name, Message) -> - produce(Name, Message, []). --spec produce(atom(), basic_message(), produce_options()) -> ok. -produce(Name, Message, Options) -> - kafkerl_connector:send(Name, Message, Options). - --spec produce_messages_from_file(string()) -> ok. -produce_messages_from_file(Filename) -> - produce_messages_from_file(?MODULE, Filename). --spec produce_messages_from_file(atom(), basic_message()) -> ok; - (string(), produce_options()) -> ok. -produce_messages_from_file(Filename, Options) when is_list(Filename) -> - produce_messages_from_file(?MODULE, Filename, Options); -produce_messages_from_file(Name, Filename) -> - produce_messages_from_file(Name, Filename, []). --spec produce_messages_from_file(atom(), string(), produce_options()) -> ok. -produce_messages_from_file(Name, Filename, Options) -> - {ok, Bin} = file:read_file(Filename), - Messages = binary_to_term(Bin), - [produce(Name, M, Options) || M <- Messages], - ok. - --spec get_partitions() -> [{topic(), [partition()]}] | error(). -get_partitions() -> - get_partitions(?MODULE). --spec get_partitions(atom()) -> [{topic(), [partition()]}] | error(). -get_partitions(Name) -> - kafkerl_connector:get_partitions(Name). - --spec subscribe(callback()) -> ok. -subscribe(Callback) -> - subscribe(?MODULE, Callback). --spec subscribe(atom(), callback()) -> ok. -subscribe(Callback, all = Filter) -> - subscribe(?MODULE, Callback, Filter); -subscribe(Callback, Filter) when is_list(Filter) -> - subscribe(?MODULE, Callback, Filter); -subscribe(Name, Callback) -> - subscribe(Name, Callback, all). --spec subscribe(atom(), callback(), filters()) -> ok. -subscribe(Name, Callback, Filter) -> - kafkerl_connector:subscribe(Name, Callback, Filter). - --spec unsubscribe(callback()) -> ok. -unsubscribe(Callback) -> - unsubscribe(?MODULE, Callback). --spec unsubscribe(atom(), callback()) -> ok. -unsubscribe(Name, Callback) -> - kafkerl_connector:unsubscribe(Name, Callback). +%% Produce API +-spec produce(topic(), partition(), payload()) -> ok. +produce(Topic, Partition, Message) -> + produce(?MODULE, Topic, Partition, Message, []). + +-spec produce(server_ref(), topic(), partition(), payload()) -> ok; + (topic(), partition(), payload(), options()) -> ok. +produce(Topic, Partition, Message, Options) when is_list(Options) -> + produce(?MODULE, {Topic, Partition, Message}, Options); +produce(ServerRef, Topic, Partition, Message) -> + produce(ServerRef, {Topic, Partition, Message}, []). + +-spec produce(server_ref(), topic(), partition(), payload(), options()) -> ok. +produce(ServerRef, Topic, Partition, Message, Options) -> + kafkerl_connector:send(ServerRef, {Topic, Partition, Message}, Options). + +%% Consume API +-spec consume(topic(), partition()) -> ok | error(). +consume(Topic, Partition) -> + consume(?MODULE, Topic, Partition, []). 
+ +-spec consume(topic(), partition(), options()) -> ok | [binary()] | error(); + (server_ref(), topic(), partition()) -> ok | error(). +consume(Topic, Partition, Options) when is_list(Options) -> + consume(?MODULE, Topic, Partition, Options); +consume(ServerRef, Topic, Partition) -> + consume(ServerRef, Topic, Partition, []). + +-spec consume(server_ref(), topic(), partition(), options()) -> + ok | [binary()] | error(). +consume(ServerRef, Topic, Partition, Options) -> + case lists:keyfind(consumer, 1, Options) of + false -> + NewOptions = [{consumer, self()} | Options], + kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), + kafkerl_utils:gather_consume_responses(); + _ -> + kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) + end. +%% Metadata API -spec request_metadata() -> ok. request_metadata() -> request_metadata(?MODULE). + -spec request_metadata(atom() | [topic()]) -> ok. -request_metadata(Name) when is_atom(Name) -> - kafkerl_connector:request_metadata(Name); -request_metadata(Topics) -> - request_metadata(?MODULE, Topics). +request_metadata(Topics) when is_list(Topics) -> + request_metadata(?MODULE, Topics); +request_metadata(ServerRef) -> + kafkerl_connector:request_metadata(ServerRef). + -spec request_metadata(atom(), [topic()]) -> ok. -request_metadata(Name, Topics) -> - kafkerl_connector:request_metadata(Name, Topics). +request_metadata(ServerRef, Topics) -> + kafkerl_connector:request_metadata(ServerRef, Topics). + +%% Partitions +-spec partitions() -> [{topic(), [partition()]}] | error(). +partitions() -> + partitions(?MODULE). --spec valid_message(any()) -> boolean(). -valid_message(Any) -> - kafkerl_utils:valid_message(Any). \ No newline at end of file +-spec partitions(server_ref()) -> [{topic(), [partition()]}] | error(). +partitions(ServerRef) -> + kafkerl_connector:get_partitions(ServerRef). + +%% Utils +-spec version() -> {integer(), integer(), integer()}. +version() -> + {2, 0, 0}. \ No newline at end of file diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 686cbfd..98921d1 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,7 +4,7 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1]). +-export([add_buffer/2, clear_buffers/1, fetch/4]). % Only for internal use -export([connect/6]). % Supervisors @@ -14,6 +14,7 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). +-include("kafkerl_consumers.hrl"). -type server_ref() :: atom() | pid(). -type conn_idx() :: 0..1023. @@ -33,7 +34,11 @@ request_number = 0 :: integer(), pending_requests = [] :: [integer()], max_time_queued = 0 :: integer(), - ets = undefined :: atom()}). + ets = undefined :: atom(), + fetching = void :: integer() | void, + fetches = [] :: [{correlation_id(), + callback(), + fetch_state()}]}). -type state() :: #state{}. %%============================================================================== @@ -60,6 +65,10 @@ add_buffer(ServerRef, Buffer) -> clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). +-spec fetch(server_ref(), topic(), partition(), kafkerl:options()) -> ok. +fetch(ServerRef, Topic, Partition, Options) -> + gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). 
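%% A short usage sketch of the simplified produce API and the two consume
%% modes exported from kafkerl.erl earlier in this patch. It assumes the
%% default kafkerl instance is already started; the module name, topic and
%% partition are made up for the example.
-module(kafkerl_usage_example).
-export([run/0]).

run() ->
  Topic     = <<"test1">>,
  Partition = 0,
  %% produce/3 routes through the default instance
  ok = kafkerl:produce(Topic, Partition, <<"hello">>),
  %% Without a {consumer, _} option, consume/3 adds {consumer, self()} and
  %% blocks in kafkerl_utils:gather_consume_responses/0, returning the
  %% fetched messages (or an error tuple).
  Blocking = kafkerl:consume(Topic, Partition, [{offset, 0}]),
  %% With a {consumer, Pid} option the call returns once the fetch request is
  %% sent and the messages are delivered to Pid as Erlang messages instead.
  ok = kafkerl:consume(Topic, Partition, [{consumer, self()}, {offset, 0}]),
  Blocking.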
+ %%============================================================================== %% gen_server callbacks %%============================================================================== @@ -67,7 +76,9 @@ clear_buffers(ServerRef) -> handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> {reply, ok, State#state{buffers = [Buffer| Buffers]}}; handle_call({clear_buffers}, _From, State) -> - {reply, ok, State#state{buffers = []}}. + {reply, ok, State#state{buffers = []}}; +handle_call({fetch, Topic, Partition, Options}, _From, State) -> + handle_fetch(Topic, Partition, Options, State). -spec handle_info(any(), state()) -> {noreply, state()}. handle_info({connected, Socket}, State) -> @@ -163,6 +174,30 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, end end. +handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, + socket = Socket, + name = Name}) -> + Offset = proplists:get_value(offset, Options, 0), + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + {ok, CorrelationId, NewState} = build_fetch_correlation_id(Options, State), + Request = {Topic, {Partition, Offset, 2147483647}}, + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(NewState)}; + ok -> + lager:debug("~p sent request ~p", [Name, CorrelationId]), + {reply, ok, NewState} + end. + % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, tcp_options = TCPOpts, @@ -173,8 +208,39 @@ handle_tcp_close(State = #state{retry_interval = RetryInterval, _Pid = spawn_link(?MODULE, connect, Params), State#state{socket = undefined}. -handle_tcp_data(Bin, State = #state{connector = Connector, ets = EtsName, - name = Name}) -> +handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, State), + case lists:keytake(CorrelationId, 1, Fetches) of + {value, {CorrelationId, Consumer, FetchState}, NewFetches} -> + NewState = State#state{fetches = NewFetches}, + handle_fetch_response(Bin, Consumer, FetchState, NewState); + false -> + handle_produce_response(Bin, State) + end. + +handle_fetch_response(Bin, Consumer, FetchState, State) -> + case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of + {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> + send_messages(Consumer, {message_count, MessagesInPartition}, true), + send_messages(Consumer, {consume_done, Messages}, true), + {ok, State#state{fetching = void}}; + {incomplete, CorrelationId, Topics, NewFetchState} -> + _ = case Topics of + [{_, [{_, Messages}]}] -> + send_messages(Consumer, {consumed, Messages}, false); + _ -> + ignore + end, + Fetches = State#state.fetches, + NewFetches = [{CorrelationId, Consumer, NewFetchState} | Fetches], + {ok, State#state{fetches = NewFetches, fetching = CorrelationId}}; + Error -> + kafkerl_utils:send_event(Consumer, Error), + {ok, State#state{fetching = void}} + end. 
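%% A minimal sketch of a consumer process for the events that
%% handle_fetch_response/4 above forwards through kafkerl_utils:send_event/2.
%% When the {consumer, _} option is a plain pid, the tuples arrive as
%% ordinary Erlang messages (the receive clauses in
%% kafkerl_utils:gather_consume_responses rely on the same shapes). The
%% module name, the io:format reporting and the 5000 ms timeout are
%% assumptions made for the example.
-module(kafkerl_listener_example).
-export([collect/0]).

collect() ->
  collect([]).

collect(Acc) ->
  receive
    {message_count, N} ->        % number of messages in the fetched partition
      io:format("expecting ~p message(s)~n", [N]),
      collect(Acc);
    {consumed, Messages} ->      % partial chunk of an incomplete fetch
      collect(Acc ++ Messages);
    {consume_done, Messages} ->  % final chunk, the fetch is complete
      {ok, Acc ++ Messages};
    {error, _Reason} = Error ->  % parse or broker errors are forwarded as-is
      Error
  after 5000 ->
    {error, {timeout, Acc}}
  end.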
+ +handle_produce_response(Bin, State = #state{connector = Connector, name = Name, + ets = EtsName}) -> case kafkerl_protocol:parse_produce_response(Bin) of {ok, CorrelationId, Topics} -> case ets:lookup(EtsName, CorrelationId) of @@ -237,6 +303,12 @@ build_correlation_id(State = #state{request_number = RequestNumber, CorrelationId = (ConnIdx bsl 22) bor NextRequest, {ok, CorrelationId, State#state{request_number = NextRequest}}. +build_fetch_correlation_id(Options, State = #state{fetches = Fetches}) -> + Consumer = proplists:get_value(consumer, Options), + {ok, CorrelationId, NewState} = build_correlation_id(State), + NewFetches = [{CorrelationId, Consumer, void} | Fetches], + {ok, CorrelationId, NewState#state{fetches = NewFetches}}. + % TODO: Refactor this function, it is not sufficiently clear what it does separate_errors(Topics) -> separate_errors(Topics, {[], []}). @@ -262,8 +334,8 @@ handle_errors(Errors, Messages, Name) -> handle_error({Topic, Partition, Error}, Messages, Name) when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse - Error =:= ?LEADER_NOT_AVAILABLE orelse - Error =:= ?NOT_LEADER_FOR_PARTITION -> + Error =:= ?NOT_LEADER_FOR_PARTITION orelse + Error =:= ?LEADER_NOT_AVAILABLE -> case get_message_for_error(Topic, Partition, Messages, Name) of undefined -> false; Message -> {true, Message} @@ -328,4 +400,14 @@ get_messages_from(Ets, Retries) -> _Error -> lager:warning("giving up on reading from the ETS buffer"), [] - end. \ No newline at end of file + end. + +parse_correlation_id(Bin, #state{fetching = void}) -> + {ok, _CorrelationId, _NewBin} = kafkerl_protocol:parse_correlation_id(Bin); +parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> + {ok, CorrelationId, Bin}. + +send_messages(_Consumer, {_EventType, []}, false = _SendEmptyMessages) -> + ok; +send_messages(Consumer, Event, _SendEmptyMessages) -> + kafkerl_utils:send_event(Consumer, Event). \ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index c5ee8bb..69f3533 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -4,8 +4,15 @@ -behaviour(gen_server). %% API --export([send/3, request_metadata/1, request_metadata/2, request_metadata/3, - subscribe/2, subscribe/3, get_partitions/1, unsubscribe/2]). +% Metadata +-export([request_metadata/1, request_metadata/2, request_metadata/3, + get_partitions/1]). +% Produce +-export([send/3]). +% Consume +-export([fetch/4]). +% Common +-export([subscribe/2, subscribe/3, unsubscribe/2]). % Only for internal use -export([do_request_metadata/6, make_metadata_request/1]). % Only for broker connections @@ -19,9 +26,8 @@ -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). --type server_ref() :: atom() | pid(). -type broker_mapping_key() :: {topic(), partition()}. --type broker_mapping() :: {broker_mapping_key(), server_ref()}. +-type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. -record(state, {brokers = [] :: [socket_address()], broker_mapping = void :: [broker_mapping()] | void, @@ -35,7 +41,8 @@ pending = [] :: [basic_message()], last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), - last_dump_name = {"", 0} :: {string(), integer()}}). + last_dump_name = {"", 0} :: {string(), integer()}, + default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. %%============================================================================== @@ -45,7 +52,7 @@ start_link(Name, Config) -> gen_server:start_link({local, Name}, ?MODULE, [Config], []). 
--spec send(server_ref(), basic_message(), kafkerl:produce_option()) -> +-spec send(kafkerl:server_ref(), basic_message(), kafkerl:options()) -> ok | error(). send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), @@ -62,7 +69,13 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> gen_server:call(ServerRef, {send, Message}) end. --spec get_partitions(server_ref()) -> [{topic(), [partition()]}] | error(). +-spec fetch(kafkerl:server_ref(), topic(), partition(), kafkerl:options()) -> + ok. +fetch(ServerRef, Topic, Partition, Options) -> + gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). + +-spec get_partitions(kafkerl:server_ref()) -> + [{topic(), [partition()]}] | error(). get_partitions(ServerRef) -> case gen_server:call(ServerRef, {get_partitions}) of {ok, Mapping} -> @@ -71,29 +84,29 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(server_ref(), callback()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), callback()) -> ok | error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). --spec subscribe(server_ref(), callback(), filters()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), callback(), filters()) -> ok | error(). subscribe(ServerRef, Callback, Filter) -> gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). --spec unsubscribe(server_ref(), callback()) -> ok. +-spec unsubscribe(kafkerl:server_ref(), callback()) -> ok. unsubscribe(ServerRef, Callback) -> gen_server:call(ServerRef, {unsubscribe, Callback}). --spec request_metadata(server_ref()) -> ok. +-spec request_metadata(kafkerl:server_ref()) -> ok. request_metadata(ServerRef) -> gen_server:call(ServerRef, {request_metadata}). --spec request_metadata(server_ref(), [topic()] | boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [topic()] | boolean()) -> ok. request_metadata(ServerRef, TopicsOrForced) -> gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). --spec request_metadata(server_ref(), [topic()], boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [topic()], boolean()) -> ok. request_metadata(ServerRef, Topics, Forced) -> gen_server:call(ServerRef, {request_metadata, Topics, Forced}). --spec produce_succeeded(server_ref(), +-spec produce_succeeded(kafkerl:server_ref(), [{topic(), partition(), [binary()], integer()}]) -> ok. produce_succeeded(ServerRef, Messages) -> gen_server:cast(ServerRef, {produce_succeeded, Messages}). 
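%% Sketch: one way a caller could pick a partition from the mapping exposed
%% by get_partitions/1 above (kafkerl:partitions/0 wraps it). The
%% phash2-based choice and the module name are arbitrary assumptions, not
%% something kafkerl prescribes.
-module(kafkerl_partition_example).
-export([partition_for/2]).

partition_for(Topic, Key) ->
  case kafkerl:partitions() of
    {error, _Reason} = Error ->
      Error;
    TopicList ->
      %% crashes with a badmatch if the topic is not in the metadata yet
      {Topic, Partitions} = lists:keyfind(Topic, 1, TopicList),
      lists:nth(1 + erlang:phash2(Key, length(Partitions)), Partitions)
  end.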
@@ -114,6 +127,8 @@ handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> {reply, ok, State#state{last_dump_name = DumpName}}; handle_call({send, Message}, _From, State) -> handle_send(Message, State); +handle_call({fetch, Topic, Partition, Options}, _From, State) -> + {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({request_metadata}, _From, State) -> {reply, ok, handle_request_metadata(State, [])}; handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> @@ -191,18 +206,22 @@ init([Config]) -> {topics, [binary], required}, {metadata_tcp_timeout, positive_integer, {default, 1500}}, {assume_autocreate_topics, boolean, {default, false}}, - {metadata_request_cooldown, positive_integer, {default, 333}}], + {metadata_request_cooldown, positive_integer, {default, 333}}, + {consumer_min_bytes, positive_integer, {default, 1}}, + {consumer_max_wait, positive_integer, {default, 1500}}], case normalizerl:normalize_proplist(Schema, Config) of {ok, [Brokers, MaxMetadataRetries, ClientId, Topics, RetryInterval, - AutocreateTopics, MetadataRequestCooldown]} -> - State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - client_id = ClientId, - retry_interval = RetryInterval, - autocreate_topics = AutocreateTopics, - max_metadata_retries = MaxMetadataRetries, - metadata_request_cd = MetadataRequestCooldown}, + AutocreateTopics, MetadataRequestCooldown, MinBytes, MaxWait]} -> + DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + client_id = ClientId, + retry_interval = RetryInterval, + autocreate_topics = AutocreateTopics, + max_metadata_retries = MaxMetadataRetries, + metadata_request_cd = MetadataRequestCooldown, + default_fetch_options = DefaultFetchOptions}, {_Pid, _Ref} = make_metadata_request(State), {ok, State}; {errors, Errors} -> @@ -248,12 +267,22 @@ handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, end end. +handle_fetch(_Topic, _Partition, _Options, #state{broker_mapping = void}) -> + {error, not_connected}; +handle_fetch(Topic, Partition, Options, State) -> + case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of + false -> + {error, {no_broker, {Topic, Partition}}}; + {_, Broker} -> + NewOptions = Options ++ State#state.default_fetch_options, + kafkerl_broker_connection:fetch(Broker, Topic, Partition, NewOptions) + end. + handle_get_partitions(#state{broker_mapping = void}) -> {error, not_available}; handle_get_partitions(#state{broker_mapping = Mapping}) -> {ok, Mapping}. - handle_request_metadata(State, Topics) -> handle_request_metadata(State, Topics, false). @@ -313,7 +342,7 @@ do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> do_request_metadata([], _TCPOpts, _Request) -> {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> +do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), % Connect to the Broker case gen_tcp:connect(Host, Port, TCPOpts) of diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index deceb09..11b18a9 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -4,7 +4,8 @@ -export([build_produce_request/4, build_fetch_request/5, build_metadata_request/3]). 
--export([parse_produce_response/1, parse_fetch_response/1, +-export([parse_correlation_id/1, + parse_produce_response/1, parse_fetch_response/1, parse_fetch_response/2, parse_metadata_response/1]). -include("kafkerl.hrl"). @@ -32,6 +33,12 @@ build_metadata_request(Topics, CorrelationId, ClientId) -> [build_request_header(ClientId, ?METADATA_KEY, CorrelationId), Request]. % Message parsing +-spec parse_correlation_id(binary()) -> {ok, integer(), binary()}. +parse_correlation_id(<<_Size:32/unsigned-integer, + CorrelationId:32/unsigned-integer, + Remainder/binary>>) -> + {ok, CorrelationId, Remainder}. + -spec parse_produce_response(binary()) -> produce_response(). parse_produce_response(<<_Size:32/unsigned-integer, CorrelationId:32/unsigned-integer, @@ -49,12 +56,16 @@ parse_fetch_response(<<_Size:32/unsigned-integer, {ok, Topics} -> {ok, CorrelationId, Topics}; {incomplete, Topics, {Bin, Steps}} -> - {incomplete, CorrelationId, Topics, {Bin, CorrelationId, Steps}} + {incomplete, CorrelationId, Topics, {Bin, CorrelationId, Steps}}; + {error, _Reason} = Error -> + Error end; parse_fetch_response(_Other) -> {error, unexpected_binary}. -spec parse_fetch_response(binary(), fetch_state()) -> fetch_response(). +parse_fetch_response(Bin, void) -> + parse_fetch_response(Bin); parse_fetch_response(Bin, {Remainder, CorrelationId, Steps}) -> NewBin = <>, parse_steps(NewBin, CorrelationId, Steps). @@ -68,10 +79,10 @@ parse_metadata_response(< {ok, CorrelationId, {Brokers, Metadata}}; - Error -> + {error, _Reason} = Error -> Error end; - Error -> + {error, _Reason} = Error -> Error end; parse_metadata_response(_Other) -> @@ -361,7 +372,9 @@ parse_topics(Count, Bin, Acc) -> Step = {topics, Count}, {incomplete, lists:reverse(Acc, [Topic]), {Remainder, Steps ++ [Step]}}; incomplete -> - {incomplete, lists:reverse(Acc), {Bin, [{topics, Count}]}} + {incomplete, lists:reverse(Acc), {Bin, [{topics, Count}]}}; + {error, _Reason} = Error -> + Error end. parse_topic(< Step = {topic, TopicName}, - {incomplete, {TopicName, Partitions}, {Bin, Steps ++ [Step]}} + {incomplete, {TopicName, Partitions}, {Bin, Steps ++ [Step]}}; + {error, _Reason} = Error -> + Error end; parse_topic(_Bin) -> incomplete. @@ -393,7 +408,9 @@ parse_partitions(Count, Bin, Acc) -> {incomplete, lists:reverse(Acc, [Partition]), NewState}; incomplete -> Step = {partitions, Count}, - {incomplete, lists:reverse(Acc), {Bin, [Step]}} + {incomplete, lists:reverse(Acc), {Bin, [Step]}}; + {error, _Reason} = Error -> + Error end. parse_partition(<> -> - {no_key, Value} + Value end, % 12 is the size of the offset plus the size of the MessageSize int {ok, {KV, MessageSize + 12}, Remainder}; diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index bbcd149..267f369 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -5,6 +5,7 @@ -export([get_tcp_options/1]). -export([merge_messages/1, split_messages/1, valid_message/1]). -export([buffer_name/2]). +-export([gather_consume_responses/0, gather_consume_responses/1]). -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). @@ -129,3 +130,22 @@ is_partition({Partition, Messages}) -> (is_binary(Messages) orelse is_list_of_binaries(Messages)); is_partition(_Any) -> false. + +gather_consume_responses() -> + gather_consume_responses(2500). +gather_consume_responses(Timeout) -> + gather_consume_responses(Timeout, []). 
+gather_consume_responses(Timeout, Acc) -> + receive + {message_count, _} -> + % Ignore this one + gather_consume_responses(Acc); + {consumed, Messages} -> + gather_consume_responses(Acc ++ Messages); + {consume_done, Messages} -> + Acc ++ Messages; + {error, _Reason} = Error -> + Error + after Timeout -> + {error, {no_response, Acc}} + end. \ No newline at end of file From 2469eb4ceaff5e008f4f625cd7f340b8aaa03d78 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Tue, 4 Aug 2015 11:08:35 -0300 Subject: [PATCH 21/72] fixed bad arity on a function call --- src/kafkerl_utils.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 267f369..b7d8f70 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -139,9 +139,9 @@ gather_consume_responses(Timeout, Acc) -> receive {message_count, _} -> % Ignore this one - gather_consume_responses(Acc); + gather_consume_responses(Timeout, Acc); {consumed, Messages} -> - gather_consume_responses(Acc ++ Messages); + gather_consume_responses(Timeout, Acc ++ Messages); {consume_done, Messages} -> Acc ++ Messages; {error, _Reason} = Error -> From d7f3dd68e9e843b652ed4d92dbe4e26f356385ca Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Tue, 4 Aug 2015 11:13:58 -0300 Subject: [PATCH 22/72] removed an unnecesary parameter on send_messages --- src/kafkerl_broker_connection.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 98921d1..713fff7 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -221,13 +221,13 @@ handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> handle_fetch_response(Bin, Consumer, FetchState, State) -> case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> - send_messages(Consumer, {message_count, MessagesInPartition}, true), - send_messages(Consumer, {consume_done, Messages}, true), + send_messages(Consumer, {message_count, MessagesInPartition}), + send_messages(Consumer, {consume_done, Messages}), {ok, State#state{fetching = void}}; {incomplete, CorrelationId, Topics, NewFetchState} -> _ = case Topics of - [{_, [{_, Messages}]}] -> - send_messages(Consumer, {consumed, Messages}, false); + [{_, [{_, [_ | _] = Messages}]}] -> + send_messages(Consumer, {consumed, Messages}); _ -> ignore end, @@ -407,7 +407,5 @@ parse_correlation_id(Bin, #state{fetching = void}) -> parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> {ok, CorrelationId, Bin}. -send_messages(_Consumer, {_EventType, []}, false = _SendEmptyMessages) -> - ok; -send_messages(Consumer, Event, _SendEmptyMessages) -> +send_messages(Consumer, Event) -> kafkerl_utils:send_event(Consumer, Event). 
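%% With the arity fix above, the blocking consume path threads the timeout
%% through every recursion: {consumed, _} chunks are accumulated,
%% {consume_done, _} ends the call, and a quiet mailbox for the 2500 ms
%% default of gather_consume_responses/0 yields {error, {no_response, Acc}}.
%% A shell-style sketch (topic, partition and offset are illustrative):
%%
%%   1> kafkerl:consume(<<"test1">>, 0, [{offset, 0}]).
%%   %% -> a list of message binaries, or {error, {no_response, Partial}}
%%   %%    if no complete fetch response arrived within the timeout.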
\ No newline at end of file From 301359920da082663cb15fe7be6a25f89551986b Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Thu, 21 Jan 2016 14:37:56 -0300 Subject: [PATCH 23/72] consumer improvements, removed types from .hrl files, improved API --- bin/stop_all_brokers.sh | 2 +- bin/stop_zk.sh | 2 +- include/kafkerl.hrl | 77 --------- include/kafkerl_consumers.hrl | 9 - src/kafkerl.erl | 30 +++- src/kafkerl_broker_connection.erl | 254 +++++++++++++++++----------- src/kafkerl_connector.erl | 50 +++--- src/kafkerl_protocol.erl | 269 +++++++++++++++++++----------- src/kafkerl_sup.erl | 2 - src/kafkerl_utils.erl | 22 ++- 10 files changed, 398 insertions(+), 319 deletions(-) delete mode 100644 include/kafkerl_consumers.hrl diff --git a/bin/stop_all_brokers.sh b/bin/stop_all_brokers.sh index 47bee29..51cec34 100755 --- a/bin/stop_all_brokers.sh +++ b/bin/stop_all_brokers.sh @@ -1 +1 @@ -ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -15 \ No newline at end of file +ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -9 diff --git a/bin/stop_zk.sh b/bin/stop_zk.sh index a63f1fc..b93e032 100755 --- a/bin/stop_zk.sh +++ b/bin/stop_zk.sh @@ -1 +1 @@ -ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}' | xargs kill -15 \ No newline at end of file +ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}' | xargs kill -9 diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index 64964c3..bf4955c 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -1,80 +1,3 @@ -%% Constants -% Misc --define(ETS_BUFFER, ets_buffer). --define(DEFAULT_TCP_OPTS, lists:sort([{mode, binary}, {packet, 0}])). -% Compression --define(COMPRESSION_NONE, none). --define(COMPRESSION_GZIP, gzip). --define(COMPRESSION_SNAPPY, snappy). --define(KAFKERL_COMPRESSION_TYPES, [?COMPRESSION_NONE, - ?COMPRESSION_GZIP, - ?COMPRESSION_SNAPPY]). -% API keys --define(PRODUCE_KEY, 0). --define(FETCH_KEY, 1). --define(OFFSET_KEY, 2). --define(METADATA_KEY, 3). - -%% Common --type error_code() :: -1..16. --type correlation_id() :: non_neg_integer(). - -%% Connection --type address_host() :: string(). --type address_port() :: 1..65535. --type socket_address() :: {address_host(), address_port()}. --type broker_id() :: integer(). --type broker() :: {broker_id(), socket_address()}. - -%% Configuration --type compression() :: ?COMPRESSION_NONE | - ?COMPRESSION_GZIP | - ?COMPRESSION_SNAPPY. - -%% Requests --type client_id() :: binary(). --type topic() :: binary(). --type partition() :: integer(). --type payload() :: binary() | [binary()]. --type basic_message() :: {topic(), partition(), payload()}. --type merged_message() :: basic_message() | - {topic(), [{partition(), payload()}]} | - [merged_message()]. - --type fetch_offset() :: integer(). --type fetch_max_bytes() :: integer(). --type fetch_partition() :: {partition(), fetch_offset(), fetch_max_bytes()} | - [fetch_partition()]. --type fetch_request() :: {topic(), fetch_partition()} | - [fetch_request()]. - -%% Reponses --type error() :: {error, atom() | {atom(), any()}}. - -%% Produce responses --type produce_partition() :: {partition(), error_code(), integer()}. --type produce_topic() :: {topic(), [produce_partition()]}. --type produce_response() :: {ok, correlation_id(), [produce_topic()]}. - -%% Fetch responses --type messages() :: [{topic(), [{{partition(), integer()}, - [binary() | {binary(), binary()}]}]}]. --type fetch_state() :: {binary(), integer(), [any()]} | void. 
--type fetch_response() :: {ok, integer(), messages()} | - {incomplete, integer(), messages(), fetch_state()} | - error(). - -%% Metadata responses --type leader() :: integer(). --type replica() :: integer(). --type isr() :: integer(). --type partition_metadata() :: {error_code(), partition(), broker_id(), - [replica()], [isr()]}. --type topic_metadata() :: {error_code(), topic(), [partition_metadata()]}. --type metadata() :: {[broker()], [topic_metadata()]}. --type metadata_response() :: {ok, correlation_id(), metadata()} | - error(). - %% Error codes -define(NO_ERROR, 0). -define(OFFSET_OUT_OF_RANGE, 1). diff --git a/include/kafkerl_consumers.hrl b/include/kafkerl_consumers.hrl deleted file mode 100644 index 6b66190..0000000 --- a/include/kafkerl_consumers.hrl +++ /dev/null @@ -1,9 +0,0 @@ -% Pid, M:F or M:F(A1, A2, ..., An) --type callback() :: pid() | - fun() | - {atom(), atom()} | - {atom(), atom(), [any()]}. - --type filters() :: all | [atom()]. - --type message_metadata() :: {done | incomplete, integer(), integer()}. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 320aa23..32fb06e 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -3,25 +3,35 @@ -export([start/0, start/2]). -export([produce/3, produce/4, produce/5, - consume/2, consume/3, consume/4, + consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, request_metadata/0, request_metadata/1, request_metadata/2, partitions/0, partitions/1]). -export([version/0]). --include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). - %% Types +-type callback() :: pid() | + fun() | + {atom(), atom()} | + {atom(), atom(), [any()]}. -type option() :: {buffer_size, integer() | infinity} | {dump_location, string()} | {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | - {offset, integer()}. + {offset, integer()} | + {fetch_interval, false | integer()}. -type options() :: [option()]. -type server_ref() :: atom() | pid(). --export_type([options/0, server_ref/0]). +-type error() :: {error, atom() | {atom(), any()}}. + +-type topic() :: binary(). +-type partition() :: integer(). +-type payload() :: binary() | [binary()]. +-type basic_message() :: {topic(), partition(), payload()}. + +-export_type([server_ref/0, error/0, options/0, callback/0, + topic/0, partition/0, payload/0, basic_message/0]). %%============================================================================== %% API @@ -76,6 +86,14 @@ consume(ServerRef, Topic, Partition, Options) -> kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) end. +-spec stop_consuming(topic(), partition()) -> ok. +stop_consuming(Topic, Partition) -> + stop_consuming(?MODULE, Topic, Partition). + +-spec stop_consuming(server_ref(), topic(), partition()) -> ok. +stop_consuming(ServerRef, Topic, Partition) -> + kafkerl_connector:stop_fetch(ServerRef, Topic, Partition). + %% Metadata API -spec request_metadata() -> ok. request_metadata() -> diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 713fff7..0cccbfb 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,7 +4,7 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1, fetch/4]). +-export([add_buffer/2, clear_buffers/1, fetch/4, stop_fetch/3]). % Only for internal use -export([connect/6]). % Supervisors @@ -14,20 +14,25 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). --type server_ref() :: atom() | pid(). 
-type conn_idx() :: 0..1023. -type start_link_response() :: {ok, atom(), pid()} | ignore | {error, any()}. +-record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), + server_ref = undefined :: kafkerl:server_ref(), + topic = undefined :: kafkerl:topic(), + partition = undefined :: kafkerl:partition(), + options = undefined :: kafkerl:options(), + state = void :: kafkerl_protocol:fetch_state()}). + -record(state, {name = undefined :: atom(), buffers = [] :: [atom()], conn_idx = undefined :: conn_idx(), client_id = undefined :: binary(), - socket = undefined :: undefined | port(), - address = undefined :: undefined | socket_address(), - connector = undefined :: undefined | pid(), - tref = undefined :: undefined | any(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + connector = undefined :: pid(), + tref = undefined :: any(), tcp_options = [] :: [any()], max_retries = 0 :: integer(), retry_interval = 0 :: integer(), @@ -35,16 +40,14 @@ pending_requests = [] :: [integer()], max_time_queued = 0 :: integer(), ets = undefined :: atom(), - fetching = void :: integer() | void, - fetches = [] :: [{correlation_id(), - callback(), - fetch_state()}]}). + fetches = [] :: [#fetch{}], + current_fetch = void :: void | kafkerl_protocol:correlation_id()}). -type state() :: #state{}. %%============================================================================== %% API %%============================================================================== --spec start_link(conn_idx(), pid(), socket_address(), any()) -> +-spec start_link(conn_idx(), pid(), kafkerl_connector:address(), any()) -> start_link_response(). start_link(Id, Connector, Address, Config) -> NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), @@ -57,17 +60,21 @@ start_link(Id, Connector, Address, Config) -> Other end. --spec add_buffer(server_ref(), atom()) -> ok. +-spec add_buffer(kafkerl:server_ref(), atom()) -> ok. add_buffer(ServerRef, Buffer) -> gen_server:call(ServerRef, {add_buffer, Buffer}). --spec clear_buffers(server_ref()) -> ok. +-spec clear_buffers(kafkerl:server_ref()) -> ok. clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). --spec fetch(server_ref(), topic(), partition(), kafkerl:options()) -> ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). + gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). + +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> ok. +stop_fetch(ServerRef, Topic, Partition) -> + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). %%============================================================================== %% gen_server callbacks @@ -77,8 +84,10 @@ handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> {reply, ok, State#state{buffers = [Buffer| Buffers]}}; handle_call({clear_buffers}, _From, State) -> {reply, ok, State#state{buffers = []}}; -handle_call({fetch, Topic, Partition, Options}, _From, State) -> - handle_fetch(Topic, Partition, Options, State). +handle_call({fetch, ServerRef, Topic, Partition, Options}, _From, State) -> + handle_fetch(ServerRef, Topic, Partition, Options, State); +handle_call({stop_fetch, Topic, Partition}, _From, State) -> + handle_stop_fetch(Topic, Partition, State). -spec handle_info(any(), state()) -> {noreply, state()}. 
handle_info({connected, Socket}, State) -> @@ -87,7 +96,7 @@ handle_info(connection_timeout, State) -> {stop, {error, unable_to_connect}, State}; handle_info({tcp_closed, _Socket}, State = #state{name = Name, address = {Host, Port}}) -> - lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), + _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), NewState = handle_tcp_close(State), {noreply, NewState}; handle_info({tcp, _Socket, Bin}, State) -> @@ -99,7 +108,7 @@ handle_info({flush, Time}, State) -> {ok, _Tref} = queue_flush(Time), handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), + _ = lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), {noreply, State}. % Boilerplate @@ -135,7 +144,7 @@ init([Id, Connector, Address, Config, Name]) -> {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> - lager:critical("broker connection config error ~p", [E]) + _ = lager:critical("broker connection config error ~p", [E]) end, Errors), {stop, bad_config} end. @@ -156,32 +165,31 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, MergedMessages -> Request = kafkerl_protocol:build_produce_request(MergedMessages, ClientId, - CorrelationId, - ?COMPRESSION_NONE), + CorrelationId), true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), - lager:debug("~p sending ~p", [Name, Request]), + _ = lager:debug("~p sending ~p", [Name, Request]), case gen_tcp:send(Socket, Request) of {error, Reason} -> - lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), gen_tcp:close(Socket), ets:delete_all_objects(EtsName, CorrelationId), ok = resend_messages(MergedMessages, Connector), {noreply, handle_tcp_close(NewState)}; ok -> - lager:debug("~p sent message ~p", [Name, CorrelationId]), + _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), {noreply, NewState} end end. -handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, - socket = Socket, - name = Name}) -> +handle_fetch(ServerRef, Topic, Partition, Options, + State = #state{fetches = Fetches, client_id = ClientId, + socket = Socket, name = Name}) -> + {ok, CorrelationId, NewState} = build_correlation_id(State), Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, MaxWait = proplists:get_value(max_wait, Options), MinBytes = proplists:get_value(min_bytes, Options), - {ok, CorrelationId, NewState} = build_fetch_correlation_id(Options, State), - Request = {Topic, {Partition, Offset, 2147483647}}, Payload = kafkerl_protocol:build_fetch_request(Request, ClientId, CorrelationId, @@ -189,14 +197,41 @@ handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, MinBytes), case gen_tcp:send(Socket, Payload) of {error, Reason} -> - lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(NewState)}; + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; ok -> - lager:debug("~p sent request ~p", [Name, CorrelationId]), - {reply, ok, NewState} - end. 
+ _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + options = Options}, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches]}} + end; +handle_fetch(_ServerRef, _Topic, _Partition, _Options, State) -> + {reply, {error, fetch_in_progress}, State}. + +handle_stop_fetch(Topic, Partition, State) -> + % Leave current fetch as it is + NewFetches = remove_fetch(Topic, Partition, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}. + +remove_fetch(Topic, Partition, CurrentFetches) -> + remove_fetch(Topic, Partition, CurrentFetches, []). +remove_fetch(_Topic, _Partition, [], Acc) -> + Acc; +remove_fetch(Topic, Partition, + [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> + % Clearing the fetch options ensures this fetch will stop sending any messages + % since there is no consumer. This also removes the fetch_interval so it won't + % be requested again. + % Simply removing the fetch here doesn't work since we will still get a server + % response, but we will not be able to properly handle it. + [Fetch#fetch{options = []} | Acc] ++ T; +remove_fetch(Topic, Partition, [H | T], Acc) -> + remove_fetch(Topic, Partition, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, @@ -208,35 +243,55 @@ handle_tcp_close(State = #state{retry_interval = RetryInterval, _Pid = spawn_link(?MODULE, connect, Params), State#state{socket = undefined}. -handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> - {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, State), - case lists:keytake(CorrelationId, 1, Fetches) of - {value, {CorrelationId, Consumer, FetchState}, NewFetches} -> - NewState = State#state{fetches = NewFetches}, - handle_fetch_response(Bin, Consumer, FetchState, NewState); - false -> +handle_tcp_data(Bin, State = #state{fetches = Fetches, + current_fetch = CurrentFetch}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), + case get_fetch(CorrelationId, Fetches) of + Fetch = #fetch{} -> + handle_fetch_response(Bin, Fetch, State); + _ -> handle_produce_response(Bin, State) end. -handle_fetch_response(Bin, Consumer, FetchState, State) -> - case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of - {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> - send_messages(Consumer, {message_count, MessagesInPartition}), - send_messages(Consumer, {consume_done, Messages}), - {ok, State#state{fetching = void}}; - {incomplete, CorrelationId, Topics, NewFetchState} -> - _ = case Topics of - [{_, [{_, [_ | _] = Messages}]}] -> - send_messages(Consumer, {consumed, Messages}); - _ -> - ignore - end, - Fetches = State#state.fetches, - NewFetches = [{CorrelationId, Consumer, NewFetchState} | Fetches], - {ok, State#state{fetches = NewFetches, fetching = CorrelationId}}; +handle_fetch_response(Bin, Fetch, State = #state{name = Name, + fetches = Fetches}) -> + Options = Fetch#fetch.options, + Consumer = proplists:get_value(consumer, Options), + case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of + {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> + % The messages can be empty, for example when there are no new messages in + % this partition, if that happens, don't send anything and end the fetch. 
+ ok = send_messages(Consumer, + case Messages of + [] -> []; + _ -> [{consumed, Messages}, {offset, Offset}] + end), + case proplists:get_value(fetch_interval, Options, false) of + false -> {ok, State#state{current_fetch = void}}; + Interval -> + NewOptions = lists:keyreplace(offset, 1, Options, {offset, Offset}), + Arguments = [Fetch#fetch.server_ref, Fetch#fetch.topic, + Fetch#fetch.partition, NewOptions], + _ = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + {ok, State#state{current_fetch = void, + fetches = lists:delete(Fetch, Fetches)}} + end; + {incomplete, CorrelationId, Data, NewFetchState} -> + ok = case Data of + [{_, [{_, Messages}]}] -> + send_messages(Consumer, {consumed, Messages}); + _ -> + % On some cases, kafka will return an incomplete response with no + % messages, in this case since we don't have anything to send, we + % just need to update the fetch state. + ok + end, + {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | + lists:delete(Fetch, Fetches)], + current_fetch = CorrelationId}}; Error -> - kafkerl_utils:send_event(Consumer, Error), - {ok, State#state{fetching = void}} + ok = send_messages(Consumer, Error), + {ok, State#state{current_fetch = void, fetches = lists:delete(Fetch, Fetches)}} end. handle_produce_response(Bin, State = #state{connector = Connector, name = Name, @@ -246,7 +301,7 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, case ets:lookup(EtsName, CorrelationId) of [{CorrelationId, Messages}] -> ets:delete(EtsName, CorrelationId), - {Errors, Successes} = separate_errors(Topics), + {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered spawn(fun() -> notify_success_to_connector(Successes, Messages, Connector) @@ -260,13 +315,13 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {ok, State} end; _ -> - lager:warning("~p was unable to properly process produce response", - [Name]), + _ = lager:warning("~p was unable to properly process produce response", + [Name]), {error, invalid_produce_response} end; Other -> - lager:critical("~p got unexpected response when parsing message: ~p", - [Name, Other]), + _ = lager:critical("~p got unexpected response when parsing message: ~p", + [Name, Other]), {ok, State} end. @@ -303,25 +358,18 @@ build_correlation_id(State = #state{request_number = RequestNumber, CorrelationId = (ConnIdx bsl 22) bor NextRequest, {ok, CorrelationId, State#state{request_number = NextRequest}}. -build_fetch_correlation_id(Options, State = #state{fetches = Fetches}) -> - Consumer = proplists:get_value(consumer, Options), - {ok, CorrelationId, NewState} = build_correlation_id(State), - NewFetches = [{CorrelationId, Consumer, void} | Fetches], - {ok, CorrelationId, NewState#state{fetches = NewFetches}}. - -% TODO: Refactor this function, it is not sufficiently clear what it does -separate_errors(Topics) -> - separate_errors(Topics, {[], []}). +split_errors_and_successes(Topics) -> + split_errors_and_successes(Topics, {[], []}). -separate_errors([], Acc) -> +split_errors_and_successes([], Acc) -> Acc; -separate_errors([{Topic, Partitions} | T], Acc) -> +split_errors_and_successes([{Topic, Partitions} | T], Acc) -> F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> {E, [{Topic, Partition, Offset} | S]}; ({Partition, Error, _}, {E, S}) -> {[{Topic, Partition, Error} | E], S} end, - separate_errors(T, lists:foldl(F, Acc, Partitions)). 
+ split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). handle_errors([], _Messages, _Name) -> ignore; @@ -341,21 +389,21 @@ handle_error({Topic, Partition, Error}, Messages, Name) Message -> {true, Message} end; handle_error({Topic, Partition, Error}, _Messages, Name) -> - lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", + [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), false. get_message_for_error(Topic, Partition, SavedMessages, Name) -> case lists:keyfind(Topic, 1, SavedMessages) of false -> - lager:error("~p found no saved messages for topic ~p, partition ~p", - [Name, Topic, Partition]), + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), undefined; {Topic, Partitions} -> case lists:keyfind(Partition, 1, Partitions) of false -> - lager:error("~p found no saved messages for topic ~p, partition ~p", - [Name, Topic, Partition]), + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), undefined; {Partition, Messages} -> {Topic, Partition, Messages} @@ -363,19 +411,19 @@ get_message_for_error(Topic, Partition, SavedMessages, Name) -> end. connect(Pid, Name, _TCPOpts, {Host, Port} = _Address, _Timeout, 0) -> - lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), + _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), Pid ! connection_timeout; connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> - lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), + _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), case gen_tcp:connect(Host, Port, TCPOpts, 5000) of {ok, Socket} -> - lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), + _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), gen_tcp:controlling_process(Socket, Pid), Pid ! {connected, Socket}; {error, Reason} -> NewRetries = Retries - 1, - lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", - [Name, Host, Port, Reason, NewRetries]), + _ = lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", + [Name, Host, Port, Reason, NewRetries]), timer:sleep(Timeout), connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) end. @@ -398,14 +446,28 @@ get_messages_from(Ets, Retries) -> _Error when Retries > 0 -> get_messages_from(Ets, Retries - 1); _Error -> - lager:warning("giving up on reading from the ETS buffer"), + _ = lager:warning("giving up on reading from the ETS buffer"), [] end. -parse_correlation_id(Bin, #state{fetching = void}) -> - {ok, _CorrelationId, _NewBin} = kafkerl_protocol:parse_correlation_id(Bin); -parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> +parse_correlation_id(Bin, void) -> + kafkerl_protocol:parse_correlation_id(Bin); +parse_correlation_id(Bin, CorrelationId) -> {ok, CorrelationId, Bin}. +get_fetch(_CorrelationId, []) -> + not_found; +get_fetch(CorrelationId, [Fetch = #fetch{correlation_id = CorrelationId} | _T]) -> + Fetch; +get_fetch(CorrelationId, [_ | T]) -> + get_fetch(CorrelationId, T). + +send_messages(_Consumer, []) -> + ok; +send_messages(Consumer, [Event | T]) -> + case send_messages(Consumer, Event) of + ok -> send_messages(Consumer, T); + Error -> Error + end; send_messages(Consumer, Event) -> kafkerl_utils:send_event(Consumer, Event). 
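%% Sketch of the polling consumer flow added in this patch: a
%% {fetch_interval, Ms} option makes the broker connection re-schedule
%% fetch/4 through timer:apply_after/4 with the offset advanced to the last
%% one seen, and stop_consuming/2,3 clears the fetch options so the loop
%% stops. The module name, topic, partition and interval are illustrative.
-module(kafkerl_polling_example).
-export([start/0, stop/0]).

start() ->
  Options = [{consumer, self()},      % receives {consumed, _} and {offset, _}
             {offset, 0},
             {fetch_interval, 5000}], % poll every five seconds
  kafkerl:consume(<<"test1">>, 0, Options).

stop() ->
  kafkerl:stop_consuming(<<"test1">>, 0).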
\ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 69f3533..089818e 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -24,36 +24,43 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). --type broker_mapping_key() :: {topic(), partition()}. +-type kafler_host() :: string(). +-type kafler_port() :: 1..65535. +-type address() :: {kafler_host(), kafler_port()}. + +-type filters() :: all | [atom()]. + +-type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [socket_address()], +-record(state, {brokers = [] :: [address()], broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: client_id(), + client_id = <<>> :: kafkerl_protocol:client_id(), max_metadata_retries = -1 :: integer(), retry_interval = 1 :: non_neg_integer(), config = [] :: {atom(), any()}, autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), callback()}], + callbacks = [] :: [{filters(), kafkerl:callback()}], known_topics = [] :: [binary()], - pending = [] :: [basic_message()], + pending = [] :: [kafkerl:basic_message()], last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. +-export_type([address/0]). + %%============================================================================== %% API %%============================================================================== --spec start_link(atom(), any()) -> {ok, pid()} | ignore | error(). +-spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Name, Config) -> gen_server:start_link({local, Name}, ?MODULE, [Config], []). --spec send(kafkerl:server_ref(), basic_message(), kafkerl:options()) -> - ok | error(). +-spec send(kafkerl:server_ref(), kafkerl:basic_message(), kafkerl:options()) -> + ok | kafkerl:error(). send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of @@ -69,13 +76,13 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> gen_server:call(ServerRef, {send, Message}) end. --spec fetch(kafkerl:server_ref(), topic(), partition(), kafkerl:options()) -> +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). -spec get_partitions(kafkerl:server_ref()) -> - [{topic(), [partition()]}] | error(). + [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). get_partitions(ServerRef) -> case gen_server:call(ServerRef, {get_partitions}) of {ok, Mapping} -> @@ -84,13 +91,14 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(kafkerl:server_ref(), callback()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok | kafkerl:error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). --spec subscribe(kafkerl:server_ref(), callback(), filters()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> + ok | kafkerl:error(). subscribe(ServerRef, Callback, Filter) -> gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). 
--spec unsubscribe(kafkerl:server_ref(), callback()) -> ok. +-spec unsubscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok. unsubscribe(ServerRef, Callback) -> gen_server:call(ServerRef, {unsubscribe, Callback}). @@ -98,16 +106,20 @@ unsubscribe(ServerRef, Callback) -> request_metadata(ServerRef) -> gen_server:call(ServerRef, {request_metadata}). --spec request_metadata(kafkerl:server_ref(), [topic()] | boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()] | boolean()) -> + ok. request_metadata(ServerRef, TopicsOrForced) -> gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). --spec request_metadata(kafkerl:server_ref(), [topic()], boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()], boolean()) -> + ok. request_metadata(ServerRef, Topics, Forced) -> gen_server:call(ServerRef, {request_metadata, Topics, Forced}). --spec produce_succeeded(kafkerl:server_ref(), - [{topic(), partition(), [binary()], integer()}]) -> ok. +-spec produce_succeeded(kafkerl:server_ref(), [{kafkerl:topic(), + kafkerl:partition(), + [binary()], + integer()}]) -> ok. produce_succeeded(ServerRef, Messages) -> gen_server:cast(ServerRef, {produce_succeeded, Messages}). @@ -511,7 +523,7 @@ make_metadata_request(State = #state{brokers = Brokers, spawn_monitor(?MODULE, do_request_metadata, Params). get_timestamp() -> - {A, B, C} = erlang:now(), + {A, B, C} = erlang:timestamp(), (A * 1000000 + B) * 1000 + C div 1000. %%============================================================================== diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 11b18a9..763d719 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -1,19 +1,89 @@ -module(kafkerl_protocol). -author('hernanrivasacosta@gmail.com'). --export([build_produce_request/4, build_fetch_request/5, +-export([build_produce_request/3, build_produce_request/4, + build_fetch_request/5, build_metadata_request/3]). -export([parse_correlation_id/1, parse_produce_response/1, parse_fetch_response/1, parse_fetch_response/2, parse_metadata_response/1]). --include("kafkerl.hrl"). +%% Common +-type error_code() :: -1..16. +-type correlation_id() :: non_neg_integer(). +-type broker_id() :: integer(). +-type broker() :: {broker_id(), kafkerl_connector:address()}. + +%% Requests +-type client_id() :: binary(). +-type merged_message() :: kafkerl:basic_message() | + {kafkerl:topic(), [{kafkerl:partition(), kafkerl:payload()}]} | + [merged_message()]. +-type fetch_offset() :: integer(). +-type fetch_max_bytes() :: integer(). +-type fetch_partition() :: {kafkerl:partition(), fetch_offset(), fetch_max_bytes()} | + [fetch_partition()]. +-type fetch_request() :: {kafkerl:topic(), fetch_partition()} | + [fetch_request()]. + +%% Responses +-type produce_partition() :: {kafkerl:partition(), error_code(), integer()}. +-type produce_topic() :: {kafkerl:topic(), [produce_partition()]}. +-type produce_response() :: {ok, correlation_id(), [produce_topic()]}. +-type replica() :: integer(). +-type isr() :: integer(). +-type partition_metadata() :: {error_code(), kafkerl:partition(), broker_id(), + [replica()], [isr()]}. +-type topic_metadata() :: {error_code(), kafkerl:topic(), [partition_metadata()]}. +-type metadata() :: {[broker()], [topic_metadata()]}. +-type metadata_response() :: {ok, correlation_id(), metadata()} | + kafkerl:error(). +-type messages() :: [{kafkerl:topic(), + [{{kafkerl:partition(), integer()}, + [binary() | {binary(), binary()}]}]}]. 
+-type fetch_state() :: {binary(), integer(), [any()]} | void. +-type fetch_response() :: {ok, integer(), messages()} | + {incomplete, integer(), messages(), fetch_state()} | + kafkerl:error(). + +% Compression +-define(COMPRESSION_NONE, none). +-define(COMPRESSION_GZIP, gzip). +-define(COMPRESSION_SNAPPY, snappy). +-define(KAFKERL_COMPRESSION_TYPES, [?COMPRESSION_NONE, + ?COMPRESSION_GZIP, + ?COMPRESSION_SNAPPY]). + +%% Configuration +-type compression() :: ?COMPRESSION_NONE | + ?COMPRESSION_GZIP | + ?COMPRESSION_SNAPPY. + +% API keys +-define(PRODUCE_KEY, 0). +-define(FETCH_KEY, 1). +-define(OFFSET_KEY, 2). +-define(METADATA_KEY, 3). + +% C style binary types +-define(SHORT, 16/signed-integer). +-define(INT, 32/signed-integer). +-define(UCHAR, 8/unsigned-integer). +-define(USHORT, 16/unsigned-integer). +-define(UINT, 32/unsigned-integer). +-define(ULONG, 64/unsigned-integer). + +% Type exports +-export_type([merged_message/0, client_id/0, correlation_id/0, fetch_state/0]). %%============================================================================== %% API %%============================================================================== % Message building +-spec build_produce_request(merged_message(), client_id(), correlation_id()) -> iodata(). +build_produce_request(Data, ClientId, CorrelationId) -> + build_produce_request(Data, ClientId, CorrelationId, ?COMPRESSION_NONE). -spec build_produce_request(merged_message(), client_id(), correlation_id(), compression()) -> iodata(). build_produce_request(Data, ClientId, CorrelationId, Compression) -> @@ -26,7 +96,8 @@ build_fetch_request(Data, ClientId, CorrelationId, MaxWait, MinBytes) -> {Size, Request} = build_fetch_request(Data, MaxWait, MinBytes), [build_request_header(ClientId, ?FETCH_KEY, CorrelationId, Size), Request]. --spec build_metadata_request(topic() | [topic()], correlation_id(), +-spec build_metadata_request(kafkerl:topic() | [kafkerl:topic()], + correlation_id(), client_id()) -> iodata(). build_metadata_request(Topics, CorrelationId, ClientId) -> {_Size, Request} = build_metadata_request(Topics), @@ -34,23 +105,23 @@ build_metadata_request(Topics, CorrelationId, ClientId) -> % Message parsing -spec parse_correlation_id(binary()) -> {ok, integer(), binary()}. -parse_correlation_id(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, +parse_correlation_id(<<_Size:?UINT, + CorrelationId:?UINT, Remainder/binary>>) -> {ok, CorrelationId, Remainder}. -spec parse_produce_response(binary()) -> produce_response(). -parse_produce_response(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, - TopicCount:32/unsigned-integer, +parse_produce_response(<<_Size:?UINT, + CorrelationId:?UINT, + TopicCount:?UINT, TopicsBin/binary>>) -> {ok, Topics} = parse_produced_topics(TopicCount, TopicsBin), {ok, CorrelationId, Topics}. -spec parse_fetch_response(binary()) -> fetch_response(). -parse_fetch_response(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, - TopicCount:32/unsigned-integer, +parse_fetch_response(<<_Size:?UINT, + CorrelationId:?UINT, + TopicCount:?UINT, TopicsBin/binary>>) -> case parse_topics(TopicCount, TopicsBin) of {ok, Topics} -> @@ -71,11 +142,11 @@ parse_fetch_response(Bin, {Remainder, CorrelationId, Steps}) -> parse_steps(NewBin, CorrelationId, Steps). -spec parse_metadata_response(binary()) -> metadata_response(). 
-parse_metadata_response(<>) -> case parse_brokers(BrokerCount, BrokersBin) of - {ok, Brokers, <>} -> + {ok, Brokers, <>} -> case parse_topic_metadata(TopicCount, TopicsBin) of {ok, Metadata} -> {ok, CorrelationId, {Brokers, Metadata}}; @@ -95,16 +166,16 @@ build_request_header(ClientId, ApiKey, CorrelationId) -> % Build the header (http://goo.gl/5SNNTV) ApiVersion = 0, % The version should be 0, it's not a placeholder ClientIdSize = byte_size(ClientId), - [<>, + [<>, ClientId]. build_request_header(ClientId, ApiKey, CorrelationId, RequestSize) -> % 10 is the size of the header MessageSize = byte_size(ClientId) + RequestSize + 10, - [<>, + [<>, build_request_header(ClientId, ApiKey, CorrelationId)]. %% PRODUCE REQUEST @@ -119,14 +190,14 @@ build_produce_request({Topic, Partition, Messages}, Compression) -> TopicSize = byte_size(Topic), {Size, MessageSet} = build_message_set(Messages, Compression), {Size + TopicSize + 24, - [<<-1:16/signed-integer, - -1:32/signed-integer, % Timeout - 1:32/unsigned-integer, % TopicCount - TopicSize:16/unsigned-integer>>, + [<<-1:?SHORT, + -1:?INT, % Timeout + 1:?UINT, % TopicCount + TopicSize:?USHORT>>, Topic, - <<1:32/unsigned-integer, % PartitionCount - Partition:32/unsigned-integer, - Size:32/unsigned-integer>>, + <<1:?UINT, % PartitionCount + Partition:?UINT, + Size:?UINT>>, MessageSet]}; build_produce_request(Data, Compression) -> % Build the body of the request with multiple topics/partitions @@ -135,9 +206,9 @@ build_produce_request(Data, Compression) -> {TopicsSize, Topics} = build_topics(Data, Compression), % 10 is the size of the header {TopicsSize + 10, - [<<-1:16/signed-integer, % RequiredAcks - -1:32/signed-integer, % Timeout - TopicCount:32/unsigned-integer>>, + [<<-1:?SHORT, % RequiredAcks + -1:?INT, % Timeout + TopicCount:?UINT>>, Topics]}. build_topics(Topics, Compression) -> @@ -157,9 +228,9 @@ build_topic({Topic, Partitions}, Compression) -> {Size, BuiltPartitions} = build_partitions(Partitions, Compression), % 6 is the size of both the partition count int and the topic size int {Size + TopicSize + 6, - [<>, + PartitionCount:?UINT>>, BuiltPartitions]}. build_partitions(Partitions, Compression) -> @@ -177,8 +248,8 @@ build_partition({Partition, Messages}, Compression) -> {Size, MessageSet} = build_message_set(Messages, Compression), % 8 is the size of the header, 4 bytes of the partition and 4 for the size {Size + 8, - [<>, + [<>, MessageSet]}. % Docs at http://goo.gl/4W7J0r @@ -207,18 +278,18 @@ build_message(Bin) -> Crc = erlang:crc32(Message), % 12 is the size of the offset plus the size int itself {Size + 12, - [<>, + [<>, Message]}. get_message_header(MessageSize, Compression) -> MagicByte = 0, % Version id Attributes = compression_to_int(Compression), - <>. + <>. 
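%% A minimal, self-contained sketch of how the C style binary type macros
%% above are used: they are plain aliases for bit-syntax field sizes, so the
%% same macro works both for building and for matching binaries. The module
%% name and the exact field layout below are illustrative only; the real
%% header layout is the one produced by build_request_header/3,4 above.
-module(kafkerl_bitsyntax_example).
-export([run/0]).

-define(SHORT,  16/signed-integer).
-define(USHORT, 16/unsigned-integer).
-define(UINT,   32/unsigned-integer).

run() ->
  ApiKey        = 3,                      %% e.g. the metadata API key
  ApiVersion    = 0,
  CorrelationId = 17,
  ClientId      = <<"kafkerl_client">>,
  ClientIdSize  = byte_size(ClientId),
  %% Building: every field size comes from a macro
  Header = [<<ApiKey:?USHORT, ApiVersion:?USHORT,
              CorrelationId:?UINT, ClientIdSize:?SHORT>>,
            ClientId],
  %% Matching: the same macros describe the fields when parsing
  <<ApiKey:?USHORT, ApiVersion:?USHORT, CorrelationId:?UINT,
    ClientIdSize:?SHORT, ClientId:ClientIdSize/binary>> =
      iolist_to_binary(Header),
  ok.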
compression_to_int(?COMPRESSION_NONE) -> 0; compression_to_int(?COMPRESSION_GZIP) -> 1; @@ -241,26 +312,26 @@ build_fetch_request({Topic, {Partition, Offset, MaxBytes}}, MaxWait, MinBytes) -> TopicSize = byte_size(Topic), {TopicSize + 38, - [<<-1:32/signed-integer, % ReplicaId - MaxWait:32/unsigned-integer, - MinBytes:32/unsigned-integer, - 1:32/unsigned-integer, % TopicCount - TopicSize:16/unsigned-integer>>, + [<<-1:?INT, % ReplicaId + MaxWait:?UINT, + MinBytes:?UINT, + 1:?UINT, % TopicCount + TopicSize:?USHORT>>, Topic, - <<1:32/unsigned-integer, % PartitionCount - Partition:32/unsigned-integer, - Offset:64/unsigned-integer, - MaxBytes:32/unsigned-integer>>]}; + <<1:?UINT, % PartitionCount + Partition:?UINT, + Offset:?ULONG, + MaxBytes:?UINT>>]}; build_fetch_request(Data, MaxWait, MinBytes) -> ReplicaId = -1, % This should always be -1 TopicCount = length(Data), {TopicSize, Topics} = build_fetch_topics(Data), % 16 is the size of the header {TopicSize + 16, - [<>, + [<>, Topics]}. build_fetch_topics(Topics) -> @@ -280,9 +351,9 @@ build_fetch_topic({Topic, Partitions}) -> {Size, BuiltPartitions} = build_fetch_partitions(Partitions), % 6 is the size of the topicSize's 16 bytes + 32 from the partition count {Size + TopicSize + 6, - [<>, + PartitionCount:?UINT>>, BuiltPartitions]}. build_fetch_partitions(Partitions) -> @@ -296,20 +367,20 @@ build_fetch_partitions([H | T] = _Partitions, {OldSize, IOList}) -> build_fetch_partition({Partition, Offset, MaxBytes}) -> {16, - <>}. + <>}. build_metadata_request([]) -> % Builds an empty metadata request that returns all topics and partitions - {4, <<0:32/unsigned-integer>>}; + {4, <<0:?UINT>>}; build_metadata_request(Topic) when is_binary(Topic) -> build_metadata_request([Topic]); build_metadata_request(Topics) -> TopicCount = length(Topics), {Size, BuiltTopics} = build_metadata_topics(Topics), {Size + 4, - [<>, + [<>, BuiltTopics]}. build_metadata_topics(Topics) -> @@ -319,7 +390,7 @@ build_metadata_topics([] = _Topics, {Size, IOList}) -> {Size, lists:reverse(IOList)}; build_metadata_topics([H | T] = _Partitions, {OldSize, IOList}) -> Size = byte_size(H), - Topic = [<>, H], + Topic = [<>, H], build_metadata_topics(T, {OldSize + Size + 2, [Topic | IOList]}). %%============================================================================== @@ -334,9 +405,9 @@ parse_produced_topics(Count, <<>>, Acc) when Count =< 0 -> parse_produced_topics(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing produce response, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_produced_topics(Count, <>, Acc) -> {ok, Partitions, Remainder} = parse_produced_partitions(PartitionCount, PartitionsBin), @@ -347,9 +418,9 @@ parse_produced_partitions(Count, Bin) -> parse_produced_partitions(Count, Bin, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Bin}; -parse_produced_partitions(Count, <>, Acc) -> PartitionData = {Partition, ErrorCode, Offset}, parse_produced_partitions(Count - 1, Remainder, [PartitionData | Acc]). @@ -377,9 +448,9 @@ parse_topics(Count, Bin, Acc) -> Error end. -parse_topic(<>) -> case parse_partitions(PartitionCount, PartitionsBin) of {ok, Partitions, Remainder} -> @@ -413,10 +484,10 @@ parse_partitions(Count, Bin, Acc) -> Error end. 
-parse_partition(<>) -> Partition = {PartitionId, HighwaterMarkOffset}, case parse_message_set(MessageSetSize, MessageSetBin) of @@ -426,8 +497,8 @@ parse_partition(<>) -> kafkerl_error:get_error_tuple(ErrorCode); parse_partition(<<>>) -> @@ -446,21 +517,21 @@ parse_message_set(RemainingSize, Bin, Acc) -> {incomplete, lists:reverse(Acc), {Bin, [{message_set, RemainingSize}]}} end. -parse_message(<<_Offset:64/unsigned-integer, - MessageSize:32/signed-integer, +parse_message(<<_Offset:?ULONG, + MessageSize:?INT, Message:MessageSize/binary, Remainder/binary>>) -> - <<_Crc:32/unsigned-integer, - _MagicByte:8/unsigned-integer, - _Attributes:8/unsigned-integer, + <<_Crc:?UINT, + _MagicByte:?UCHAR, + _Attributes:?UCHAR, KeyValue/binary>> = Message, KV = case KeyValue of - <> -> + <> -> {Key, Value}; % 4294967295 is -1 and it signifies an empty Key http://goo.gl/Ssl4wq - <<4294967295:32/unsigned-integer, - ValueSize:32/unsigned-integer, Value:ValueSize/binary>> -> + <<4294967295:?UINT, + ValueSize:?UINT, Value:ValueSize/binary>> -> Value end, % 12 is the size of the offset plus the size of the MessageSize int @@ -474,10 +545,10 @@ parse_brokers(Count, Bin) -> parse_brokers(Count, Bin, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Bin}; -parse_brokers(Count, <>, Acc) -> HostStr = binary_to_list(Host), parse_brokers(Count - 1, Remainder, [{Id, {HostStr, Port}} | Acc]). @@ -490,18 +561,18 @@ parse_topic_metadata(Count, <<>>, Acc) when Count =< 0 -> parse_topic_metadata(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing topic metadata, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_topic_metadata(Count, <<0:16/signed-integer, - TopicSize:16/unsigned-integer, +parse_topic_metadata(Count, <<0:?SHORT, + TopicSize:?USHORT, TopicName:TopicSize/binary, - PartitionCount:32/unsigned-integer, + PartitionCount:?UINT, PartitionsBin/binary>>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, PartitionsBin), TopicMetadata = {0, TopicName, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]); -parse_topic_metadata(Count, <>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(0, Remainder), TopicMetadata = {ErrorCode, <<"unkown">>, PartitionsMetadata}, @@ -512,13 +583,13 @@ parse_partition_metadata(Count, Bin) -> parse_partition_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_partition_metadata(Count, <>, Acc) -> {ok, Replicas, Remainder} = parse_replica_metadata(ReplicaCount, ReplicasBin), - <> = Remainder, + <> = Remainder, {ok, Isr, IsrRemainder} = parse_isr_metadata(IsrCount, IsrBin), PartitionMetadata = {ErrorCode, Partition, Leader, Replicas, Isr}, parse_partition_metadata(Count - 1, IsrRemainder, [PartitionMetadata | Acc]). @@ -528,7 +599,7 @@ parse_replica_metadata(Count, Bin) -> parse_replica_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_replica_metadata(Count, <>, Acc) -> parse_replica_metadata(Count - 1, Remainder, [Replica | Acc]). @@ -537,7 +608,7 @@ parse_isr_metadata(Count, Bin) -> parse_isr_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_isr_metadata(Count, <>, Acc) -> parse_isr_metadata(Count - 1, Remainder, [Isr | Acc]). diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index b720d83..fd989b4 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -5,8 +5,6 @@ -export([start_link/0, init/1]). --include("kafkerl.hrl"). 
- -define(SERVER, ?MODULE). -type restart_strategy() :: {supervisor:strategy(), diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index b7d8f70..6b7b515 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -7,17 +7,14 @@ -export([buffer_name/2]). -export([gather_consume_responses/0, gather_consume_responses/1]). --include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). - %%============================================================================== %% API %%============================================================================== --spec send_error(callback(), any()) -> ok. +-spec send_error(kafkerl:callback(), any()) -> ok. send_error(Callback, Reason) -> send_event(Callback, {error, Reason}). --spec send_event(callback(), any()) -> ok | {error, {bad_callback, any()}}. +-spec send_event(kafkerl:callback(), any()) -> ok | {error, {bad_callback, any()}}. send_event({M, F}, Data) -> spawn(fun() -> M:F(Data) end), ok; @@ -33,18 +30,24 @@ send_event(Function, Data) when is_function(Function, 1) -> send_event(BadCallback, _Data) -> {error, {bad_callback, BadCallback}}. +default_tcp_options() -> + % This list has to be sorted + [{mode, binary}, {packet, 0}]. get_tcp_options(Options) -> % TODO: refactor - lists:ukeymerge(1, lists:sort(proplists:unfold(Options)), ?DEFAULT_TCP_OPTS). + UnfoldedOptions = proplists:unfold(Options), + lists:ukeymerge(1, lists:sort(UnfoldedOptions), default_tcp_options()). % This is rather costly, and for obvious reasons does not maintain the order of % the partitions or topics, but it does keep the order of the messages within a % specific topic-partition pair --spec merge_messages([basic_message()]) -> merged_message(). +-spec merge_messages([kafkerl_protocol:basic_message()]) -> + kafkerl_protocol:merged_message(). merge_messages(Topics) -> merge_topics(Topics). % Not as costly, but still avoid this in a place where performance is critical --spec split_messages(merged_message()) -> [basic_message()]. +-spec split_messages(kafkerl_protocol:merged_message()) -> + [kafkerl_protocol:basic_message()]. split_messages({Topic, {Partition, Messages}}) -> {Topic, Partition, Messages}; split_messages({Topic, Partitions}) -> @@ -64,7 +67,8 @@ valid_message(L) when is_list(L) -> valid_message(_Any) -> false. --spec buffer_name(topic(), partition()) -> atom(). +-spec buffer_name(kafkerl_protocol:topic(), kafkerl_protocol:partition()) -> + atom(). buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). From 45fc255d24bc47a58773c1a83e5f50e8ffcd9e7c Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Thu, 21 Jan 2016 14:40:39 -0300 Subject: [PATCH 24/72] minor spec fixes --- src/kafkerl_protocol.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 763d719..6c80855 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -41,10 +41,10 @@ kafkerl:error(). -type messages() :: [{kafkerl:topic(), [{{kafkerl:partition(), integer()}, - [binary() | {binary(), binary()}]}]}]. + [binary() | {binary(), binary()}]}]}]. -type fetch_state() :: {binary(), integer(), [any()]} | void. --type fetch_response() :: {ok, integer(), messages()} | - {incomplete, integer(), messages(), fetch_state()} | +-type fetch_response() :: {ok, correlation_id(), messages()} | + {incomplete, correlation_id(), messages(), fetch_state()} | kafkerl:error(). 
% Compression From 039d6eed5f650f012de9d7cc2927e32a9f060a77 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 25 Jan 2016 00:26:26 -0300 Subject: [PATCH 25/72] fixes and improvements --- src/kafkerl.erl | 15 +- src/kafkerl_broker_connection.erl | 234 +++++++++++++++++++----------- src/kafkerl_connector.erl | 135 ++++++++++------- src/kafkerl_protocol.erl | 15 +- src/kafkerl_utils.erl | 21 ++- 5 files changed, 262 insertions(+), 158 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 32fb06e..ba92fc9 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -9,6 +9,8 @@ -export([version/0]). %% Types +-type offset() :: integer(). + -type callback() :: pid() | fun() | {atom(), atom()} | @@ -18,7 +20,7 @@ {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | - {offset, integer()} | + {offset, offset()} | {fetch_interval, false | integer()}. -type options() :: [option()]. -type server_ref() :: atom() | pid(). @@ -67,7 +69,7 @@ produce(ServerRef, Topic, Partition, Message, Options) -> consume(Topic, Partition) -> consume(?MODULE, Topic, Partition, []). --spec consume(topic(), partition(), options()) -> ok | [binary()] | error(); +-spec consume(topic(), partition(), options()) -> ok | error(); (server_ref(), topic(), partition()) -> ok | error(). consume(Topic, Partition, Options) when is_list(Options) -> consume(?MODULE, Topic, Partition, Options); @@ -75,13 +77,16 @@ consume(ServerRef, Topic, Partition) -> consume(ServerRef, Topic, Partition, []). -spec consume(server_ref(), topic(), partition(), options()) -> - ok | [binary()] | error(). + ok | {[payload()], offset()} | error(). consume(ServerRef, Topic, Partition, Options) -> - case lists:keyfind(consumer, 1, Options) of - false -> + case {proplists:get_value(consumer, Options, undefined), + proplists:get_value(fetch_interval, Options, false)} of + {undefined, false} -> NewOptions = [{consumer, self()} | Options], kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), kafkerl_utils:gather_consume_responses(); + {undefined, _} -> + {error, fetch_interval_specified_with_no_consumer}; _ -> kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) end. diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 0cccbfb..f9bb48a 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -25,23 +25,27 @@ options = undefined :: kafkerl:options(), state = void :: kafkerl_protocol:fetch_state()}). --record(state, {name = undefined :: atom(), - buffers = [] :: [atom()], - conn_idx = undefined :: conn_idx(), - client_id = undefined :: binary(), - socket = undefined :: port(), - address = undefined :: kafkerl_connector:address(), - connector = undefined :: pid(), - tref = undefined :: any(), - tcp_options = [] :: [any()], - max_retries = 0 :: integer(), - retry_interval = 0 :: integer(), - request_number = 0 :: integer(), - pending_requests = [] :: [integer()], - max_time_queued = 0 :: integer(), - ets = undefined :: atom(), - fetches = [] :: [#fetch{}], - current_fetch = void :: void | kafkerl_protocol:correlation_id()}). 
+-record(state, {name = undefined :: atom(), + buffers = [] :: [atom()], + conn_idx = undefined :: conn_idx(), + client_id = undefined :: binary(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + connector = undefined :: pid(), + tref = undefined :: any(), + tcp_options = [] :: [any()], + max_retries = 0 :: integer(), + retry_interval = 0 :: integer(), + request_number = 0 :: integer(), + pending_requests = [] :: [integer()], + max_time_queued = 0 :: integer(), + ets = undefined :: atom(), + fetches = [] :: [#fetch{}], + current_fetch = void :: kafkerl_protocol:correlation_id() | + void, + scheduled_fetches = [] :: [{{kafkerl:topic(), + kafkerl:partition()}, + timer:tref()}]}). -type state() :: #state{}. %%============================================================================== @@ -68,11 +72,13 @@ add_buffer(ServerRef, Buffer) -> clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). --spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> ok. +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> + ok. stop_fetch(ServerRef, Topic, Partition) -> gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). @@ -108,7 +114,7 @@ handle_info({flush, Time}, State) -> {ok, _Tref} = queue_flush(Time), handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - _ = lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), + _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), {noreply, State}. % Boilerplate @@ -143,9 +149,9 @@ init([Id, Connector, Address, Config, Name]) -> {ok, _Tref} = queue_flush(MaxTimeQueued), {ok, State}; {errors, Errors} -> - lists:foreach(fun(E) -> - _ = lager:critical("broker connection config error ~p", [E]) - end, Errors), + ok = lists:foreach(fun(E) -> + _ = lager:critical("configuration error: ~p", [E]) + end, Errors), {stop, bad_config} end. 
@@ -184,54 +190,89 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, handle_fetch(ServerRef, Topic, Partition, Options, State = #state{fetches = Fetches, client_id = ClientId, - socket = Socket, name = Name}) -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - Offset = proplists:get_value(offset, Options, 0), - Request = {Topic, {Partition, Offset, 2147483647}}, - MaxWait = proplists:get_value(max_wait, Options), - MinBytes = proplists:get_value(min_bytes, Options), - Payload = kafkerl_protocol:build_fetch_request(Request, - ClientId, - CorrelationId, - MaxWait, - MinBytes), - case gen_tcp:send(Socket, Payload) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - ok = gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(State)}; - ok -> - _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), - NewFetch = #fetch{correlation_id = CorrelationId, - server_ref = ServerRef, - topic = Topic, - partition = Partition, - options = Options}, - {reply, ok, NewState#state{fetches = [NewFetch | Fetches]}} - end; -handle_fetch(_ServerRef, _Topic, _Partition, _Options, State) -> - {reply, {error, fetch_in_progress}, State}. + socket = Socket, name = Name, + scheduled_fetches = ScheduledFetches}) -> + Scheduled = proplists:get_bool(scheduled, Options), + case {get_fetch(Topic, Partition, Fetches), + lists:keytake({Topic, Partition}, 1, ScheduledFetches), + Scheduled} of + % An scheduled fetch we can't identify? We ignore it + {_, false, true} -> + lager:warning("ignoring unknown scheduled fetch"), + {reply, ok, State}; + % We are already fetching that topic/partition pair + {#fetch{}, _, false} -> + {reply, {error, fetch_in_progress}, State}; + % We have a scheduled fetch for that topic/partition pair and this is not an + % scheduled fetch + {not_found, Tuple, false} when is_tuple(Tuple) -> + {reply, {error, fetch_in_progress}, State}; + % We have a valid fetch request! + {not_found, KeyTakeResult, Scheduled} -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; + ok -> + _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + options = Options}, + NewScheduledFetches = case KeyTakeResult of + false -> ScheduledFetches; + {_, _, List} -> List + end, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches], + scheduled_fetches = NewScheduledFetches}} + end + end. handle_stop_fetch(Topic, Partition, State) -> - % Leave current fetch as it is - NewFetches = remove_fetch(Topic, Partition, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches}}. 
+ % Cancel any timers we have for scheduled fetches + case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of + false -> + NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}; + {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> + _ = timer:cancel(TRef), + NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches, + scheduled_fetches = NewScheduledFetches}} + end. -remove_fetch(Topic, Partition, CurrentFetches) -> - remove_fetch(Topic, Partition, CurrentFetches, []). -remove_fetch(_Topic, _Partition, [], Acc) -> +remove_fetch(Topic, Partition, Force, CurrentFetches) -> + remove_fetch(Topic, Partition, Force, CurrentFetches, []). + +remove_fetch(_Topic, _Partition, _Force, [], Acc) -> Acc; -remove_fetch(Topic, Partition, +remove_fetch(Topic, Partition, force, + [#fetch{topic = Topic, partition = Partition} | T], Acc) -> + % If we are forcing the removal, just remove the fetch + Acc ++ T; +remove_fetch(Topic, Partition, _, [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> % Clearing the fetch options ensures this fetch will stop sending any messages % since there is no consumer. This also removes the fetch_interval so it won't % be requested again. % Simply removing the fetch here doesn't work since we will still get a server - % response, but we will not be able to properly handle it. + % response, but we won't be able to handle it. [Fetch#fetch{options = []} | Acc] ++ T; -remove_fetch(Topic, Partition, [H | T], Acc) -> - remove_fetch(Topic, Partition, T, [H | Acc]). +remove_fetch(Topic, Partition, Force, [H | T], Acc) -> + remove_fetch(Topic, Partition, Force, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, @@ -253,8 +294,9 @@ handle_tcp_data(Bin, State = #state{fetches = Fetches, handle_produce_response(Bin, State) end. 
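%% A sketch of the bookkeeping pattern behind the new scheduled_fetches field
%% above: pending fetch timers are kept as {{Topic, Partition}, TRef} pairs so
%% a later stop_fetch can cancel the timer before it fires. The module and
%% function names here are illustrative, not part of kafkerl.
-module(scheduled_fetches_example).
-export([schedule/4, cancel/3]).

schedule(Topic, Partition, Interval, Scheduled) ->
  %% In kafkerl_broker_connection the timer re-issues fetch/4; here we just
  %% print something, to keep the sketch self-contained.
  {ok, TRef} = timer:apply_after(Interval, io, format,
                                 ["fetching ~p/~p~n", [Topic, Partition]]),
  [{{Topic, Partition}, TRef} | Scheduled].

cancel(Topic, Partition, Scheduled) ->
  case lists:keytake({Topic, Partition}, 1, Scheduled) of
    false ->
      Scheduled;
    {value, {_Key, TRef}, Remaining} ->
      {ok, cancel} = timer:cancel(TRef),
      Remaining
  end.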
-handle_fetch_response(Bin, Fetch, State = #state{name = Name, - fetches = Fetches}) -> +handle_fetch_response(Bin, Fetch, + State = #state{fetches = Fetches, + scheduled_fetches = ScheduledFetches}) -> Options = Fetch#fetch.options, Consumer = proplists:get_value(consumer, Options), case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of @@ -266,24 +308,29 @@ handle_fetch_response(Bin, Fetch, State = #state{name = Name, [] -> []; _ -> [{consumed, Messages}, {offset, Offset}] end), + NewFetches = lists:delete(Fetch, Fetches), + NewState = State#state{current_fetch = void, fetches = NewFetches}, case proplists:get_value(fetch_interval, Options, false) of - false -> {ok, State#state{current_fetch = void}}; - Interval -> - NewOptions = lists:keyreplace(offset, 1, Options, {offset, Offset}), - Arguments = [Fetch#fetch.server_ref, Fetch#fetch.topic, - Fetch#fetch.partition, NewOptions], - _ = timer:apply_after(Interval, ?MODULE, fetch, Arguments), - {ok, State#state{current_fetch = void, - fetches = lists:delete(Fetch, Fetches)}} + false -> + {ok, NewState}; + Interval -> + NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, + {offset, Offset}]), + Topic = Fetch#fetch.topic, + Partition = Fetch#fetch.partition, + ServerRef = Fetch#fetch.server_ref, + Arguments = [ServerRef, Topic, Partition, NewOptions], + {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], + {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} end; {incomplete, CorrelationId, Data, NewFetchState} -> ok = case Data of - [{_, [{_, Messages}]}] -> + [{_, [{_, Messages = [_ | _]}]}] -> send_messages(Consumer, {consumed, Messages}); _ -> % On some cases, kafka will return an incomplete response with no - % messages, in this case since we don't have anything to send, we - % just need to update the fetch state. + % messages, but we shouldn't send the empty message list. ok end, {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | @@ -291,7 +338,8 @@ handle_fetch_response(Bin, Fetch, State = #state{name = Name, current_fetch = CorrelationId}}; Error -> ok = send_messages(Consumer, Error), - {ok, State#state{current_fetch = void, fetches = lists:delete(Fetch, Fetches)}} + NewFetches = lists:delete(Fetch, Fetches), + {ok, State#state{current_fetch = void, fetches = NewFetches}} end. handle_produce_response(Bin, State = #state{connector = Connector, name = Name, @@ -303,9 +351,10 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, ets:delete(EtsName, CorrelationId), {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered - spawn(fun() -> - notify_success_to_connector(Successes, Messages, Connector) - end), + _ = spawn(fun() -> + notify_success(Successes, Messages, Connector) + end), + % Then handle the errors case handle_errors(Errors, Messages, Name) of ignore -> {ok, State}; @@ -315,8 +364,7 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {ok, State} end; _ -> - _ = lager:warning("~p was unable to properly process produce response", - [Name]), + _ = lager:warning("~p was unable to get produce response", [Name]), {error, invalid_produce_response} end; Other -> @@ -332,14 +380,14 @@ resend_messages(Messages, Connector) -> F = fun(M) -> kafkerl_connector:send(Connector, M, []) end, lists:foreach(F, Messages). 
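%% When the consumer option is a pid, the events sent above arrive as plain
%% messages: a {consumed, Messages} for each parsed chunk, an {offset, Offset}
%% once the fetch completes, and errors forwarded as {error, Reason} tuples.
%% A sketch of a receiving process (consumer_loop/1 is illustrative, not part
%% of kafkerl):
consumer_loop(LastOffset) ->
  receive
    {consumed, Messages} ->
      io:format("received ~p message(s)~n", [length(Messages)]),
      consumer_loop(LastOffset);
    {offset, Offset} ->
      consumer_loop(Offset);
    {error, Reason} ->
      io:format("fetch failed: ~p~n", [Reason]),
      LastOffset
  end.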
-notify_success_to_connector([], _Messages, _Pid) -> +notify_success([], _Messages, _Pid) -> ok; -notify_success_to_connector([{Topic, Partition, Offset} | T], Messages, Pid) -> +notify_success([{Topic, Partition, Offset} | T], Messages, Pid) -> MergedMessages = kafkerl_utils:merge_messages(Messages), Partitions = partitions_in_topic(Topic, MergedMessages), M = messages_in_partition(Partition, Partitions), kafkerl_connector:produce_succeeded(Pid, {Topic, Partition, M, Offset}), - notify_success_to_connector(T, Messages, Pid). + notify_success(T, Messages, Pid). partitions_in_topic(Topic, Messages) -> lists:flatten([P || {T, P} <- Messages, T =:= Topic]). @@ -389,8 +437,9 @@ handle_error({Topic, Partition, Error}, Messages, Name) Message -> {true, Message} end; handle_error({Topic, Partition, Error}, _Messages, Name) -> + ErrorName = kafkerl_error:get_error_name(Error), _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), + [Name, ErrorName, Topic, Partition]), false. get_message_for_error(Topic, Partition, SavedMessages, Name) -> @@ -422,7 +471,8 @@ connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> Pid ! {connected, Socket}; {error, Reason} -> NewRetries = Retries - 1, - _ = lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", + _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p + (~p retries left)", [Name, Host, Port, Reason, NewRetries]), timer:sleep(Timeout), connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) @@ -457,11 +507,19 @@ parse_correlation_id(Bin, CorrelationId) -> get_fetch(_CorrelationId, []) -> not_found; -get_fetch(CorrelationId, [Fetch = #fetch{correlation_id = CorrelationId} | _T]) -> - Fetch; -get_fetch(CorrelationId, [_ | T]) -> +get_fetch(CorrelationId, [H = #fetch{correlation_id = CorrelationId} | _T]) -> + H; +get_fetch(CorrelationId, [_H | T]) -> get_fetch(CorrelationId, T). +get_fetch(_Topic, _Partition, []) -> + not_found; +get_fetch(Topic, Partition, [H = #fetch{topic = Topic, + partition = Partition} | _T]) -> + H; +get_fetch(Topic, Partition, [_H | T]) -> + get_fetch(Topic, Partition, T). + send_messages(_Consumer, []) -> ok; send_messages(Consumer, [Event | T]) -> diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 089818e..e15e23b 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -10,7 +10,7 @@ % Produce -export([send/3]). % Consume --export([fetch/4]). +-export([fetch/4, stop_fetch/3]). % Common -export([subscribe/2, subscribe/3, unsubscribe/2]). % Only for internal use @@ -34,20 +34,20 @@ -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [address()], - broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: kafkerl_protocol:client_id(), - max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - config = [] :: {atom(), any()}, - autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), kafkerl:callback()}], - known_topics = [] :: [binary()], - pending = [] :: [kafkerl:basic_message()], - last_metadata_request = 0 :: integer(), - metadata_request_cd = 0 :: integer(), - last_dump_name = {"", 0} :: {string(), integer()}, - default_fetch_options = [] :: kafkerl:options()}). 
+-record(state, {brokers = [] :: [address()], + broker_mapping = void :: [broker_mapping()] | void, + client_id = <<>> :: kafkerl_protocol:client_id(), + max_metadata_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + config = [] :: {atom(), any()}, + autocreate_topics = false :: boolean(), + callbacks = [] :: [{filters(), kafkerl:callback()}], + known_topics = [] :: [binary()], + pending = [] :: [kafkerl:basic_message()], + last_metadata_request = 0 :: integer(), + metadata_request_cd = 0 :: integer(), + last_dump_name = {"", 0} :: {string(), integer()}, + default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. -export_type([address/0]). @@ -72,15 +72,20 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> ok end; Error -> - lager:debug("unable to send message to ~p, reason: ~p", [Buffer, Error]), + _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), gen_server:call(ServerRef, {send, Message}) end. --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> - ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> + ok. +stop_fetch(ServerRef, Topic, Partition) -> + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). + -spec get_partitions(kafkerl:server_ref()) -> [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). get_partitions(ServerRef) -> @@ -91,7 +96,8 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok | kafkerl:error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> + ok | kafkerl:error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). 
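%% Usage sketch for subscribe/2,3 with the callback() forms defined in
%% kafkerl.erl (a pid, a 1-arity fun, an {M, F} pair or an {M, F, Args}
%% tuple). The registered server name kafkerl and the my_handler module are
%% assumptions made for the example:
1> kafkerl_connector:subscribe(kafkerl, self()).
ok
2> kafkerl_connector:subscribe(kafkerl, fun(Event) -> io:format("~p~n", [Event]) end).
ok
3> kafkerl_connector:subscribe(kafkerl, {my_handler, handle_event}, all).
ok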
-spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> @@ -132,15 +138,17 @@ handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> {DumpNameStr, _} = DumpName = get_ets_dump_name(State#state.last_dump_name), AllMessages = ets_buffer:read_all(Buffer), FilePath = proplists:get_value(dump_location, Options, "") ++ DumpNameStr, - ok = case file:write_file(FilePath, term_to_binary(AllMessages)) of - ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); - Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) - end, + _ = case file:write_file(FilePath, term_to_binary(AllMessages)) of + ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); + Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) + end, {reply, ok, State#state{last_dump_name = DumpName}}; handle_call({send, Message}, _From, State) -> handle_send(Message, State); handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; +handle_call({stop_fetch, Topic, Partition}, _From, State) -> + {reply, handle_stop_fetch(Topic, Partition, State), State}; handle_call({request_metadata}, _From, State) -> {reply, ok, handle_request_metadata(State, [])}; handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> @@ -170,7 +178,7 @@ handle_info({metadata_updated, []}, State) -> handle_info({metadata_updated, Mapping}, State) -> % Create the topic mapping (this also starts the broker connections) NewBrokerMapping = get_broker_mapping(Mapping, State), - lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), + _ = lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), % Get the partition data to send to the subscribers and send it PartitionData = get_partitions_from_mapping(NewBrokerMapping), Callbacks = State#state.callbacks, @@ -178,7 +186,7 @@ handle_info({metadata_updated, Mapping}, State) -> % Add to the list of known topics NewTopics = lists:sort([T || {T, _P} <- PartitionData]), NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), - lager:debug("Known topics: ~p", [NewKnownTopics]), + _ = lager:debug("Known topics: ~p", [NewKnownTopics]), % Reverse the pending messages and try to send them again RPending = lists:reverse(State#state.pending), ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), @@ -189,11 +197,11 @@ handle_info({'DOWN', Ref, process, _, normal}, State) -> true = demonitor(Ref), {noreply, State}; handle_info({'DOWN', Ref, process, _, Reason}, State) -> - lager:error("metadata request failed, reason: ~p", [Reason]), + _ = lager:error("metadata request failed, reason: ~p", [Reason]), true = demonitor(Ref), {noreply, handle_request_metadata(State, [], true)}; handle_info(Msg, State) -> - lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), + _ = lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), {noreply, State}. -spec handle_cast(any(), state()) -> {noreply, state()}. @@ -238,44 +246,51 @@ init([Config]) -> {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> - lager:critical("Connector config error ~p", [E]) + _ = lager:critical("Connector config error ~p", [E]) end, Errors), {stop, bad_config} end. 
handle_send(Message, State = #state{autocreate_topics = false}) -> + lager:critical("a.1 ~p", [Message]), % The topic didn't exist, ignore {Topic, _Partition, Payload} = Message, - lager:error("Dropping ~p sent to non existing topic ~p", [Payload, Topic]), - {reply, ok, State}; + _ = lager:error("Dropped ~p sent to non existing topic ~p", [Payload, Topic]), + {reply, {error, non_existing_topic}, State}; handle_send(Message, State = #state{broker_mapping = void, pending = Pending}) -> - % Maybe have a new buffer + lager:critical("b.1 ~p", [Message]), + % We should consider saving this to a new buffer instead of using the state. {reply, ok, State#state{pending = [Message | Pending]}}; handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, known_topics = KnownTopics}) -> + lager:critical("c.1 ~p", [Message]), {Topic, Partition, Payload} = Message, case lists:any(fun({K, _}) -> K =:= {Topic, Partition} end, Mapping) of true -> - % We need to check if the topic/partition pair exists, this is because the - % ets takes some time to start, so some messages could be lost. - % Therefore if we have the topic/partition, just send it again (the order - % will suffer though) - send(self(), Message, []), + % The ets takes some time to be available after being created, so we check + % if the topic/partition pair is in the mapping and if it does, we know we + % just need to send it again. The order is not guaranteed in this case, so + % if that's a concern, don't rely on autocreate_topics (besides, don't use + % autocreate_topics on production since it opens another can of worms). + ok = send(self(), Message, []), {reply, ok, State}; false -> - % Now, if the topic/partition was not valid, we need to check if the topic - % exists, if it does, just drop the message as we can assume no partitions - % are created. + % However, if the topic/partition pair does not exist, we need to check if + % the topic exists. If the topic exists, we drop the message because kafka + % can't add partitions on the fly. case lists:any(fun({{T, _}, _}) -> T =:= Topic end, Mapping) of true -> - lager:error("Dropping ~p sent to topic ~p, partition ~p", - [Payload, Topic, Partition]), - {reply, ok, State}; + _ = lager:error("Dropped ~p sent to topic ~p, partition ~p", + [Payload, Topic, Partition]), + {reply, {error, bad_partition}, State}; false -> NewKnownTopics = lists:umerge([Topic], KnownTopics), NewState = State#state{pending = [Message | Pending]}, - {reply, ok, handle_request_metadata(NewState, NewKnownTopics)} + lager:critical("X"), + R={reply, ok, handle_request_metadata(NewState, NewKnownTopics)}, + lager:critical("X2"), + R end end. @@ -290,6 +305,18 @@ handle_fetch(Topic, Partition, Options, State) -> kafkerl_broker_connection:fetch(Broker, Topic, Partition, NewOptions) end. +handle_stop_fetch(_Topic, _Partition, #state{broker_mapping = void}) -> + % Ignore, there's no fetch in progress + ok; +handle_stop_fetch(Topic, Partition, State) -> + case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of + false -> + % Ignore, there's no fetch in progress + ok; + {_, Broker} -> + kafkerl_broker_connection:stop_fetch(Broker, Topic, Partition) + end. 
+ handle_get_partitions(#state{broker_mapping = void}) -> {error, not_available}; handle_get_partitions(#state{broker_mapping = Mapping}) -> @@ -355,7 +382,7 @@ do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> do_request_metadata([], _TCPOpts, _Request) -> {error, all_down}; do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> - lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), % Connect to the Broker case gen_tcp:connect(Host, Port, TCPOpts) of {error, Reason} -> @@ -435,12 +462,12 @@ expand_topic({?NO_ERROR, Topic, Partitions}) -> {true, {Topic, Partitions}}; expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> % Replica not available can be ignored, still, show a warning - lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), {true, {Topic, Partitions}}; expand_topic({Error, Topic, _Partitions}) -> - lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), {true, {Topic, []}}. expand_partitions(Metadata) -> @@ -453,13 +480,13 @@ expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, _, _} | T]}, Acc) -> - lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), ExpandedPartition = {{Topic, Partition}, Leader}, expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> @@ -530,5 +557,5 @@ get_timestamp() -> %% Error handling %%============================================================================== warn_metadata_request(Host, Port, Reason) -> - lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). \ No newline at end of file diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 6c80855..5d5d482 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -18,11 +18,13 @@ %% Requests -type client_id() :: binary(). -type merged_message() :: kafkerl:basic_message() | - {kafkerl:topic(), [{kafkerl:partition(), kafkerl:payload()}]} | + {kafkerl:topic(), + [{kafkerl:partition(), kafkerl:payload()}]} | [merged_message()]. -type fetch_offset() :: integer(). -type fetch_max_bytes() :: integer(). --type fetch_partition() :: {kafkerl:partition(), fetch_offset(), fetch_max_bytes()} | +-type fetch_partition() :: {kafkerl:partition(), fetch_offset(), + fetch_max_bytes()} | [fetch_partition()]. 
-type fetch_request() :: {kafkerl:topic(), fetch_partition()} | [fetch_request()]. @@ -35,7 +37,8 @@ -type isr() :: integer(). -type partition_metadata() :: {error_code(), kafkerl:partition(), broker_id(), [replica()], [isr()]}. --type topic_metadata() :: {error_code(), kafkerl:topic(), [partition_metadata()]}. +-type topic_metadata() :: {error_code(), kafkerl:topic(), + [partition_metadata()]}. -type metadata() :: {[broker()], [topic_metadata()]}. -type metadata_response() :: {ok, correlation_id(), metadata()} | kafkerl:error(). @@ -44,7 +47,8 @@ [binary() | {binary(), binary()}]}]}]. -type fetch_state() :: {binary(), integer(), [any()]} | void. -type fetch_response() :: {ok, correlation_id(), messages()} | - {incomplete, correlation_id(), messages(), fetch_state()} | + {incomplete, correlation_id(), messages(), + fetch_state()} | kafkerl:error(). % Compression @@ -81,7 +85,8 @@ %% API %%============================================================================== % Message building --spec build_produce_request(merged_message(), client_id(), correlation_id()) -> iodata(). +-spec build_produce_request(merged_message(), client_id(), correlation_id()) -> + iodata(). build_produce_request(Data, ClientId, CorrelationId) -> build_produce_request(Data, ClientId, CorrelationId, ?COMPRESSION_NONE). -spec build_produce_request(merged_message(), client_id(), correlation_id(), diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 6b7b515..8eac465 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -6,6 +6,7 @@ -export([merge_messages/1, split_messages/1, valid_message/1]). -export([buffer_name/2]). -export([gather_consume_responses/0, gather_consume_responses/1]). +-export([proplists_set/2]). %%============================================================================== %% API @@ -73,6 +74,17 @@ buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). +-type proplist_value() :: {atom(), any()}. +-type proplist() :: [proplist_value]. +-spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> + proplist(). +proplists_set(Proplist, {K, _V} = NewValue) -> + lists:keyreplace(K, 1, Proplist, NewValue); +proplists_set(Proplist, []) -> + Proplist; +proplists_set(Proplist, [H | T]) -> + proplists_set(proplists_set(Proplist, H), T). + %%============================================================================== %% Utils %%============================================================================== @@ -141,15 +153,12 @@ gather_consume_responses(Timeout) -> gather_consume_responses(Timeout, []). gather_consume_responses(Timeout, Acc) -> receive - {message_count, _} -> - % Ignore this one - gather_consume_responses(Timeout, Acc); {consumed, Messages} -> gather_consume_responses(Timeout, Acc ++ Messages); - {consume_done, Messages} -> - Acc ++ Messages; + {offset, Offset} -> + {Acc, Offset}; {error, _Reason} = Error -> Error after Timeout -> - {error, {no_response, Acc}} + [] end. 
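%% Usage sketch for proplists_set/2 above: each new value replaces an existing
%% entry in place via lists:keyreplace/4. Note that a key that is not already
%% present is left out, since lists:keyreplace/4 only replaces existing
%% entries. Example values are illustrative:
1> kafkerl_utils:proplists_set([{offset, 0}, {max_wait, 1500}], {offset, 42}).
[{offset,42},{max_wait,1500}]
2> kafkerl_utils:proplists_set([{offset, 0}], [{offset, 7}, {scheduled, true}]).
[{offset,7}]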
\ No newline at end of file From 8729d9d20cf751a68e864d8269cae852fa02a591 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 25 Jan 2016 12:21:52 -0300 Subject: [PATCH 26/72] updated readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f870605..5e437ff 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -kafkerl v1.0.2 +kafkerl v2.0.0 ============== [![Gitter](https://badges.gitter.im/Join Chat.svg)](https://gitter.im/HernanRivasAcosta/kafkerl?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -Apache Kafka 0.8 high performance producer for erlang. -Developed thanks to the support and sponsorship of [TigerText](http://www.tigertext.com/). +Apache Kafka 0.8.2 high performance producer/consumer for erlang. +Developed thanks to the support and sponsorship of [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/). ##Features (aka, why kafkerl?) - Fast binary creation. @@ -12,12 +12,12 @@ Developed thanks to the support and sponsorship of [TigerText](http://www.tigert - Messages are not lost but cached before sending to kafka. - Handles server side errors and broker/leadership changes. - Flexible API allows consumer of messages to define pids, funs or M:F pairs as callbacks for the received messages. + - Simple yet flexible consumer API to retrieve the messages from Kafka. ##Missing features (aka, what I am working on but haven't finished yet) - - Though the library can parse kafka messages, the consumers are not implemented in this version. - There is no communication with Zookeeper. - Tests suites. -Special thanks to [@nitzanharel](https://github.com/nitzanharel) who found some really nasty bugs and helped me understand the subtleties of kafka's design and to the rest of the [TigerText](http://www.tigertext.com/) team for their support and code reviews. +Special thanks to [@nitzanharel](https://github.com/nitzanharel) who found some really nasty bugs and helped me understand the subtleties of kafka's design and to the rest of the [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/) teams for their support and code reviews. 
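%% Usage sketch for the consumer API mentioned above. Topic, partition,
%% payloads and offsets are example values; kafkerl must already be started
%% and connected to a broker that knows the topic.
%%
%% Blocking consume: returns the fetched messages plus the offset to resume
%% from.
1> kafkerl:consume(<<"test1">>, 0, [{offset, 0}]).
{[<<"first">>,<<"second">>],2}
%% Streaming consume: messages are delivered to the given consumer callback
%% instead, re-fetching every fetch_interval milliseconds.
2> kafkerl:consume(<<"test1">>, 0, [{consumer, self()}, {fetch_interval, 2500}]).
ok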
\ No newline at end of file From 8e6362460860bf0258ee6ae8ed08170e7646aef7 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 8 Apr 2016 11:30:17 -0300 Subject: [PATCH 27/72] fixed a parsing error for some metadata responses --- src/kafkerl_protocol.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 5d5d482..f05b396 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -566,21 +566,22 @@ parse_topic_metadata(Count, <<>>, Acc) when Count =< 0 -> parse_topic_metadata(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing topic metadata, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_topic_metadata(Count, <<0:?SHORT, - TopicSize:?USHORT, - TopicName:TopicSize/binary, +parse_topic_metadata(Count, <>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, PartitionsBin), - TopicMetadata = {0, TopicName, PartitionsMetadata}, + TopicMetadata = {ErrorCode, <<"unknown">>, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]); parse_topic_metadata(Count, <>, Acc) -> - {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(0, Remainder), - TopicMetadata = {ErrorCode, <<"unkown">>, PartitionsMetadata}, + TopicSize:?USHORT, + TopicName:TopicSize/binary, + PartitionCount:?UINT, + PartitionsBin/binary>>, Acc) -> + {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, + PartitionsBin), + TopicMetadata = {ErrorCode, TopicName, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]). parse_partition_metadata(Count, Bin) -> From 3388619ebb9d82feef030b72c8210fedc719508f Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Tue, 5 Apr 2016 16:45:30 -0300 Subject: [PATCH 28/72] Revamped the way kafkerl handles messages that need to be written to disk --- rel/kafkerl.app.config | 8 +- src/kafkerl.erl | 22 +--- src/kafkerl_connector.erl | 213 ++++++++++++++++++------------- src/kafkerl_metadata_handler.erl | 58 +++++++++ src/kafkerl_utils.erl | 6 +- 5 files changed, 196 insertions(+), 111 deletions(-) create mode 100644 src/kafkerl_metadata_handler.erl diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 2542515..241a2b1 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -10,11 +10,15 @@ {broker_tcp_timeout, 1000}, {max_metadata_retries, -1}, {assume_autocreate_topics, true}, - {metadata_tcp_timeout, 1000}, + {metadata_tcp_timeout, 5000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds {metadata_request_cooldown, 1500}, % In milliseconds {consumer_min_bytes, 1}, - {consumer_max_wait, 1500}]}, + {consumer_max_wait, 1500}, + {dump_location, "dumps/"}, + {flush_to_disk_every, 20000}, % In milliseconds + {max_buffer_size, 5000}, + {save_messages_for_bad_topics, true}]}, {topics, [test1, test2, test3]}, {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index ba92fc9..27238e2 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -2,7 +2,7 @@ -author('hernanrivasacosta@gmail.com'). -export([start/0, start/2]). --export([produce/3, produce/4, produce/5, +-export([produce/3, consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, request_metadata/0, request_metadata/1, request_metadata/2, partitions/0, partitions/1]). @@ -16,7 +16,6 @@ {atom(), atom()} | {atom(), atom(), [any()]}. 
-type option() :: {buffer_size, integer() | infinity} | - {dump_location, string()} | {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | @@ -51,19 +50,8 @@ start(_StartType, _StartArgs) -> %% Produce API -spec produce(topic(), partition(), payload()) -> ok. produce(Topic, Partition, Message) -> - produce(?MODULE, Topic, Partition, Message, []). - --spec produce(server_ref(), topic(), partition(), payload()) -> ok; - (topic(), partition(), payload(), options()) -> ok. -produce(Topic, Partition, Message, Options) when is_list(Options) -> - produce(?MODULE, {Topic, Partition, Message}, Options); -produce(ServerRef, Topic, Partition, Message) -> - produce(ServerRef, {Topic, Partition, Message}, []). - --spec produce(server_ref(), topic(), partition(), payload(), options()) -> ok. -produce(ServerRef, Topic, Partition, Message, Options) -> - kafkerl_connector:send(ServerRef, {Topic, Partition, Message}, Options). - + kafkerl_connector:send({Topic, Partition, Message}). + %% Consume API -spec consume(topic(), partition()) -> ok | error(). consume(Topic, Partition) -> @@ -104,13 +92,13 @@ stop_consuming(ServerRef, Topic, Partition) -> request_metadata() -> request_metadata(?MODULE). --spec request_metadata(atom() | [topic()]) -> ok. +-spec request_metadata(server_ref() | [topic()]) -> ok. request_metadata(Topics) when is_list(Topics) -> request_metadata(?MODULE, Topics); request_metadata(ServerRef) -> kafkerl_connector:request_metadata(ServerRef). --spec request_metadata(atom(), [topic()]) -> ok. +-spec request_metadata(server_ref(), [topic()]) -> ok. request_metadata(ServerRef, Topics) -> kafkerl_connector:request_metadata(ServerRef, Topics). diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index e15e23b..0aeab59 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -8,7 +8,7 @@ -export([request_metadata/1, request_metadata/2, request_metadata/3, get_partitions/1]). % Produce --export([send/3]). +-export([send/1]). % Consume -export([fetch/4, stop_fetch/3]). % Common @@ -37,17 +37,16 @@ -record(state, {brokers = [] :: [address()], broker_mapping = void :: [broker_mapping()] | void, client_id = <<>> :: kafkerl_protocol:client_id(), - max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), config = [] :: {atom(), any()}, autocreate_topics = false :: boolean(), callbacks = [] :: [{filters(), kafkerl:callback()}], known_topics = [] :: [binary()], - pending = [] :: [kafkerl:basic_message()], - last_metadata_request = 0 :: integer(), - metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}, - default_fetch_options = [] :: kafkerl:options()}). + default_fetch_options = [] :: kafkerl:options(), + dump_location = "" :: string(), + max_buffer_size = 0 :: integer(), + save_bad_messages = false :: boolean(), + metadata_handler = void :: atom()}). -type state() :: #state{}. -export_type([address/0]). @@ -57,23 +56,25 @@ %%============================================================================== -spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Name, Config) -> - gen_server:start_link({local, Name}, ?MODULE, [Config], []). + gen_server:start_link({local, Name}, ?MODULE, [Config, Name], []). --spec send(kafkerl:server_ref(), kafkerl:basic_message(), kafkerl:options()) -> +-spec send(kafkerl:basic_message()) -> ok | kafkerl:error(). 
-send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> +send({Topic, Partition, _Payload} = Message) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of NewSize when is_integer(NewSize) -> - case lists:keyfind(buffer_size, 1, Options) of - {buffer_size, MaxSize} when NewSize > MaxSize -> - gen_server:call(ServerRef, {dump_buffer_to_disk, Buffer, Options}); - _ -> - ok - end; + ok; Error -> _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), - gen_server:call(ServerRef, {send, Message}) + case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of + NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> + ok; + _ -> + _ = lager:critical("unable to write to default buffer, reason: ~p", + [Error]), + ok + end end. -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), @@ -134,17 +135,6 @@ produce_succeeded(ServerRef, Messages) -> %%============================================================================== -spec handle_call(any(), any(), state()) -> {reply, ok, state()} | {reply, {error, any()}, state()}. -handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> - {DumpNameStr, _} = DumpName = get_ets_dump_name(State#state.last_dump_name), - AllMessages = ets_buffer:read_all(Buffer), - FilePath = proplists:get_value(dump_location, Options, "") ++ DumpNameStr, - _ = case file:write_file(FilePath, term_to_binary(AllMessages)) of - ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); - Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) - end, - {reply, ok, State#state{last_dump_name = DumpName}}; -handle_call({send, Message}, _From, State) -> - handle_send(Message, State); handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({stop_fetch, Topic, Partition}, _From, State) -> @@ -170,6 +160,10 @@ handle_call({unsubscribe, Callback}, _From, State) -> NewCallbacks = lists:keydelete(Callback, 2, State#state.callbacks), {reply, ok, State#state{callbacks = NewCallbacks}}. +-spec handle_info(any(), state()) -> {noreply, state()} | + {stop, {error, any()}, state()}. +handle_info(dump_buffer_tick, State) -> + {noreply, handle_dump_buffer_to_disk(State)}; handle_info(metadata_timeout, State) -> {stop, {error, unable_to_retrieve_metadata}, State}; handle_info({metadata_updated, []}, State) -> @@ -187,10 +181,8 @@ handle_info({metadata_updated, Mapping}, State) -> NewTopics = lists:sort([T || {T, _P} <- PartitionData]), NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), _ = lager:debug("Known topics: ~p", [NewKnownTopics]), - % Reverse the pending messages and try to send them again - RPending = lists:reverse(State#state.pending), - ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), - {noreply, State#state{broker_mapping = NewBrokerMapping, pending = [], + % TODO: Maybe retry from the dumps + {noreply, State#state{broker_mapping = NewBrokerMapping, callbacks = NewCallbacks, known_topics = NewKnownTopics}}; handle_info({'DOWN', Ref, process, _, normal}, State) -> @@ -219,81 +211,50 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
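%% With send/1 above, producing no longer goes through the connector process:
%% the calling process writes straight into the per-{topic, partition}
%% ets_buffer and only falls back to the shared default buffer when that write
%% fails. Unsent messages then sit in the default buffer until the
%% dump_buffer_tick handler above retries some and dumps the rest to disk.
%% A usage sketch (topic, partition and payload are example values):
1> kafkerl:produce(<<"test1">>, 0, <<"some payload">>).
ok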
%%============================================================================== %% Handlers %%============================================================================== -init([Config]) -> +init([Config, Name]) -> + % The schema indicates what is expected of the configuration, it validates and + % normalizes the configuration Schema = [{brokers, [{string, {integer, {1, 65535}}}], required}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, {client_id, binary, {default, <<"kafkerl_client">>}}, {topics, [binary], required}, - {metadata_tcp_timeout, positive_integer, {default, 1500}}, {assume_autocreate_topics, boolean, {default, false}}, - {metadata_request_cooldown, positive_integer, {default, 333}}, {consumer_min_bytes, positive_integer, {default, 1}}, - {consumer_max_wait, positive_integer, {default, 1500}}], + {consumer_max_wait, positive_integer, {default, 1500}}, + {dump_location, string, {default, ""}}, + {max_buffer_size, positive_integer, {default, 500}}, + {save_messages_for_bad_topics, boolean, {default, true}}, + {flush_to_disk_every, positive_integer, {default, 10000}}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [Brokers, MaxMetadataRetries, ClientId, Topics, RetryInterval, - AutocreateTopics, MetadataRequestCooldown, MinBytes, MaxWait]} -> + {ok, [Brokers, ClientId, Topics, AutocreateTopics, MinBytes, MaxWait, + DumpLocation, MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> + % Start the metadata request handler + MetadataHandlerName = metadata_handler_name(Name), + {ok, _} = kafkerl_metadata_handler:start(MetadataHandlerName, Config), + % Build the default fetch options DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], State = #state{config = Config, known_topics = Topics, brokers = Brokers, client_id = ClientId, - retry_interval = RetryInterval, + dump_location = DumpLocation, + max_buffer_size = MaxBufferSize, + save_bad_messages = SaveBadMessages, autocreate_topics = AutocreateTopics, - max_metadata_retries = MaxMetadataRetries, - metadata_request_cd = MetadataRequestCooldown, - default_fetch_options = DefaultFetchOptions}, + default_fetch_options = DefaultFetchOptions, + metadata_handler = MetadataHandlerName}, + % Create a buffer to hold unsent messages + _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), + % Start the interval that manages the buffers holding unsent messages + {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), {_Pid, _Ref} = make_metadata_request(State), {ok, State}; {errors, Errors} -> - lists:foreach(fun(E) -> - _ = lager:critical("Connector config error ~p", [E]) - end, Errors), + ok = lists:foreach(fun(E) -> + _ = lager:critical("Connector config error ~p", [E]) + end, Errors), {stop, bad_config} end. -handle_send(Message, State = #state{autocreate_topics = false}) -> - lager:critical("a.1 ~p", [Message]), - % The topic didn't exist, ignore - {Topic, _Partition, Payload} = Message, - _ = lager:error("Dropped ~p sent to non existing topic ~p", [Payload, Topic]), - {reply, {error, non_existing_topic}, State}; -handle_send(Message, State = #state{broker_mapping = void, - pending = Pending}) -> - lager:critical("b.1 ~p", [Message]), - % We should consider saving this to a new buffer instead of using the state. 
- {reply, ok, State#state{pending = [Message | Pending]}}; -handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, - known_topics = KnownTopics}) -> - lager:critical("c.1 ~p", [Message]), - {Topic, Partition, Payload} = Message, - case lists:any(fun({K, _}) -> K =:= {Topic, Partition} end, Mapping) of - true -> - % The ets takes some time to be available after being created, so we check - % if the topic/partition pair is in the mapping and if it does, we know we - % just need to send it again. The order is not guaranteed in this case, so - % if that's a concern, don't rely on autocreate_topics (besides, don't use - % autocreate_topics on production since it opens another can of worms). - ok = send(self(), Message, []), - {reply, ok, State}; - false -> - % However, if the topic/partition pair does not exist, we need to check if - % the topic exists. If the topic exists, we drop the message because kafka - % can't add partitions on the fly. - case lists:any(fun({{T, _}, _}) -> T =:= Topic end, Mapping) of - true -> - _ = lager:error("Dropped ~p sent to topic ~p, partition ~p", - [Payload, Topic, Partition]), - {reply, {error, bad_partition}, State}; - false -> - NewKnownTopics = lists:umerge([Topic], KnownTopics), - NewState = State#state{pending = [Message | Pending]}, - lager:critical("X"), - R={reply, ok, handle_request_metadata(NewState, NewKnownTopics)}, - lager:critical("X2"), - R - end - end. - handle_fetch(_Topic, _Partition, _Options, #state{broker_mapping = void}) -> {error, not_connected}; handle_fetch(Topic, Partition, Options, State) -> @@ -345,9 +306,76 @@ handle_request_metadata(State, NewTopics, _) -> State#state{broker_mapping = void, known_topics = NewKnownTopics, last_metadata_request = LastMetadataUpdate}. +handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, + last_dump_name = LastDumpName}) -> + % Get the buffer name and all the messages from it + Buffer = kafkerl_utils:default_buffer_name(), + MessagesInBuffer = ets_buffer:read_all(Buffer), + % Split them between the ones that should be retried and those that don't + {ToDump, ToRetry} = split_message_dump(MessagesInBuffer, State), + % Retry the messages on an async function (to avoid locking this gen_server) + ok = retry_messages(ToRetry), + % And dump the messages that need to be dumped into a file + case ToDump of + [_ | _] = Messages -> + % Get the name of the file we want to write to + {DumpNameStr, _} = NewDumpName = get_ets_dump_name(LastDumpName), + % Build the location + WorkingDirectory = case file:get_cwd() of + {ok, Path} -> Path; + {error, _} -> "" + end, + FilePath = filename:join([WorkingDirectory, DumpLocation, DumpNameStr]), + % Write to disk + _ = case file:write_file(FilePath, term_to_binary(Messages)) of + ok -> + lager:info("Dumped unsent messages at ~p", [FilePath]); + Error -> + lager:critical("Unable to save messages, reason: ~p", [Error]) + end, + State#state{last_dump_name = NewDumpName}; + _ -> + State + end. + %%============================================================================== %% Utils %%============================================================================== +retry_messages([]) -> + ok; +retry_messages(Messages) -> + _Pid = spawn(fun() -> [send(M) || M <- Messages] end), + ok. + +split_message_dump(Messages, #state{known_topics = KnownTopics, + max_buffer_size = MaxBufferSize, + save_bad_messages = SaveBadMessages}) + when is_list(Messages) -> + + % Split messages between for topics kafkerl knows exist and those that do not. 
+ {Known, Unknown} = lists:partition(fun({Topic, _Partition, _Payload}) -> + lists:member(Topic, KnownTopics) + end, Messages), + % The messages to be dumped are those from unkown topics (if the settings call + % for it) and those from known topics if the buffer size is too large. + % The messages to be retried are those from the known topics, as long as their + % number does not exceed the MaxBufferSize. + case {SaveBadMessages, length(Known) >= MaxBufferSize} of + {true, true} -> + {Unknown ++ Known, []}; + {false, true} -> + {Known, []}; + {true, false} -> + {Unknown, Known}; + {false, false} -> + {[], Known} + end; +% If the messages are not a list, then it's an ets error, report it and move on. +% And yes, those messages are gone forever +split_message_dump(Error, _State) -> + lager:error("Unable to get messages from buffer, reason: ~p", [Error]), + {[], []}. + get_ets_dump_name({OldName, Counter}) -> {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(), Ts = io_lib:format("~4.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B_", @@ -355,9 +383,9 @@ get_ets_dump_name({OldName, Counter}) -> PartialNewName = "kafkerl_messages_" ++ lists:flatten(Ts), case lists:prefix(PartialNewName, OldName) of true -> - {PartialNewName ++ integer_to_list(Counter + 1) ++ ".dump", Counter + 1}; + {PartialNewName ++ integer_to_list(Counter) ++ ".dump", Counter + 1}; _ -> - {PartialNewName ++ "0.dump", 0} + {PartialNewName ++ "0.dump", 1} end. get_metadata_tcp_options() -> @@ -430,6 +458,9 @@ send_event(Event, Callbacks) -> send_event(Event, Callback) =:= ok end, Callbacks). +metadata_handler_name(ServerName) -> + list_to_binary([atom_to_list(ServerName), "_metadata_handler"]). + %%============================================================================== %% Request building %%============================================================================== diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl new file mode 100644 index 0000000..bfb9c7f --- /dev/null +++ b/src/kafkerl_metadata_handler.erl @@ -0,0 +1,58 @@ +-module(kafkerl_metadata_handler). +-author('hernanrivasacosta@gmail.com'). + +-behaviour(gen_fsm). + +%% API +-export([request_metadata/2]). +-export([idle/2, requesting/2, on_cooldown/2]). +% gen_fsm +-export([start_link/1, init/1]). + +-record(state, {max_metadata_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + metadata_request_cd = 0 :: integer()}). + +%%============================================================================== +%% API +%%============================================================================== +-spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). +start_link(Name, Config) -> + gen_fsm:start_link({local, Name}, ?MODULE, [Config], []). + +-spec request_metadata(atom(), [topic()]) -> ok. +request_metadata(ServerRef, Topics) -> + ok. + +%%============================================================================== +%% States +%%============================================================================== +idle(_, State) -> + {next_state, open, {[], Code}, 30000};. + +requesting(_, State) -> + ok. + +on_cooldown(_, State) -> + ok. 
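The split_message_dump/2 clauses introduced in the connector above form a small decision table that is easy to misread, so here is the same rule as a standalone function together with one worked call; the module name and the sample topics, sizes and payloads are made up for illustration.

-module(split_sketch).
-export([split/4]).

%% Returns {ToDump, ToRetry}, mirroring split_message_dump/2:
%%   * unknown-topic messages are kept for the dump only when SaveBadMessages
%%     is true,
%%   * known-topic messages are retried unless MaxBufferSize or more of them
%%     have piled up, in which case they are dumped as well.
split(Messages, KnownTopics, MaxBufferSize, SaveBadMessages) ->
    {Known, Unknown} =
        lists:partition(fun({Topic, _Partition, _Payload}) ->
                          lists:member(Topic, KnownTopics)
                        end, Messages),
    case {SaveBadMessages, length(Known) >= MaxBufferSize} of
        {true, true}   -> {Unknown ++ Known, []};
        {false, true}  -> {Known, []};
        {true, false}  -> {Unknown, Known};
        {false, false} -> {[], Known}
    end.

%% 1> split_sketch:split([{<<"t1">>, 0, <<"a">>}, {<<"bad">>, 0, <<"b">>}],
%%                       [<<"t1">>], 500, true).
%% {[{<<"bad">>,0,<<"b">>}],[{<<"t1">>,0,<<"a">>}]}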
+ +%%============================================================================== +%% Handlers +%%============================================================================== +init([Config]) -> + Schema = [{metadata_tcp_timeout, positive_integer, {default, 1500}}, + {metadata_request_cooldown, positive_integer, {default, 333}}, + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [RetryInterval, MetadataRequestCD, MaxMetadataRetries]} -> + State = #state{config = Config, + retry_interval = RetryInterval, + metadata_request_cd = MetadataRequestCD, + max_metadata_retries = MaxMetadataRetries}, + {ok, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("Metadata config error ~p", [E]) + end, Errors), + {stop, bad_config} + end. \ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 8eac465..79a718b 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -4,7 +4,7 @@ -export([send_event/2, send_error/2]). -export([get_tcp_options/1]). -export([merge_messages/1, split_messages/1, valid_message/1]). --export([buffer_name/2]). +-export([buffer_name/2, default_buffer_name/0]). -export([gather_consume_responses/0, gather_consume_responses/1]). -export([proplists_set/2]). @@ -74,6 +74,10 @@ buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). +-spec default_buffer_name() -> atom(). +default_buffer_name() -> + default_message_buffer. + -type proplist_value() :: {atom(), any()}. -type proplist() :: [proplist_value]. -spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> From 06034c9817dc59c4eecd53151159897fb99fab28 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 15 Apr 2016 11:52:38 -0300 Subject: [PATCH 29/72] improved the metadata handling --- rel/kafkerl.app.config | 7 +- src/kafkerl.erl | 56 ++--- src/kafkerl_broker_connection.erl | 63 ++--- src/kafkerl_connector.erl | 380 ++++++------------------------ src/kafkerl_metadata_handler.erl | 314 ++++++++++++++++++++++-- src/kafkerl_sup.erl | 5 +- src/kafkerl_utils.erl | 4 +- 7 files changed, 422 insertions(+), 407 deletions(-) diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 241a2b1..0c88a29 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -1,19 +1,18 @@ [{lager, [{colored, true}, {handlers, [{lager_console_backend, [debug,true]}]}]}, - {kafkerl, [%{gen_server_name, kafkerl_client}, - {disabled, false}, + {kafkerl, [{disabled, false}, {conn_config, [{brokers, [{"localhost", 9090}, {"localhost", 9091}, {"localhost", 9092}]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, {broker_tcp_timeout, 1000}, - {max_metadata_retries, -1}, + {max_metadata_retries, 3}, {assume_autocreate_topics, true}, {metadata_tcp_timeout, 5000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds - {metadata_request_cooldown, 1500}, % In milliseconds + {metadata_request_cooldown, 3500}, % In milliseconds {consumer_min_bytes, 1}, {consumer_max_wait, 1500}, {dump_location, "dumps/"}, diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 27238e2..3b423c6 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -3,9 +3,9 @@ -export([start/0, start/2]). -export([produce/3, - consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, - request_metadata/0, request_metadata/1, request_metadata/2, - partitions/0, partitions/1]). 
+ consume/2, consume/3, stop_consuming/2, + request_metadata/0, request_metadata/1, + partitions/0]). -export([version/0]). %% Types @@ -31,8 +31,8 @@ -type payload() :: binary() | [binary()]. -type basic_message() :: {topic(), partition(), payload()}. --export_type([server_ref/0, error/0, options/0, callback/0, - topic/0, partition/0, payload/0, basic_message/0]). +-export_type([server_ref/0, error/0, options/0, topic/0, partition/0, payload/0, + callback/0, basic_message/0]). %%============================================================================== %% API @@ -55,63 +55,43 @@ produce(Topic, Partition, Message) -> %% Consume API -spec consume(topic(), partition()) -> ok | error(). consume(Topic, Partition) -> - consume(?MODULE, Topic, Partition, []). + consume(Topic, Partition, []). --spec consume(topic(), partition(), options()) -> ok | error(); - (server_ref(), topic(), partition()) -> ok | error(). -consume(Topic, Partition, Options) when is_list(Options) -> - consume(?MODULE, Topic, Partition, Options); -consume(ServerRef, Topic, Partition) -> - consume(ServerRef, Topic, Partition, []). - --spec consume(server_ref(), topic(), partition(), options()) -> - ok | {[payload()], offset()} | error(). -consume(ServerRef, Topic, Partition, Options) -> +-spec consume(topic(), partition(), options()) -> ok | + {[payload()], offset()} | + error(). +consume(Topic, Partition, Options) -> case {proplists:get_value(consumer, Options, undefined), proplists:get_value(fetch_interval, Options, false)} of {undefined, false} -> NewOptions = [{consumer, self()} | Options], - kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), + kafkerl_connector:fetch(Topic, Partition, NewOptions), kafkerl_utils:gather_consume_responses(); {undefined, _} -> {error, fetch_interval_specified_with_no_consumer}; _ -> - kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) + kafkerl_connector:fetch(Topic, Partition, Options) end. -spec stop_consuming(topic(), partition()) -> ok. stop_consuming(Topic, Partition) -> - stop_consuming(?MODULE, Topic, Partition). - --spec stop_consuming(server_ref(), topic(), partition()) -> ok. -stop_consuming(ServerRef, Topic, Partition) -> - kafkerl_connector:stop_fetch(ServerRef, Topic, Partition). + kafkerl_connector:stop_fetch(Topic, Partition). %% Metadata API -spec request_metadata() -> ok. request_metadata() -> - request_metadata(?MODULE). + request_metadata([]). --spec request_metadata(server_ref() | [topic()]) -> ok. +-spec request_metadata([topic()]) -> ok. request_metadata(Topics) when is_list(Topics) -> - request_metadata(?MODULE, Topics); -request_metadata(ServerRef) -> - kafkerl_connector:request_metadata(ServerRef). - --spec request_metadata(server_ref(), [topic()]) -> ok. -request_metadata(ServerRef, Topics) -> - kafkerl_connector:request_metadata(ServerRef, Topics). + kafkerl_connector:request_metadata(Topics). %% Partitions -spec partitions() -> [{topic(), [partition()]}] | error(). partitions() -> - partitions(?MODULE). - --spec partitions(server_ref()) -> [{topic(), [partition()]}] | error(). -partitions(ServerRef) -> - kafkerl_connector:get_partitions(ServerRef). + kafkerl_connector:get_partitions(). %% Utils -spec version() -> {integer(), integer(), integer()}. version() -> - {2, 0, 0}. \ No newline at end of file + {3, 0, 0}. 
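The kafkerl.erl hunks above drop every server_ref variant, so all calls now go to the single registered process. A minimal round-trip sketch of the API as it stands after this patch, assuming the application is started and a broker is reachable; the module name, topic, partition and payload are illustrative.

-module(kafkerl_usage_sketch).
-export([roundtrip/0]).

roundtrip() ->
    %% Ask for metadata on the topic we are about to use (asynchronous).
    ok = kafkerl:request_metadata([<<"test_topic">>]),
    %% Produce goes through the per-partition buffers behind the scenes.
    ok = kafkerl:produce(<<"test_topic">>, 0, <<"hello">>),
    %% Without a {consumer, Pid} option, consume/3 blocks and returns either
    %% {Payloads, Offset} or an {error, Reason} tuple.
    kafkerl:consume(<<"test_topic">>, 0, [{min_bytes, 1}, {max_wait, 1500}]).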
\ No newline at end of file diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index f9bb48a..9e70e6e 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -8,7 +8,7 @@ % Only for internal use -export([connect/6]). % Supervisors --export([start_link/4]). +-export([start_link/3]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -31,7 +31,6 @@ client_id = undefined :: binary(), socket = undefined :: port(), address = undefined :: kafkerl_connector:address(), - connector = undefined :: pid(), tref = undefined :: any(), tcp_options = [] :: [any()], max_retries = 0 :: integer(), @@ -51,12 +50,12 @@ %%============================================================================== %% API %%============================================================================== --spec start_link(conn_idx(), pid(), kafkerl_connector:address(), any()) -> +-spec start_link(conn_idx(), kafkerl_connector:address(), any()) -> start_link_response(). -start_link(Id, Connector, Address, Config) -> +start_link(Id, Address, Config) -> NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), Name = list_to_atom(NameStr), - Params = [Id, Connector, Address, Config, Name], + Params = [Id, Address, Config, Name], case gen_server:start_link({local, Name}, ?MODULE, Params, []) of {ok, Pid} -> {ok, Name, Pid}; @@ -128,7 +127,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%============================================================================== %% Handlers %%============================================================================== -init([Id, Connector, Address, Config, Name]) -> +init([Id, Address, Config, Name]) -> Schema = [{tcp_options, [any], {default, []}}, {retry_interval, positive_integer, {default, 1000}}, {max_retries, positive_integer, {default, 3}}, @@ -137,13 +136,17 @@ init([Id, Connector, Address, Config, Name]) -> case normalizerl:normalize_proplist(Schema, Config) of {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), - EtsName = list_to_atom(atom_to_list(Name) ++ "_ets"), - ets:new(EtsName, [named_table, public, {write_concurrency, true}, - {read_concurrency, true}]), - State = #state{conn_idx = Id, tcp_options = NewTCPOpts, address = Address, - max_retries = MaxRetries, retry_interval = RetryInterval, - connector = Connector, client_id = ClientId, name = Name, - max_time_queued = MaxTimeQueued, ets = EtsName}, + Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), + _ = ets:new(Ets, ets_options()), + State = #state{ets = Ets, + name = Name, + conn_idx = Id, + address = Address, + client_id = ClientId, + max_retries = MaxRetries, + tcp_options = NewTCPOpts, + retry_interval = RetryInterval, + max_time_queued = MaxTimeQueued}, Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], _Pid = spawn_link(?MODULE, connect, Params), {ok, _Tref} = queue_flush(MaxTimeQueued), @@ -159,9 +162,8 @@ handle_flush(State = #state{socket = undefined}) -> {noreply, State}; handle_flush(State = #state{buffers = []}) -> {noreply, State}; -handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, - client_id = ClientId, connector = Connector, - name = Name}) -> +handle_flush(State = #state{ets = EtsName, socket = Socket, buffers = Buffers, + name = Name, client_id = ClientId}) -> {ok, CorrelationId, NewState} = build_correlation_id(State), % TODO: Maybe buffer all 
this messages in case something goes wrong AllMessages = get_all_messages(Buffers), @@ -180,7 +182,7 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, [Name, Reason]), gen_tcp:close(Socket), ets:delete_all_objects(EtsName, CorrelationId), - ok = resend_messages(MergedMessages, Connector), + ok = resend_messages(MergedMessages), {noreply, handle_tcp_close(NewState)}; ok -> _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), @@ -231,6 +233,7 @@ handle_fetch(ServerRef, Topic, Partition, Options, server_ref = ServerRef, topic = Topic, partition = Partition, + %options = [scheduled | Options]}, options = Options}, NewScheduledFetches = case KeyTakeResult of false -> ScheduledFetches; @@ -342,8 +345,7 @@ handle_fetch_response(Bin, Fetch, {ok, State#state{current_fetch = void, fetches = NewFetches}} end. -handle_produce_response(Bin, State = #state{connector = Connector, name = Name, - ets = EtsName}) -> +handle_produce_response(Bin, State = #state{name = Name, ets = EtsName}) -> case kafkerl_protocol:parse_produce_response(Bin) of {ok, CorrelationId, Topics} -> case ets:lookup(EtsName, CorrelationId) of @@ -352,15 +354,15 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered _ = spawn(fun() -> - notify_success(Successes, Messages, Connector) + notify_success(Successes, Messages) end), % Then handle the errors case handle_errors(Errors, Messages, Name) of ignore -> {ok, State}; {request_metadata, MessagesToResend} -> - kafkerl_connector:request_metadata(Connector), - ok = resend_messages(MessagesToResend, Connector), + kafkerl_connector:request_metadata(), + ok = resend_messages(MessagesToResend), {ok, State} end; _ -> @@ -376,18 +378,18 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, %%============================================================================== %% Utils %%============================================================================== -resend_messages(Messages, Connector) -> - F = fun(M) -> kafkerl_connector:send(Connector, M, []) end, +resend_messages(Messages) -> + F = fun(M) -> kafkerl_connector:send(M) end, lists:foreach(F, Messages). -notify_success([], _Messages, _Pid) -> +notify_success([], _Messages) -> ok; -notify_success([{Topic, Partition, Offset} | T], Messages, Pid) -> +notify_success([{Topic, Partition, Offset} | T], Messages) -> MergedMessages = kafkerl_utils:merge_messages(Messages), Partitions = partitions_in_topic(Topic, MergedMessages), M = messages_in_partition(Partition, Partitions), - kafkerl_connector:produce_succeeded(Pid, {Topic, Partition, M, Offset}), - notify_success(T, Messages, Pid). + kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), + notify_success(T, Messages). partitions_in_topic(Topic, Messages) -> lists:flatten([P || {T, P} <- Messages, T =:= Topic]). @@ -528,4 +530,7 @@ send_messages(Consumer, [Event | T]) -> Error -> Error end; send_messages(Consumer, Event) -> - kafkerl_utils:send_event(Consumer, Event). \ No newline at end of file + kafkerl_utils:send_event(Consumer, Event). + +ets_options() -> + [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. 
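A pattern running through the broker-connection changes above is the correlation-id bookkeeping: every flushed request is remembered in the connection's public ETS table so the produce response, which only carries the correlation id, can be matched back to its messages and then acknowledged through produce_succeeded/1 or resent. A stripped-down sketch of that pattern; the table name, record shape and function names are illustrative rather than kafkerl's actual layout.

-module(correlation_sketch).
-export([new/0, remember/3, settle/2]).

%% One public ETS table per connection, keyed by correlation id.
new() ->
    ets:new(inflight_requests,
            [named_table, public,
             {write_concurrency, true}, {read_concurrency, true}]).

%% Remember which messages went out with this correlation id.
remember(Table, CorrelationId, Messages) ->
    true = ets:insert(Table, {CorrelationId, Messages}),
    ok.

%% When the response arrives, fetch and forget the in-flight messages so the
%% caller can acknowledge or resend them.
settle(Table, CorrelationId) ->
    case ets:take(Table, CorrelationId) of
        [{CorrelationId, Messages}] -> {ok, Messages};
        []                          -> not_found
    end.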
\ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 0aeab59..e1c3e54 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -5,20 +5,19 @@ %% API % Metadata --export([request_metadata/1, request_metadata/2, request_metadata/3, - get_partitions/1]). +-export([request_metadata/0, request_metadata/1, get_partitions/0]). % Produce -export([send/1]). % Consume --export([fetch/4, stop_fetch/3]). +-export([fetch/3, stop_fetch/2]). % Common --export([subscribe/2, subscribe/3, unsubscribe/2]). -% Only for internal use --export([do_request_metadata/6, make_metadata_request/1]). -% Only for broker connections --export([produce_succeeded/2]). +-export([subscribe/1, subscribe/2, unsubscribe/1]). +% Only used by broker connections +-export([produce_succeeded/1]). +% Only used by the metadata handler +-export([topic_mapping_updated/1]). % Supervisors --export([start_link/2]). +-export([start_link/1]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -29,24 +28,20 @@ -type kafler_port() :: 1..65535. -type address() :: {kafler_host(), kafler_port()}. --type filters() :: all | [atom()]. +-type filters() :: all | [atom()]. -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [address()], - broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: kafkerl_protocol:client_id(), - config = [] :: {atom(), any()}, +-record(state, {broker_mapping = void :: [broker_mapping()] | void, + config = [] :: [{atom(), any()}], autocreate_topics = false :: boolean(), callbacks = [] :: [{filters(), kafkerl:callback()}], - known_topics = [] :: [binary()], last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options(), dump_location = "" :: string(), max_buffer_size = 0 :: integer(), - save_bad_messages = false :: boolean(), - metadata_handler = void :: atom()}). + save_bad_messages = false :: boolean()}). -type state() :: #state{}. -export_type([address/0]). @@ -54,9 +49,9 @@ %%============================================================================== %% API %%============================================================================== --spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). -start_link(Name, Config) -> - gen_server:start_link({local, Name}, ?MODULE, [Config, Name], []). +-spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error(). +start_link(Config) -> + gen_server:start_link({local, kafkerl}, ?MODULE, [Config], []). -spec send(kafkerl:basic_message()) -> ok | kafkerl:error(). @@ -77,58 +72,52 @@ send({Topic, Partition, _Payload} = Message) -> end end. --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), - kafkerl:options()) -> ok | kafkerl:error(). -fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). +-spec fetch(kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> + ok | kafkerl:error(). +fetch(Topic, Partition, Options) -> + gen_server:call(kafkerl, {fetch, Topic, Partition, Options}). --spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> - ok. -stop_fetch(ServerRef, Topic, Partition) -> - gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). +-spec stop_fetch(kafkerl:topic(), kafkerl:partition()) -> ok. 
+stop_fetch(Topic, Partition) -> + gen_server:call(kafkerl, {stop_fetch, Topic, Partition}). --spec get_partitions(kafkerl:server_ref()) -> - [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). -get_partitions(ServerRef) -> - case gen_server:call(ServerRef, {get_partitions}) of +-spec get_partitions() -> [{kafkerl:topic(), [kafkerl:partition()]}] | + kafkerl:error(). +get_partitions() -> + case gen_server:call(kafkerl, {get_partitions}) of {ok, Mapping} -> get_partitions_from_mapping(Mapping); Error -> Error end. --spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> - ok | kafkerl:error(). -subscribe(ServerRef, Callback) -> - subscribe(ServerRef, Callback, all). --spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> - ok | kafkerl:error(). -subscribe(ServerRef, Callback, Filter) -> - gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). --spec unsubscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok. -unsubscribe(ServerRef, Callback) -> - gen_server:call(ServerRef, {unsubscribe, Callback}). +-spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). +subscribe(Callback) -> + subscribe(Callback, all). +-spec subscribe(kafkerl:callback(), filters()) -> ok | kafkerl:error(). +subscribe(Callback, Filter) -> + gen_server:call(kafkerl, {subscribe, {Filter, Callback}}). --spec request_metadata(kafkerl:server_ref()) -> ok. -request_metadata(ServerRef) -> - gen_server:call(ServerRef, {request_metadata}). +-spec unsubscribe(kafkerl:callback()) -> ok. +unsubscribe(Callback) -> + gen_server:call(kafkerl, {unsubscribe, Callback}). --spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()] | boolean()) -> - ok. -request_metadata(ServerRef, TopicsOrForced) -> - gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). +-spec request_metadata() -> ok. +request_metadata() -> + gen_server:call(kafkerl, {request_metadata, []}). --spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()], boolean()) -> - ok. -request_metadata(ServerRef, Topics, Forced) -> - gen_server:call(ServerRef, {request_metadata, Topics, Forced}). +-spec request_metadata([kafkerl:topic()]) -> ok. +request_metadata(Topics) -> + gen_server:call(kafkerl, {request_metadata, Topics}). + +-spec produce_succeeded([{kafkerl:topic(), kafkerl:partition(), + [binary()], integer()}]) -> ok. +produce_succeeded(Produced) -> + gen_server:cast(kafkerl, {produce_succeeded, Produced}). --spec produce_succeeded(kafkerl:server_ref(), [{kafkerl:topic(), - kafkerl:partition(), - [binary()], - integer()}]) -> ok. -produce_succeeded(ServerRef, Messages) -> - gen_server:cast(ServerRef, {produce_succeeded, Messages}). +-spec topic_mapping_updated(any()) -> ok. +topic_mapping_updated(TopicMapping) -> + gen_server:cast(kafkerl, {topic_mapping_updated, TopicMapping}). 
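Since get_partitions/0 above answers from the broker mapping held in the connector state, it returns {error, not_connected} until the first metadata response has been processed. A hypothetical helper, not part of the patch, that polls for the mapping before moving on:

-module(partition_wait_sketch).
-export([wait_for_partitions/1]).

wait_for_partitions(0) ->
    {error, no_metadata};
wait_for_partitions(Retries) when Retries > 0 ->
    case kafkerl_connector:get_partitions() of
        {error, not_connected} ->
            %% No mapping yet; give the metadata handler some time and retry.
            timer:sleep(500),
            wait_for_partitions(Retries - 1);
        Partitions when is_list(Partitions) ->
            {ok, Partitions}
    end.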
%%============================================================================== %% gen_server callbacks @@ -139,14 +128,8 @@ handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({stop_fetch, Topic, Partition}, _From, State) -> {reply, handle_stop_fetch(Topic, Partition, State), State}; -handle_call({request_metadata}, _From, State) -> - {reply, ok, handle_request_metadata(State, [])}; -handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> - {reply, ok, handle_request_metadata(State, [], true)}; handle_call({request_metadata, Topics}, _From, State) -> - {reply, ok, handle_request_metadata(State, Topics)}; -handle_call({request_metadata, Topics, Forced}, _From, State) -> - {reply, ok, handle_request_metadata(State, Topics, Forced)}; + {reply, handle_request_metadata(Topics), State}; handle_call({get_partitions}, _From, State) -> {reply, handle_get_partitions(State), State}; handle_call({subscribe, Callback}, _From, State) -> @@ -168,39 +151,25 @@ handle_info(metadata_timeout, State) -> {stop, {error, unable_to_retrieve_metadata}, State}; handle_info({metadata_updated, []}, State) -> % If the metadata arrived empty request it again - {noreply, handle_request_metadata(State#state{broker_mapping = []}, [])}; -handle_info({metadata_updated, Mapping}, State) -> - % Create the topic mapping (this also starts the broker connections) - NewBrokerMapping = get_broker_mapping(Mapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), - % Get the partition data to send to the subscribers and send it - PartitionData = get_partitions_from_mapping(NewBrokerMapping), - Callbacks = State#state.callbacks, - NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), - % Add to the list of known topics - NewTopics = lists:sort([T || {T, _P} <- PartitionData]), - NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), - _ = lager:debug("Known topics: ~p", [NewKnownTopics]), - % TODO: Maybe retry from the dumps - {noreply, State#state{broker_mapping = NewBrokerMapping, - callbacks = NewCallbacks, - known_topics = NewKnownTopics}}; -handle_info({'DOWN', Ref, process, _, normal}, State) -> - true = demonitor(Ref), + ok = handle_request_metadata([]), {noreply, State}; -handle_info({'DOWN', Ref, process, _, Reason}, State) -> - _ = lager:error("metadata request failed, reason: ~p", [Reason]), - true = demonitor(Ref), - {noreply, handle_request_metadata(State, [], true)}; +%handle_info({metadata_updated, Mapping}, State) -> + handle_info(Msg, State) -> _ = lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), {noreply, State}. -spec handle_cast(any(), state()) -> {noreply, state()}. -handle_cast({produce_succeeded, Messages}, State) -> +handle_cast({produce_succeeded, Produced}, State) -> + Callbacks = State#state.callbacks, + NewCallbacks = send_event({produced, Produced}, Callbacks), + {noreply, State#state{callbacks = NewCallbacks}}; +handle_cast({topic_mapping_updated, NewMapping}, State) -> + % Get the partition data to send to the subscribers and send it + PartitionData = get_partitions_from_mapping(NewMapping), Callbacks = State#state.callbacks, - NewCallbacks = send_event({produced, Messages}, Callbacks), - {noreply, State#state{callbacks = NewCallbacks}}. + NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), + {noreply, State#state{callbacks = NewCallbacks, broker_mapping = NewMapping}}. 
% Boilerplate -spec terminate(atom(), state()) -> ok. @@ -211,13 +180,10 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%============================================================================== %% Handlers %%============================================================================== -init([Config, Name]) -> +init([Config]) -> % The schema indicates what is expected of the configuration, it validates and % normalizes the configuration - Schema = [{brokers, [{string, {integer, {1, 65535}}}], required}, - {client_id, binary, {default, <<"kafkerl_client">>}}, - {topics, [binary], required}, - {assume_autocreate_topics, boolean, {default, false}}, + Schema = [{assume_autocreate_topics, boolean, {default, false}}, {consumer_min_bytes, positive_integer, {default, 1}}, {consumer_max_wait, positive_integer, {default, 1500}}, {dump_location, string, {default, ""}}, @@ -225,28 +191,23 @@ init([Config, Name]) -> {save_messages_for_bad_topics, boolean, {default, true}}, {flush_to_disk_every, positive_integer, {default, 10000}}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [Brokers, ClientId, Topics, AutocreateTopics, MinBytes, MaxWait, - DumpLocation, MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> + {ok, [AutocreateTopics, MinBytes, MaxWait, DumpLocation, + MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> % Start the metadata request handler - MetadataHandlerName = metadata_handler_name(Name), - {ok, _} = kafkerl_metadata_handler:start(MetadataHandlerName, Config), + {ok, _} = kafkerl_metadata_handler:start_link(Config), % Build the default fetch options DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - client_id = ClientId, dump_location = DumpLocation, max_buffer_size = MaxBufferSize, save_bad_messages = SaveBadMessages, autocreate_topics = AutocreateTopics, - default_fetch_options = DefaultFetchOptions, - metadata_handler = MetadataHandlerName}, + default_fetch_options = DefaultFetchOptions}, % Create a buffer to hold unsent messages _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), % Start the interval that manages the buffers holding unsent messages {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), - {_Pid, _Ref} = make_metadata_request(State), + ok = kafkerl_metadata_handler:request_metadata([]), {ok, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> @@ -283,28 +244,8 @@ handle_get_partitions(#state{broker_mapping = void}) -> handle_get_partitions(#state{broker_mapping = Mapping}) -> {ok, Mapping}. -handle_request_metadata(State, Topics) -> - handle_request_metadata(State, Topics, false). - -% Ignore it if the topic mapping is void, we are already requesting the metadata -handle_request_metadata(State = #state{broker_mapping = void}, _, false) -> - State; -handle_request_metadata(State, NewTopics, _) -> - SortedNewTopics = lists:sort(NewTopics), - NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), - Now = get_timestamp(), - LastRequest = State#state.last_metadata_request, - Cooldown = State#state.metadata_request_cd, - LastMetadataUpdate = case Cooldown - (Now - LastRequest) of - Negative when Negative =< 0 -> - _ = make_metadata_request(State), - Now; - Time -> - _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), - LastRequest - end, - State#state{broker_mapping = void, known_topics = NewKnownTopics, - last_metadata_request = LastMetadataUpdate}. 
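The connector now validates only its own keys and hands the same proplist to kafkerl_metadata_handler, which picks out the broker- and metadata-related ones. Putting the two schemas together, the options they read look roughly like the term below; every value is either a schema default or taken from the bundled rel/kafkerl.app.config, so treat the exact numbers as an example rather than a recommendation.

[{brokers, [{"localhost", 9092}]},    % consumed by kafkerl_metadata_handler
 {topics, [<<"test_topic">>]},        % required by the metadata handler too
 {client_id, <<"kafkerl_client">>},
 {max_metadata_retries, 3},
 {metadata_tcp_timeout, 1500},
 {metadata_request_cooldown, 3500},
 {assume_autocreate_topics, false},   % connector options from here down
 {consumer_min_bytes, 1},
 {consumer_max_wait, 1500},
 {dump_location, "dumps/"},
 {max_buffer_size, 500},
 {save_messages_for_bad_topics, true},
 {flush_to_disk_every, 10000}].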
+handle_request_metadata(Topics) -> + kafkerl_metadata_handler:request_metadata(Topics). handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, last_dump_name = LastDumpName}) -> @@ -347,11 +288,11 @@ retry_messages(Messages) -> _Pid = spawn(fun() -> [send(M) || M <- Messages] end), ok. -split_message_dump(Messages, #state{known_topics = KnownTopics, - max_buffer_size = MaxBufferSize, +split_message_dump(Messages, #state{max_buffer_size = MaxBufferSize, save_bad_messages = SaveBadMessages}) when is_list(Messages) -> + KnownTopics = kafkerl_metadata_handler:get_known_topics(), % Split messages between for topics kafkerl knows exist and those that do not. {Known, Unknown} = lists:partition(fun({Topic, _Partition, _Payload}) -> lists:member(Topic, KnownTopics) @@ -388,64 +329,6 @@ get_ets_dump_name({OldName, Counter}) -> {PartialNewName ++ "0.dump", 1} end. -get_metadata_tcp_options() -> - kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). - -do_request_metadata(Pid, _Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> - Pid ! metadata_timeout; -do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> - case do_request_metadata(Brokers, TCPOpts, Request) of - {ok, TopicMapping} -> - Pid ! {metadata_updated, TopicMapping}; - _Error -> - timer:sleep(RetryInterval), - NewRetries = case Retries of - -1 -> -1; - N -> N - 1 - end, - do_request_metadata(Pid, Brokers, TCPOpts, NewRetries, RetryInterval, - Request) - end. - -do_request_metadata([], _TCPOpts, _Request) -> - {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> - _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, TCPOpts) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, TCPOpts, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, TCPOpts, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. - send_event(Event, {all, Callback}) -> kafkerl_utils:send_event(Callback, Event); send_event({EventName, _Data} = Event, {Events, Callback}) -> @@ -458,100 +341,6 @@ send_event(Event, Callbacks) -> send_event(Event, Callback) =:= ok end, Callbacks). -metadata_handler_name(ServerName) -> - list_to_binary([atom_to_list(ServerName), "_metadata_handler"]). 
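The dump files written by handle_dump_buffer_to_disk are plain term_to_binary/1 encodings of the message list, so acting on the "maybe retry from the dumps" TODO can start as small as the sketch below; the module, its name and the choice to push everything back through send/1 are assumptions, not something this patch provides.

-module(dump_recovery_sketch).
-export([resend_dump/1]).

%% Read one kafkerl_messages_*.dump file and feed its messages back into the
%% normal produce path.
resend_dump(FilePath) ->
    {ok, Binary} = file:read_file(FilePath),
    Messages = binary_to_term(Binary),
    ok = lists:foreach(fun(Message) ->
                         _ = kafkerl_connector:send(Message)
                       end, Messages),
    {ok, length(Messages)}.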
- -%%============================================================================== -%% Request building -%%============================================================================== -metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> - kafkerl_protocol:build_metadata_request([], 0, ClientId); -metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, - NewTopics) -> - AllTopics = lists:umerge(KnownTopics, NewTopics), - kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). - -%%============================================================================== -%% Topic/broker mapping -%%============================================================================== -get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). - -expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; -expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - _ = lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; -expand_topic({Error, Topic, _Partitions}) -> - _ = lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. - -expand_partitions(Metadata) -> - expand_partitions(Metadata, []). - -expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; -expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). - -get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, 0, []). 
- -get_broker_mapping([], _State, _N, Acc) -> - [{Key, Address} || {_ConnId, Key, Address} <- Acc]; -get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], - State = #state{config = Config}, N, Acc) -> - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), - {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of - false -> - {start_broker_connection(N, Address, Config), N + 1}; - {ConnId, _, BrokerConnection} -> - {BrokerConnection, N} - end, - - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), - kafkerl_broker_connection:add_buffer(Conn, Buffer), - - NewMapping = {ConnId, {Topic, Partition}, Conn}, - get_broker_mapping(T, State, NewN, [NewMapping | Acc]). - -start_broker_connection(N, Address, Config) -> - case kafkerl_broker_connection:start_link(N, self(), Address, Config) of - {ok, Name, _Pid} -> - Name; - {error, {already_started, Pid}} -> - kafkerl_broker_connection:clear_buffers(Pid), - Pid - end. - % This is used to return the available partitions for each topic get_partitions_from_mapping(Mapping) -> F = fun({{Topic, Partition}, _}, Acc) -> @@ -568,25 +357,4 @@ send_mapping_to(_NewCallback, #state{broker_mapping = void}) -> ok; send_mapping_to(NewCallback, #state{broker_mapping = Mapping}) -> Partitions = get_partitions_from_mapping(Mapping), - send_event({partition_update, Partitions}, NewCallback). - -make_metadata_request(State = #state{brokers = Brokers, - known_topics = Topics, - max_metadata_retries = MaxMetadataRetries, - retry_interval = RetryInterval}) -> - Request = metadata_request(State, Topics), - % Start requesting metadata - Params = [self(), Brokers, get_metadata_tcp_options(), MaxMetadataRetries, - RetryInterval, Request], - spawn_monitor(?MODULE, do_request_metadata, Params). - -get_timestamp() -> - {A, B, C} = erlang:timestamp(), - (A * 1000000 + B) * 1000 + C div 1000. - -%%============================================================================== -%% Error handling -%%============================================================================== -warn_metadata_request(Host, Port, Reason) -> - _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). \ No newline at end of file + send_event({partition_update, Partitions}, NewCallback). \ No newline at end of file diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index bfb9c7f..ef91183 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -4,55 +4,319 @@ -behaviour(gen_fsm). %% API --export([request_metadata/2]). +-export([request_metadata/1, get_known_topics/0]). +%% States -export([idle/2, requesting/2, on_cooldown/2]). -% gen_fsm --export([start_link/1, init/1]). +%% Internal +-export([make_request/3]). +%% gen_fsm +-export([start_link/1, init/1, handle_info/3, terminate/3, code_change/4, + handle_event/3, handle_sync_event/4]). + +-include("kafkerl.hrl"). --record(state, {max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - metadata_request_cd = 0 :: integer()}). +-record(state, {config = [] :: [{atom(), any()}], + client_id = <<>> :: kafkerl_protocol:client_id(), + brokers = [] :: [kafkerl_connector:address()], + max_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + cooldown = 0 :: integer(), + known_topics = [] :: [kafkerl:topic()], + next_topics = [] :: [kafkerl:topic()]}). +-type state() :: #state{}. 
%%============================================================================== %% API %%============================================================================== --spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). -start_link(Name, Config) -> - gen_fsm:start_link({local, Name}, ?MODULE, [Config], []). +-spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error(). +start_link(Config) -> + gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []). --spec request_metadata(atom(), [topic()]) -> ok. -request_metadata(ServerRef, Topics) -> - ok. +-spec request_metadata([kafkerl:topic()]) -> ok. +request_metadata(Topics) -> + gen_fsm:send_event(?MODULE, {request, Topics}). + +-spec get_known_topics() -> ok. +get_known_topics() -> + gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics). %%============================================================================== %% States %%============================================================================== -idle(_, State) -> - {next_state, open, {[], Code}, 30000};. +-spec idle(any(), state()) -> {next_state, atom(), state()}. +idle({request, Topics}, State = #state{known_topics = KnownTopics}) -> + % Add the requested topics to the state + SortedTopics = lists:usort(KnownTopics), + NewKnownTopics = lists:umerge(Topics, SortedTopics), + NewState = State#state{known_topics = NewKnownTopics}, + % Make the request + ok = schedule_metadata_request(NewState), + % And move the the requesting state + {next_state, requesting, NewState}. -requesting(_, State) -> - ok. +-spec requesting(any(), state()) -> {next_state, atom(), state()}. +% Handle a new metadata request while there's one in progress +requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> + SortedTopics = lists:usort(NewTopics), % This also removes repeated entries + % If the request is for known topics, then we can safely ignore it, otherwise, + % queue a metadata request + NewState = case SortedTopics -- KnownTopics of + [] -> State; + _ -> request_metadata([]), + State#state{known_topics = lists:umerge(KnownTopics, + SortedTopics)} + end, + {next_state, requesting, NewState}; +% Handle the updated metadata +requesting({metadata_updated, RawMapping}, State) -> + % Create the topic mapping (this also starts the broker connections) + NewMapping = get_broker_mapping(RawMapping, State), + _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping]), + ok = kafkerl_connector:topic_mapping_updated(NewMapping), + {next_state, idle, State}; +% If we have no more retries left, go on cooldown +requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> + Params = [?MODULE, on_timer], + {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params), + {next_state, on_cooldown, State}; +% If we have more retries to do, schedule a new retry +requesting({metadata_retry, Retries}, State) -> + ok = schedule_metadata_request(Retries, State), + {next_state, requesting, State}. -on_cooldown(_, State) -> +-spec on_cooldown(any(), state()) -> {next_state, atom(), state()}. +on_cooldown({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> + % Since we are on cooldown (the time between consecutive requests) we only add + % the topics to the scheduled next request + SortedTopics = lists:usort(NewTopics), + State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}; +on_cooldown(on_timer, State) -> + ok = schedule_metadata_request(State), + {next_state, requesting, State}. 
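Read together, the three state functions above describe the following lifecycle (summarised from the clauses as written; the cooldown, retry interval and retry budget, with -1 meaning retry forever, all come from the configuration handled further down):

  idle        --{request, Topics}--------> requesting   (request scheduled)
  requesting  --{request, Topics}--------> requesting   (unknown topics queue one more request)
  requesting  --{metadata_updated, Map}--> idle         (mapping pushed to the connector)
  requesting  --{metadata_retry, N=/=0}--> requesting   (another attempt scheduled)
  requesting  --{metadata_retry, 0}------> on_cooldown  (timer posts on_timer after the cooldown)
  on_cooldown --on_timer-----------------> requesting   (request scheduled again)
  on_cooldown --{request, Topics}--------> on_cooldown  (the clause only merges the topics for the next run)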
+ +%%============================================================================== +%% Events +%%============================================================================== +handle_sync_event(get_known_topics, _From, StateName, State) -> + Reply = State#state.known_topics, + {reply, Reply, StateName, State}. + +%%============================================================================== +%% gen_fsm boilerplate +%%============================================================================== +-spec handle_info(any(), atom(), state()) -> {next_state, atom(), state()}. +handle_info(Message, StateName, State) -> + lager:info("received unexpected message ~p", [Message]), + {next_state, StateName, State}. + +-spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. +code_change(_OldVsn, StateName, StateData, _Extra) -> + {ok, StateName, StateData}. + +-spec terminate(any(), atom(), state()) -> ok. +terminate(_Reason, _StateName, _StateData) -> ok. +-spec handle_event(any(), atom(), state()) -> {next_state, atom(), state()}. +handle_event(_Event, StateName, StateData) -> + {next_state, StateName, StateData}. + +%-spec handle_sync_event(any(), any(), atom(), state()) -> +% {next_state, atom(), state()}. +%handle_sync_event(_Event, _From, StateName, StateData) -> +% {next_state, StateName, StateData}. + %%============================================================================== %% Handlers %%============================================================================== init([Config]) -> - Schema = [{metadata_tcp_timeout, positive_integer, {default, 1500}}, + Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, + {metadata_tcp_timeout, positive_integer, {default, 1500}}, {metadata_request_cooldown, positive_integer, {default, 333}}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}], + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, + {brokers, [{string, {integer, {1, 65535}}}], required}, + {topics, [binary], required}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [RetryInterval, MetadataRequestCD, MaxMetadataRetries]} -> - State = #state{config = Config, - retry_interval = RetryInterval, - metadata_request_cd = MetadataRequestCD, - max_metadata_retries = MaxMetadataRetries}, - {ok, State}; + {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + cooldown = Cooldown, + client_id = ClientId, + max_retries = MaxRetries, + retry_interval = RetryInterval}, + {ok, idle, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> _ = lager:critical("Metadata config error ~p", [E]) end, Errors), {stop, bad_config} + end. + +%%============================================================================== +%% Request logic +%%============================================================================== +schedule_metadata_request(State) -> + schedule_metadata_request(undefined, State). + +schedule_metadata_request(Retries, State = #state{brokers = Brokers, + max_retries = MaxRetries, + known_topics = Topics, + retry_interval = Interval}) -> + Request = metadata_request(State, Topics), + case Retries of + undefined -> + Params = [Brokers, Request, MaxRetries], + _ = spawn(?MODULE, make_request, Params); + _ -> + Params = [Brokers, Request, Retries], + {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) + end, + ok. 
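From a caller's point of view the handler is fire-and-forget: request_metadata/1 only queues an event on the FSM and returns ok, the refreshed mapping comes back later through kafkerl_connector:topic_mapping_updated/1, and get_known_topics/0 is the one synchronous query. A small sketch of that interaction; the wrapper module and the topic value are illustrative.

-module(metadata_usage_sketch).
-export([refresh/1]).

refresh(Topic) when is_binary(Topic) ->
    %% Queue a metadata request for one extra topic; this returns immediately.
    ok = kafkerl_metadata_handler:request_metadata([Topic]),
    %% The handler also tracks the requested topics, so this synchronous query
    %% answers from its own state without waiting for the broker's response.
    kafkerl_metadata_handler:get_known_topics().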
+ +make_request(Brokers, Request, Retries) -> + case do_request_metadata(Brokers, Request) of + {ok, TopicMapping} -> + gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); + Error -> + _ = lager:debug("Metadata request error: ~p", [Error]), + NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, + gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) + end. + +do_request_metadata([], _Request) -> + {error, all_down}; +do_request_metadata([{Host, Port} = _Broker | T], Request) -> + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. + +%%============================================================================== +%% Utils +%%============================================================================== +get_metadata_tcp_options() -> + kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). + +log_metadata_request_error(Host, Port, Reason) -> + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). + +metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> + kafkerl_protocol:build_metadata_request([], 0, ClientId); +metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, + NewTopics) -> + AllTopics = lists:umerge(KnownTopics, NewTopics), + kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). + +%%============================================================================== +%% Topic/broker mapping +%%============================================================================== +get_topic_mapping({BrokerMetadata, TopicMetadata}) -> + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). 
+ +expand_topic({?NO_ERROR, Topic, Partitions}) -> + {true, {Topic, Partitions}}; +expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> + % Replica not available can be ignored, still, show a warning + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; +expand_topic({Error, Topic, _Partitions}) -> + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. + +expand_partitions(Metadata) -> + expand_partitions(Metadata, []). + +expand_partitions({_Topic, []}, Acc) -> + {true, Acc}; +expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, + _, _} | T]}, Acc) -> + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). + +get_broker_mapping(TopicMapping, State) -> + get_broker_mapping(TopicMapping, State, 0, []). + +get_broker_mapping([], _State, _N, Acc) -> + [{Key, Address} || {_ConnId, Key, Address} <- Acc]; +get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], + State = #state{config = Config}, N, Acc) -> + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = ets_buffer:create(Buffer, fifo), + {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of + false -> + {start_broker_connection(N, Address, Config), N + 1}; + {ConnId, _, BrokerConnection} -> + {BrokerConnection, N} + end, + + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = ets_buffer:create(Buffer, fifo), + kafkerl_broker_connection:add_buffer(Conn, Buffer), + + NewMapping = {ConnId, {Topic, Partition}, Conn}, + get_broker_mapping(T, State, NewN, [NewMapping | Acc]). + +start_broker_connection(N, Address, Config) -> + case kafkerl_broker_connection:start_link(N, Address, Config) of + {ok, Name, _Pid} -> + Name; + {error, {already_started, Pid}} -> + kafkerl_broker_connection:clear_buffers(Pid), + Pid end. \ No newline at end of file diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index fd989b4..e333131 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -33,9 +33,8 @@ init([]) -> {ok, {{one_for_one, 5, 10}, ChildSpecs}}. get_connector_child_spec() -> - Name = application:get_env(kafkerl, gen_server_name, kafkerl), {ok, ConnConfig} = application:get_env(kafkerl, conn_config), Topics = application:get_env(kafkerl, topics, []), - Params = [Name, [{topics, Topics} | ConnConfig]], + Params = [[{topics, Topics} | ConnConfig]], MFA = {kafkerl_connector, start_link, Params}, - {Name, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file + {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 79a718b..03efb71 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -78,12 +78,12 @@ buffer_name(Topic, Partition) -> default_buffer_name() -> default_message_buffer. 
--type proplist_value() :: {atom(), any()}. +-type proplist_value() :: {atom(), any()} | atom(). -type proplist() :: [proplist_value]. -spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> proplist(). proplists_set(Proplist, {K, _V} = NewValue) -> - lists:keyreplace(K, 1, Proplist, NewValue); + lists:keystore(K, 1, proplists:unfold(Proplist), NewValue); proplists_set(Proplist, []) -> Proplist; proplists_set(Proplist, [H | T]) -> From 0b6d6a3802fc7a785b1a3b1c94e5d24603881e80 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 15 Apr 2016 11:54:25 -0300 Subject: [PATCH 30/72] version bump --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e437ff..984e574 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -kafkerl v2.0.0 +kafkerl v3.0.0 ============== [![Gitter](https://badges.gitter.im/Join Chat.svg)](https://gitter.im/HernanRivasAcosta/kafkerl?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) From 302a348463df3323fc961d42dc8c135384040215 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 13 May 2016 16:19:39 -0300 Subject: [PATCH 31/72] improved error messages for the default buffer --- run.sh => run | 0 src/kafkerl_connector.erl | 7 ++++--- 2 files changed, 4 insertions(+), 3 deletions(-) rename run.sh => run (100%) diff --git a/run.sh b/run similarity index 100% rename from run.sh rename to run diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index e1c3e54..5f7f2bd 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -61,13 +61,14 @@ send({Topic, Partition, _Payload} = Message) -> NewSize when is_integer(NewSize) -> ok; Error -> - _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), + _ = lager:debug("error writing on ~p, reason: ~p", [Buffer, Error]), case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> ok; _ -> - _ = lager:critical("unable to write to default buffer, reason: ~p", - [Error]), + _ = lager:critical("unable to write to default buffer, the message ~p" + " was lost lost, reason: ~p", + [Message, Error]), ok end end. From a542db7864168423416f0874961ec90edcbed696 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 16 May 2016 15:41:43 -0300 Subject: [PATCH 32/72] improvements to the return values of the produce funcion --- src/kafkerl.erl | 3 ++- src/kafkerl_connector.erl | 15 +++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 3b423c6..a0c15b8 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -24,6 +24,7 @@ -type options() :: [option()]. -type server_ref() :: atom() | pid(). +-type ok() :: {ok, atom()}. -type error() :: {error, atom() | {atom(), any()}}. -type topic() :: binary(). @@ -48,7 +49,7 @@ start(_StartType, _StartArgs) -> %% Access API %%============================================================================== %% Produce API --spec produce(topic(), partition(), payload()) -> ok. +-spec produce(topic(), partition(), payload()) -> ok() | error(). produce(Topic, Partition, Message) -> kafkerl_connector:send({Topic, Partition, Message}). diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 5f7f2bd..f3cde39 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -53,23 +53,26 @@ start_link(Config) -> gen_server:start_link({local, kafkerl}, ?MODULE, [Config], []). 
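
The lists:keystore/4 plus proplists:unfold/1 version of proplists_set/2 above
closes two gaps in the old lists:keyreplace/4 implementation: a missing key is
now inserted instead of silently dropped, and bare-atom entries are handled.
A stand-alone illustration (values are made up):

-module(proplists_set_sketch).
-export([run/0]).

run() ->
    Proplist = [verbose, {retries, 3}],
    %% keyreplace/4: 'timeout' is absent, so nothing changes.
    [verbose, {retries, 3}] =
        lists:keyreplace(timeout, 1, Proplist, {timeout, 500}),
    %% unfold/1 turns 'verbose' into {verbose, true}; keystore/4 then
    %% appends the new {timeout, 500} entry.
    [{verbose, true}, {retries, 3}, {timeout, 500}] =
        lists:keystore(timeout, 1, proplists:unfold(Proplist), {timeout, 500}),
    ok.
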
--spec send(kafkerl:basic_message()) -> - ok | kafkerl:error(). +-spec send(kafkerl:basic_message()) -> kafkerl:ok() | kafkerl:error(). send({Topic, Partition, _Payload} = Message) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of NewSize when is_integer(NewSize) -> - ok; + % Return 'saved' when the message went to the right ETS + {ok, saved}; Error -> _ = lager:debug("error writing on ~p, reason: ~p", [Buffer, Error]), case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of - NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> - ok; + NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> + % We return 'cached' when we needed to use the default ets table + {ok, cached}; _ -> _ = lager:critical("unable to write to default buffer, the message ~p" " was lost lost, reason: ~p", [Message, Error]), - ok + % We can safely assume that the ets existance indicates if kafkerl was + % started + {error, not_started} end end. From e9348f5fdabe928660905cfb72ad632f726df900 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Wed, 15 Jun 2016 17:13:38 +0300 Subject: [PATCH 33/72] multiple improvements --- .gitignore | 4 +- Makefile | 22 ++-------- bin/clear_kafkerl_test_topics.sh | 1 - bin/create_test_topics.sh | 6 +-- bin/delete_test_topics.sh | 1 + rel/kafkerl.app.config | 4 +- src/kafkerl.erl | 21 ++++++++-- src/kafkerl_connector.erl | 11 +++-- src/kafkerl_test_utils.erl | 55 +++++++++++++++++++++++++ src/kafkerl_utils.erl | 14 +------ test/kafkerl_SUITE.erl | 68 +++++++++++++++++++++++++++++++ test/message_validation_SUITE.erl | 67 ------------------------------ 12 files changed, 159 insertions(+), 115 deletions(-) delete mode 100755 bin/clear_kafkerl_test_topics.sh create mode 100755 bin/delete_test_topics.sh create mode 100644 src/kafkerl_test_utils.erl create mode 100644 test/kafkerl_SUITE.erl delete mode 100644 test/message_validation_SUITE.erl diff --git a/.gitignore b/.gitignore index cd4d9f8..960a23e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,6 @@ log rebar .rebar *.dump -bin/tmp \ No newline at end of file +bin/tmp +logs/ +rel/ \ No newline at end of file diff --git a/Makefile b/Makefile index 22ee8ad..bf16ef0 100644 --- a/Makefile +++ b/Makefile @@ -4,22 +4,10 @@ ERL ?= erl RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} NODE ?= kafkerl CT_ARGS ?= "-vvv" -CT_LOG ?= /logs/ct -ERLARGS=-pa ${DEPS} -pa ${APPS} -smp enable -boot start_sasl -args_file ${VM_ARGS} -s lager -s redis_config -TEST_ERL_ARGS ?= ${ERLARGS} -args_file ${TEST_VM_ARGS} -config ${TEST_CONFIG} +ERLARGS=-config ${CONFIG} +TEST_ERL_ARGS ?= ${ERLARGS} REBAR ?= "rebar" -ifdef CT_SUITES - CT_SUITES_="suites=${CT_SUITES}" -else - CT_SUITES_="" -endif -ifdef CT_CASE - CT_CASE_="case=${CT_CASE}" -else - CT_CASE_="" -endif - all: ${REBAR} get-deps compile @@ -74,8 +62,4 @@ test: tests tests: @${REBAR} compile skip_deps=true - @rm -rf ${CT_LOG} - @mkdir -p ${CT_LOG} - @ERL_FLAGS="${TEST_ERL_ARGS}" \ - ERL_AFLAGS="${TEST_ERL_ARGS}" \ - ${REBAR} -v 3 skip_deps=true ${CT_SUITES_} ${CT_CASE_} ct + ${REBAR} -v 3 skip_deps=true ct diff --git a/bin/clear_kafkerl_test_topics.sh b/bin/clear_kafkerl_test_topics.sh deleted file mode 100755 index 90f1920..0000000 --- a/bin/clear_kafkerl_test_topics.sh +++ /dev/null @@ -1 +0,0 @@ -rm -rf tmp/ \ No newline at end of file diff --git a/bin/create_test_topics.sh b/bin/create_test_topics.sh index 2bad117..d474341 100755 --- 
a/bin/create_test_topics.sh +++ b/bin/create_test_topics.sh @@ -23,6 +23,6 @@ done # make sure the path is defined if [ ! -d "${d}" ]; then echo "invalid kafka path ${d}" ; exit 1 ; fi -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test1 --partitions 3 --replication-factor 3 -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test2 --partitions 3 --replication-factor 3 -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test3 --partitions 4 --replication-factor 3 \ No newline at end of file +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test1 --partitions 1 --replication-factor 3 +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test2 --partitions 2 --replication-factor 3 +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test3 --partitions 3 --replication-factor 3 \ No newline at end of file diff --git a/bin/delete_test_topics.sh b/bin/delete_test_topics.sh new file mode 100755 index 0000000..ad84185 --- /dev/null +++ b/bin/delete_test_topics.sh @@ -0,0 +1 @@ +rm -rf tmp \ No newline at end of file diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 0c88a29..ee73239 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -19,5 +19,5 @@ {flush_to_disk_every, 20000}, % In milliseconds {max_buffer_size, 5000}, {save_messages_for_bad_topics, true}]}, - {topics, [test1, test2, test3]}, - {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file + {topics, []}, + {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index a0c15b8..95f36d1 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -5,7 +5,8 @@ -export([produce/3, consume/2, consume/3, stop_consuming/2, request_metadata/0, request_metadata/1, - partitions/0]). + partitions/0, + subscribe/1, subscribe/2, unsubscribe/1]). -export([version/0]). %% Types @@ -15,6 +16,7 @@ fun() | {atom(), atom()} | {atom(), atom(), [any()]}. +-type filters() :: all | [atom()]. -type option() :: {buffer_size, integer() | infinity} | {consumer, callback()} | {min_bytes, integer()} | @@ -33,7 +35,7 @@ -type basic_message() :: {topic(), partition(), payload()}. -export_type([server_ref/0, error/0, options/0, topic/0, partition/0, payload/0, - callback/0, basic_message/0]). + callback/0, basic_message/0, filters/0]). %%============================================================================== %% API @@ -54,7 +56,7 @@ produce(Topic, Partition, Message) -> kafkerl_connector:send({Topic, Partition, Message}). %% Consume API --spec consume(topic(), partition()) -> ok | error(). +-spec consume(topic(), partition()) -> {[payload()], offset()} | error(). consume(Topic, Partition) -> consume(Topic, Partition, []). @@ -92,6 +94,19 @@ request_metadata(Topics) when is_list(Topics) -> partitions() -> kafkerl_connector:get_partitions(). +%% Events +-spec subscribe(callback()) -> ok | error(). +subscribe(Callback) -> + kafkerl_connector:subscribe(Callback). + +-spec subscribe(callback(), filters()) -> ok | error(). +subscribe(Callback, Filters) -> + kafkerl_connector:subscribe(Callback, Filters). + +-spec unsubscribe(callback()) -> ok. +unsubscribe(Callback) -> + kafkerl_connector:unsubscribe(Callback). + %% Utils -spec version() -> {integer(), integer(), integer()}. 
version() -> diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index f3cde39..447c64c 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -28,15 +28,14 @@ -type kafler_port() :: 1..65535. -type address() :: {kafler_host(), kafler_port()}. --type filters() :: all | [atom()]. - -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. -record(state, {broker_mapping = void :: [broker_mapping()] | void, config = [] :: [{atom(), any()}], autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), kafkerl:callback()}], + callbacks = [] :: [{kafkerl:filters(), + kafkerl:callback()}], last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options(), dump_location = "" :: string(), @@ -98,9 +97,9 @@ get_partitions() -> -spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). subscribe(Callback) -> subscribe(Callback, all). --spec subscribe(kafkerl:callback(), filters()) -> ok | kafkerl:error(). -subscribe(Callback, Filter) -> - gen_server:call(kafkerl, {subscribe, {Filter, Callback}}). +-spec subscribe(kafkerl:callback(), kafkerl:filters()) -> ok | kafkerl:error(). +subscribe(Callback, Filters) -> + gen_server:call(kafkerl, {subscribe, {Filters, Callback}}). -spec unsubscribe(kafkerl:callback()) -> ok. unsubscribe(Callback) -> diff --git a/src/kafkerl_test_utils.erl b/src/kafkerl_test_utils.erl new file mode 100644 index 0000000..1a79c7e --- /dev/null +++ b/src/kafkerl_test_utils.erl @@ -0,0 +1,55 @@ +-module(kafkerl_test_utils). +-author('hernanrivasacosta@gmail.com'). + +-export([start_kafka/0, start_kafka/1, create_test_topics/0, stop_kafka/0]). + +%%============================================================================== +%% API +%%============================================================================== +-spec start_kafka() -> ok. +start_kafka() -> + start_kafka(false). + +-spec start_kafka(boolean()) -> ok. +start_kafka(CreateTestTopics) -> + % Clean all the logs + lager:critical("1"), + [] = os:cmd("rm -rf bin/tmp"), + % Start zookeeper and kafka + lager:critical("2"), + Path = get_path(), + lager:critical("3"), + [] = os:cmd("./bin/start_zk.sh -d " ++ Path ++ " -c bin/zookeeper.properties"), + lager:critical("4"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server0.properties"), + lager:critical("5"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server1.properties"), + lager:critical("6"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server2.properties"), + lager:critical("7"), + % Create the test topics and partitions + case CreateTestTopics of + true -> create_test_topics(); + false -> ok + end. + +-spec create_test_topics() -> ok. +create_test_topics() -> + Path = get_path(), + % TODO: If kafka doesn't start properly, this will never return + [] = os:cmd("./bin/create_test_topics.sh -d " ++ Path), + ok. + +-spec stop_kafka() -> ok. +stop_kafka() -> + % Stop both zookeeper and kafka + [] = os:cmd("./bin/stop_zk.sh"), + [] = os:cmd("./bin/stop_all_brokers.sh"), + ok. + +%%============================================================================== +%% Utils +%%============================================================================== +get_path() -> + {ok, TestProps} = application:get_env(kafkerl, tests), + proplists:get_value(kafkerl_path, TestProps). 
\ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 03efb71..e7e3b34 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -3,7 +3,7 @@ -export([send_event/2, send_error/2]). -export([get_tcp_options/1]). --export([merge_messages/1, split_messages/1, valid_message/1]). +-export([merge_messages/1, split_messages/1]). -export([buffer_name/2, default_buffer_name/0]). -export([gather_consume_responses/0, gather_consume_responses/1]). -export([proplists_set/2]). @@ -56,18 +56,6 @@ split_messages({Topic, Partitions}) -> split_messages(Topics) -> lists:flatten([split_messages(Topic) || Topic <- Topics]). --spec valid_message(any()) -> boolean(). -valid_message({Topic, Partition, Messages}) -> - is_binary(Topic) andalso is_integer(Partition) andalso Partition >= 0 andalso - (is_binary(Messages) orelse is_list_of_binaries(Messages)); -valid_message({Topic, Partition}) -> - is_binary(Topic) andalso (is_partition(Partition) orelse - is_partition_list(Partition)); -valid_message(L) when is_list(L) -> - lists:all(fun valid_message/1, L); -valid_message(_Any) -> - false. - -spec buffer_name(kafkerl_protocol:topic(), kafkerl_protocol:partition()) -> atom(). buffer_name(Topic, Partition) -> diff --git a/test/kafkerl_SUITE.erl b/test/kafkerl_SUITE.erl new file mode 100644 index 0000000..a81ca26 --- /dev/null +++ b/test/kafkerl_SUITE.erl @@ -0,0 +1,68 @@ +-module(kafkerl_SUITE). +-author('hernanrivasacosta@gmail.com'). + +-export([produce_and_consume/1]). + +-export([init_per_suite/1, end_per_suite/1, init_per_testcase/2, + end_per_testcase/2, all/0]). + +-type config() :: [{atom(), term()}]. + +-spec all() -> [atom()]. +all() -> + [produce_and_consume]. + +-spec init_per_suite(config()) -> [config()]. +init_per_suite(Config) -> + Config. + +-spec end_per_suite(config()) -> [config()]. +end_per_suite(Config) -> + Config. + +-spec init_per_testcase(atom(), config()) -> [config()]. +init_per_testcase(TestCase, Config) -> + Config. + +-spec end_per_testcase(atom(), config()) -> [config()]. +end_per_testcase(TestCase, Config) -> + kafkerl_test_utils:stop_kafka(), + Config. + +%%============================================================================== +%% Tests +%%============================================================================== +-spec produce_and_consume(config()) -> ok. +produce_and_consume(_Config) -> + % Start by producing a message while kafkerl has not been started + ct:pal("sending initial message"), + {error, not_started} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"ignore">>), + % Start kafkerl + ct:pal("starting kafkerl"), + ok = kafkerl:start(), + % Produce on some non existing topic, it will be cached + ct:pal("producing a message that will be cached"), + {ok, cached} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"msg1">>), + % Start kafka + ct:pal("starting kafkerl"), + ok = kafkerl_test_utils:start_kafka(), + % Create the topics and get the metadata + %ct:pal("create the topics"), + %ok = kafkerl_test_utils:create_test_topics(), + ct:pal("request the metadata"), + ok = kafkerl:request_metadata(), + % Wait for the metadata to be updated + ok = receive + {partition_update, PU = [_ | _]} -> + ct:pal("got an update (~p)!", [PU]), + ok + after 7500 -> + ct:pal("no update :("), + error + end, + % Send another message + ct:pal("send a message"), + {ok, saved} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"msg2">>), + % Wait a bit for the messages to be sent + + ok. 
\ No newline at end of file diff --git a/test/message_validation_SUITE.erl b/test/message_validation_SUITE.erl deleted file mode 100644 index 682e832..0000000 --- a/test/message_validation_SUITE.erl +++ /dev/null @@ -1,67 +0,0 @@ --module(message_validation_SUITE). --author('hernanrivasacosta@gmail.com'). - --export([valid_messages/1, invalid_messages/1]). - --export([init_per_suite/1, end_per_suite/1, init_per_testcase/2, - end_per_testcase/2, all/0]). - --type config() :: [{atom(), term()}]. - --spec all() -> [atom()]. -all() -> - [valid_messages, invalid_messages]. - --spec init_per_suite(config()) -> [config()]. -init_per_suite(Config) -> - Config. - --spec end_per_suite(config()) -> [config()]. -end_per_suite(Config) -> - Config. - --spec init_per_testcase(atom(), config()) -> [config()]. -init_per_testcase(TestCase, Config) -> - Config. - --spec end_per_testcase(atom(), config()) -> [config()]. -end_per_testcase(TestCase, Config) -> - Config. - -%%============================================================================== -%% Tests -%%============================================================================== --spec valid_messages(config()) -> ok. -valid_messages(_Config) -> - true = kafkerl_producer:valid_topics({<<"topic">>, 1, <<"msg">>}), - true = kafkerl_producer:valid_topics({<<"topic">>, {1, <<"msg">>}}), - true = kafkerl_producer:valid_topics({<<"topic">>, [{1, <<"msg">>}]}), - true = kafkerl_producer:valid_topics({<<"topic">>, [{1, <<"msg1">>}, - {2, <<"msg2">>}]}), - true = kafkerl_producer:valid_topics([{<<"topic">>, 1, <<"msg">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, 1, <<"msg1">>}, - {<<"topic2">>, 1, <<"msg2">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, {1, <<"msg1">>}}, - {<<"topic2">>, 1, <<"msg2">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}]}, - {<<"topic2">>, {1, <<"msg2">>}}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}, - {2, <<"msg2">>}]}, - {<<"topic2">>, {1, <<"msg3">>}}]), - ok. - --spec invalid_messages(config()) -> ok. -invalid_messages(_Config) -> - false = kafkerl_producer:valid_topics(<<"test">>), - false = kafkerl_producer:valid_topics({<<"test">>, 1}), - false = kafkerl_producer:valid_topics({<<"test">>, <<"msg">>}), - false = kafkerl_producer:valid_topics({<<"test">>, [<<"msg">>]}), - false = kafkerl_producer:valid_topics({<<"test">>, [1, <<"msg">>]}), - false = kafkerl_producer:valid_topics([]), - false = kafkerl_producer:valid_topics([<<"test">>]), - false = kafkerl_producer:valid_topics({undefined, 1, <<"msg">>}), - false = kafkerl_producer:valid_topics({<<"topic">>, 1, undefined}), - false = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}, - {2, undefined}]}, - {<<"topic2">>, {1, <<"msg3">>}}]), - ok. 
\ No newline at end of file From fecbc3bd8381bab4209b0ec1dbc8d9cf988debca Mon Sep 17 00:00:00 2001 From: georgeye Date: Thu, 23 Jun 2016 11:15:59 -0700 Subject: [PATCH 34/72] update epocxy --- rebar.config | 2 +- src/kafkerl.app.src | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 8134a63..f7bd45a 100644 --- a/rebar.config +++ b/rebar.config @@ -18,5 +18,5 @@ {deps, [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, - {epocxy, "1.0.0", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.0"}}}, + {epocxy, "1.0.1", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.1"}}}, {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index c60cecf..8859cfb 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,6 +1,6 @@ {application, kafkerl, [{description, []}, - {vsn, "1.0"}, + {vsn, "1.1"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, From 05a50c933659ceb541a25e378074338de8209336 Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Thu, 7 Jul 2016 22:36:55 +0800 Subject: [PATCH 35/72] kafkerl refact --- rebar.config | 3 ++- rel/kafkerl.app.config | 2 +- src/kafkerl_buffer.erl | 34 ++++++++++++++++++++++++++ src/kafkerl_connector.erl | 2 +- src/kafkerl_metadata_handler.erl | 41 ++++++++++++++++++++++---------- 5 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 src/kafkerl_buffer.erl diff --git a/rebar.config b/rebar.config index 2154eed..612d491 100644 --- a/rebar.config +++ b/rebar.config @@ -19,4 +19,5 @@ [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "1.0.0", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.0"}}}, - {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. \ No newline at end of file + {eper, "0.69", {git, "git@github.com:tigertext/eper.git", "HEAD" }}, + {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index ee73239..9d7eb53 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -2,7 +2,7 @@ {handlers, [{lager_console_backend, [debug,true]}]}]}, {kafkerl, [{disabled, false}, {conn_config, [{brokers, [{"localhost", 9090}, - {"localhost", 9091}, + {"localhost", 9094}, {"localhost", 9092}]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, diff --git a/src/kafkerl_buffer.erl b/src/kafkerl_buffer.erl new file mode 100644 index 0000000..4483120 --- /dev/null +++ b/src/kafkerl_buffer.erl @@ -0,0 +1,34 @@ + +-module(kafkerl_buffer). +-author("anders"). +-behavior(gen_server). + +%% API +-export([start_link/0, init/1, create_buffer/2, handle_call/3, start_link/1, create_buffer/1]). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +start_link(_) -> + start_link(). + + +init([]) -> + {ok, []}. + +create_buffer(Name, Type) -> + gen_server:call(?MODULE, {create_buffer, Name, Type}). +create_buffer(Name) -> + gen_server:call(?MODULE, {create_buffer, Name}). 
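
kafkerl_buffer above funnels every ets_buffer creation through a single
gen_server, so concurrent "create if missing" callers cannot race each other.
A simplified, stand-alone stand-in for that pattern (it records names in a
plain list instead of calling ets_buffer; all names are hypothetical):

-module(creator_sketch).
-behaviour(gen_server).
-export([start_link/0, ensure/1]).
-export([init/1, handle_call/3, handle_cast/2]).

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% Create the named resource exactly once, however many processes ask.
ensure(Name) ->
    gen_server:call(?MODULE, {ensure, Name}).

init([]) ->
    {ok, []}.

handle_call({ensure, Name}, _From, Created) ->
    case lists:member(Name, Created) of
        true  -> {reply, already_exists, Created};
        false -> {reply, created, [Name | Created]}
    end.

handle_cast(_Msg, State) ->
    {noreply, State}.
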
+ +handle_call({create_buffer, Name, Type}, _from, State) -> + Alredy_Exists = ets_buffer:list(Name) =/= [], + Res = ets_buffer:create(Name, Type), + lager:debug("buffer ~p type ~p created ~p, already exists ~p", [Name, Type, Res, Alredy_Exists]), + {reply, ok, State}; +handle_call({create_buffer, Name}, _From, State) -> + Res = ets_buffer:create(Name), + lager:debug("buffer ~p created ~p", [Name, Res]), + {reply, ok, State}. + + diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 447c64c..72b3a95 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -207,7 +207,7 @@ init([Config]) -> autocreate_topics = AutocreateTopics, default_fetch_options = DefaultFetchOptions}, % Create a buffer to hold unsent messages - _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), + _ = kafkerl_buffer:create_buffer(kafkerl_utils:default_buffer_name(), fifo), % Start the interval that manages the buffers holding unsent messages {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), ok = kafkerl_metadata_handler:request_metadata([]), diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index ef91183..58c9f90 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -22,7 +22,9 @@ retry_interval = 1 :: non_neg_integer(), cooldown = 0 :: integer(), known_topics = [] :: [kafkerl:topic()], - next_topics = [] :: [kafkerl:topic()]}). + next_topics = [] :: [kafkerl:topic()], + broker_connections = [], + connection_index = 0}). -type state() :: #state{}. %%============================================================================== @@ -70,10 +72,14 @@ requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> % Handle the updated metadata requesting({metadata_updated, RawMapping}, State) -> % Create the topic mapping (this also starts the broker connections) - NewMapping = get_broker_mapping(RawMapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping]), - ok = kafkerl_connector:topic_mapping_updated(NewMapping), - {next_state, idle, State}; + {N, TopicMapping} = get_broker_mapping(RawMapping, State), + _ = lager:debug("Refreshed topic mapping: ~p", [TopicMapping]), + + NewMapping2 = [{{Topic, Partition}, Conn} || + {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], + _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), + ok = kafkerl_connector:topic_mapping_updated(NewMapping2), + {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; % If we have no more retries left, go on cooldown requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> Params = [?MODULE, on_timer], @@ -105,9 +111,18 @@ handle_sync_event(get_known_topics, _From, StateName, State) -> %% gen_fsm boilerplate %%============================================================================== -spec handle_info(any(), atom(), state()) -> {next_state, atom(), state()}. 
+handle_info({'EXIT', Pid, Reason}, StateName, State) -> + lager:info("process ~p crashed with reason ~p ", [Pid, Reason]), + BrokerConnections = [{Name, {Topic, Partition}, Conn} || {Name, {Topic, Partition}, Conn} <- State#state.broker_connections, + whereis(Conn) /= Pid, whereis(Conn) /= undefined], + lager:info("current connections ~p, updated connections ~p ~n", [State#state.broker_connections, BrokerConnections]), + timer:apply_after(1000, ?MODULE, request_metadata, [[]]), + {next_state, StateName, State#state{broker_connections = BrokerConnections}}; + handle_info(Message, StateName, State) -> - lager:info("received unexpected message ~p", [Message]), - {next_state, StateName, State}. + lager:info("received unexpected message ~p", [Message]), + {next_state, StateName, State}. + -spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. code_change(_OldVsn, StateName, StateData, _Extra) -> @@ -145,6 +160,7 @@ init([Config]) -> client_id = ClientId, max_retries = MaxRetries, retry_interval = RetryInterval}, + process_flag(trap_exit, true), {ok, idle, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> @@ -290,14 +306,15 @@ expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, 0, []). + get_broker_mapping(TopicMapping, State, State#state.connection_index, + State#state.broker_connections). -get_broker_mapping([], _State, _N, Acc) -> - [{Key, Address} || {_ConnId, Key, Address} <- Acc]; +get_broker_mapping([], _State, N, Acc) -> + {N, lists:usort(Acc)}; get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], State = #state{config = Config}, N, Acc) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of false -> {start_broker_connection(N, Address, Config), N + 1}; @@ -306,7 +323,7 @@ get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], end, Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), kafkerl_broker_connection:add_buffer(Conn, Buffer), NewMapping = {ConnId, {Topic, Partition}, Conn}, From 4210727cea867fc0ef1981bb2f6306950a4df12b Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Fri, 8 Jul 2016 08:06:25 +0800 Subject: [PATCH 36/72] add child --- src/kafkerl_sup.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index e333131..8437c17 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -10,6 +10,7 @@ -type restart_strategy() :: {supervisor:strategy(), non_neg_integer(), non_neg_integer()}. +-define(CHILD(__Name, __Mod, __Args), {__Name, {__Mod, start_link, __Args}, permanent, 2000, worker, [__Mod]}). %%============================================================================== %% API @@ -28,13 +29,15 @@ init([]) -> lager:notice("Kafkerl is disabled, ignoring"), []; false -> - [get_connector_child_spec()] + [?CHILD(kafkerl_buffer, kafkerl_buffer, []), + get_connector_child_spec()] end, {ok, {{one_for_one, 5, 10}, ChildSpecs}}. 
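
The ?CHILD/3 macro introduced above is shorthand for a permanent worker child
spec. A stand-alone sketch showing what it expands to, validated with
supervisor:check_childspecs/1 (the child name 'my_buffer' is hypothetical):

-module(child_spec_sketch).
-export([run/0]).

-define(CHILD(__Name, __Mod, __Args),
        {__Name, {__Mod, start_link, __Args},
         permanent, 2000, worker, [__Mod]}).

run() ->
    %% Expands to the classic 6-tuple child spec:
    %% {my_buffer, {my_buffer, start_link, []}, permanent, 2000, worker, [my_buffer]}
    Spec = ?CHILD(my_buffer, my_buffer, []),
    ok = supervisor:check_childspecs([Spec]),
    Spec.
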
+ get_connector_child_spec() -> {ok, ConnConfig} = application:get_env(kafkerl, conn_config), Topics = application:get_env(kafkerl, topics, []), Params = [[{topics, Topics} | ConnConfig]], MFA = {kafkerl_connector, start_link, Params}, - {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file + {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file From cb813ffc61f56db6a28f6aa8c981c317d4ff6a70 Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Fri, 8 Jul 2016 23:03:13 +0800 Subject: [PATCH 37/72] handle broker leader change --- Makefile | 2 +- rel/kafkerl.app.config | 7 +- src/kafkerl_broker_connection.erl | 790 +++++++++++++++--------------- src/kafkerl_metadata_handler.erl | 418 ++++++++-------- src/kafkerl_protocol.erl | 2 +- 5 files changed, 622 insertions(+), 597 deletions(-) diff --git a/Makefile b/Makefile index bf16ef0..ed229fe 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ COOKIE ?= KAFKERL-EXAMPLE CONFIG ?= rel/kafkerl.app.config ERL ?= erl -RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} +RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -s kafkerl -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} NODE ?= kafkerl CT_ARGS ?= "-vvv" ERLARGS=-config ${CONFIG} diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 9d7eb53..cf6b2f3 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -1,9 +1,8 @@ [{lager, [{colored, true}, {handlers, [{lager_console_backend, [debug,true]}]}]}, {kafkerl, [{disabled, false}, - {conn_config, [{brokers, [{"localhost", 9090}, - {"localhost", 9094}, - {"localhost", 9092}]}, + {conn_config, [{brokers, [{"localhost", 9092}, {"localhost", 9094} + ]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, {broker_tcp_timeout, 1000}, @@ -20,4 +19,4 @@ {max_buffer_size, 5000}, {save_messages_for_bad_topics, true}]}, {topics, []}, - {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. \ No newline at end of file + {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 9e70e6e..f0610d2 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,117 +4,123 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1, fetch/4, stop_fetch/3]). +-export([add_buffer/2, clear_buffers/1, delete_buffer/2, fetch/4, stop_fetch/3]). % Only for internal use -export([connect/6]). % Supervisors -export([start_link/3]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, - handle_call/3, handle_cast/2, handle_info/2]). + handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --type conn_idx() :: 0..1023. +-type conn_idx() :: 0..1023. -type start_link_response() :: {ok, atom(), pid()} | ignore | {error, any()}. --record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), - server_ref = undefined :: kafkerl:server_ref(), - topic = undefined :: kafkerl:topic(), - partition = undefined :: kafkerl:partition(), - options = undefined :: kafkerl:options(), - state = void :: kafkerl_protocol:fetch_state()}). 
- --record(state, {name = undefined :: atom(), - buffers = [] :: [atom()], - conn_idx = undefined :: conn_idx(), - client_id = undefined :: binary(), - socket = undefined :: port(), - address = undefined :: kafkerl_connector:address(), - tref = undefined :: any(), - tcp_options = [] :: [any()], - max_retries = 0 :: integer(), - retry_interval = 0 :: integer(), - request_number = 0 :: integer(), - pending_requests = [] :: [integer()], - max_time_queued = 0 :: integer(), - ets = undefined :: atom(), - fetches = [] :: [#fetch{}], - current_fetch = void :: kafkerl_protocol:correlation_id() | - void, - scheduled_fetches = [] :: [{{kafkerl:topic(), - kafkerl:partition()}, - timer:tref()}]}). +-record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), + server_ref = undefined :: kafkerl:server_ref(), + topic = undefined :: kafkerl:topic(), + partition = undefined :: kafkerl:partition(), + options = undefined :: kafkerl:options(), + state = void :: kafkerl_protocol:fetch_state()}). + +-record(state, {name = undefined :: atom(), + buffers = [] :: [atom()], + conn_idx = undefined :: conn_idx(), + client_id = undefined :: binary(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + tref = undefined :: any(), + tcp_options = [] :: [any()], + max_retries = 0 :: integer(), + retry_interval = 0 :: integer(), + request_number = 0 :: integer(), + pending_requests = [] :: [integer()], + max_time_queued = 0 :: integer(), + ets = undefined :: atom(), + fetches = [] :: [#fetch{}], + current_fetch = void :: kafkerl_protocol:correlation_id() | + void, + scheduled_fetches = [] :: [{{kafkerl:topic(), + kafkerl:partition()}, + timer:tref()}]}). -type state() :: #state{}. %%============================================================================== %% API %%============================================================================== -spec start_link(conn_idx(), kafkerl_connector:address(), any()) -> - start_link_response(). + start_link_response(). start_link(Id, Address, Config) -> - NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), - Name = list_to_atom(NameStr), - Params = [Id, Address, Config, Name], - case gen_server:start_link({local, Name}, ?MODULE, Params, []) of - {ok, Pid} -> - {ok, Name, Pid}; - Other -> - Other - end. + NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), + Name = list_to_atom(NameStr), + Params = [Id, Address, Config, Name], + case gen_server:start_link({local, Name}, ?MODULE, Params, []) of + {ok, Pid} -> + {ok, Name, Pid}; + Other -> + Other + end. -spec add_buffer(kafkerl:server_ref(), atom()) -> ok. add_buffer(ServerRef, Buffer) -> - gen_server:call(ServerRef, {add_buffer, Buffer}). + gen_server:call(ServerRef, {add_buffer, Buffer}). -spec clear_buffers(kafkerl:server_ref()) -> ok. clear_buffers(ServerRef) -> - gen_server:call(ServerRef, {clear_buffers}). + gen_server:call(ServerRef, {clear_buffers}). + +delete_buffer(ServerRef, Buffer) -> + gen_server:call(ServerRef, {delete_buffer, Buffer}). -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), - kafkerl:options()) -> ok | kafkerl:error(). + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). + gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). -spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> - ok. + ok. 
stop_fetch(ServerRef, Topic, Partition) -> - gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). %%============================================================================== %% gen_server callbacks %%============================================================================== -spec handle_call(any(), any(), state()) -> {reply, ok, state()}. handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> - {reply, ok, State#state{buffers = [Buffer| Buffers]}}; + {reply, ok, State#state{buffers = [Buffer | Buffers]}}; handle_call({clear_buffers}, _From, State) -> - {reply, ok, State#state{buffers = []}}; + {reply, ok, State#state{buffers = []}}; +handle_call({delete_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> + {reply, ok, State#state{buffers = Buffers -- [Buffer]}}; + handle_call({fetch, ServerRef, Topic, Partition, Options}, _From, State) -> - handle_fetch(ServerRef, Topic, Partition, Options, State); + handle_fetch(ServerRef, Topic, Partition, Options, State); handle_call({stop_fetch, Topic, Partition}, _From, State) -> - handle_stop_fetch(Topic, Partition, State). + handle_stop_fetch(Topic, Partition, State). -spec handle_info(any(), state()) -> {noreply, state()}. handle_info({connected, Socket}, State) -> - handle_flush(State#state{socket = Socket}); + handle_flush(State#state{socket = Socket}); handle_info(connection_timeout, State) -> - {stop, {error, unable_to_connect}, State}; + {stop, {error, unable_to_connect}, State}; handle_info({tcp_closed, _Socket}, State = #state{name = Name, - address = {Host, Port}}) -> - _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), - NewState = handle_tcp_close(State), - {noreply, NewState}; + address = {Host, Port}}) -> + _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), + NewState = handle_tcp_close(State), + {noreply, NewState}; handle_info({tcp, _Socket, Bin}, State) -> - case handle_tcp_data(Bin, State) of - {ok, NewState} -> {noreply, NewState}; - {error, Reason} -> {stop, {error, Reason}, State} - end; + case handle_tcp_data(Bin, State) of + {ok, NewState} -> {noreply, NewState}; + {error, Reason} -> {stop, {error, Reason}, State} + end; handle_info({flush, Time}, State) -> - {ok, _Tref} = queue_flush(Time), - handle_flush(State); + {ok, _Tref} = queue_flush(Time), + handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), - {noreply, State}. + _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), + {noreply, State}. % Boilerplate -spec handle_cast(any(), state()) -> {noreply, state()}. @@ -128,409 +134,413 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
%% Handlers %%============================================================================== init([Id, Address, Config, Name]) -> - Schema = [{tcp_options, [any], {default, []}}, - {retry_interval, positive_integer, {default, 1000}}, - {max_retries, positive_integer, {default, 3}}, - {client_id, binary, {default, <<"kafkerl_client">>}}, - {max_time_queued, positive_integer, required}], - case normalizerl:normalize_proplist(Schema, Config) of - {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> - NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), - Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), - _ = ets:new(Ets, ets_options()), - State = #state{ets = Ets, - name = Name, - conn_idx = Id, - address = Address, - client_id = ClientId, - max_retries = MaxRetries, - tcp_options = NewTCPOpts, - retry_interval = RetryInterval, - max_time_queued = MaxTimeQueued}, - Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], - _Pid = spawn_link(?MODULE, connect, Params), - {ok, _Tref} = queue_flush(MaxTimeQueued), - {ok, State}; - {errors, Errors} -> - ok = lists:foreach(fun(E) -> - _ = lager:critical("configuration error: ~p", [E]) - end, Errors), - {stop, bad_config} - end. + Schema = [{tcp_options, [any], {default, []}}, + {retry_interval, positive_integer, {default, 1000}}, + {max_retries, positive_integer, {default, 3}}, + {client_id, binary, {default, <<"kafkerl_client">>}}, + {max_time_queued, positive_integer, required}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> + NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), + Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), + _ = ets:new(Ets, ets_options()), + State = #state{ets = Ets, + name = Name, + conn_idx = Id, + address = Address, + client_id = ClientId, + max_retries = MaxRetries, + tcp_options = NewTCPOpts, + retry_interval = RetryInterval, + max_time_queued = MaxTimeQueued}, + Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], + _Pid = spawn_link(?MODULE, connect, Params), + {ok, _Tref} = queue_flush(MaxTimeQueued), + {ok, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("configuration error: ~p", [E]) + end, Errors), + {stop, bad_config} + end. handle_flush(State = #state{socket = undefined}) -> - {noreply, State}; -handle_flush(State = #state{buffers = []}) -> - {noreply, State}; + {noreply, State}; +handle_flush(State = #state{buffers = [], name = Name}) -> + lager:error("process have connection to broker but no buffer binding!! 
kill me ~p", [Name]), + {noreply, State}; handle_flush(State = #state{ets = EtsName, socket = Socket, buffers = Buffers, - name = Name, client_id = ClientId}) -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - % TODO: Maybe buffer all this messages in case something goes wrong - AllMessages = get_all_messages(Buffers), - case kafkerl_utils:merge_messages(AllMessages) of - [] -> - {noreply, NewState}; - MergedMessages -> - Request = kafkerl_protocol:build_produce_request(MergedMessages, - ClientId, - CorrelationId), - true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), - _ = lager:debug("~p sending ~p", [Name, Request]), - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - gen_tcp:close(Socket), - ets:delete_all_objects(EtsName, CorrelationId), - ok = resend_messages(MergedMessages), - {noreply, handle_tcp_close(NewState)}; - ok -> - _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), - {noreply, NewState} - end - end. + name = Name, client_id = ClientId}) -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + % TODO: Maybe buffer all this messages in case something goes wrong + AllMessages = get_all_messages(Buffers), + case kafkerl_utils:merge_messages(AllMessages) of + [] -> + {noreply, NewState}; + MergedMessages -> + Request = kafkerl_protocol:build_produce_request(MergedMessages, + ClientId, + CorrelationId), + true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), + _ = lager:debug("~p sending ~p", [Name, Request]), + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + gen_tcp:close(Socket), + ets:delete_all_objects(EtsName, CorrelationId), + ok = resend_messages(MergedMessages), + {noreply, handle_tcp_close(NewState)}; + ok -> + _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), + {noreply, NewState} + end + end. handle_fetch(ServerRef, Topic, Partition, Options, - State = #state{fetches = Fetches, client_id = ClientId, - socket = Socket, name = Name, - scheduled_fetches = ScheduledFetches}) -> - Scheduled = proplists:get_bool(scheduled, Options), - case {get_fetch(Topic, Partition, Fetches), + State = #state{fetches = Fetches, client_id = ClientId, + socket = Socket, name = Name, + scheduled_fetches = ScheduledFetches}) -> + Scheduled = proplists:get_bool(scheduled, Options), + case {get_fetch(Topic, Partition, Fetches), lists:keytake({Topic, Partition}, 1, ScheduledFetches), Scheduled} of - % An scheduled fetch we can't identify? We ignore it - {_, false, true} -> - lager:warning("ignoring unknown scheduled fetch"), - {reply, ok, State}; - % We are already fetching that topic/partition pair - {#fetch{}, _, false} -> - {reply, {error, fetch_in_progress}, State}; - % We have a scheduled fetch for that topic/partition pair and this is not an - % scheduled fetch - {not_found, Tuple, false} when is_tuple(Tuple) -> - {reply, {error, fetch_in_progress}, State}; - % We have a valid fetch request! 
- {not_found, KeyTakeResult, Scheduled} -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - Offset = proplists:get_value(offset, Options, 0), - Request = {Topic, {Partition, Offset, 2147483647}}, - MaxWait = proplists:get_value(max_wait, Options), - MinBytes = proplists:get_value(min_bytes, Options), - Payload = kafkerl_protocol:build_fetch_request(Request, - ClientId, - CorrelationId, - MaxWait, - MinBytes), - case gen_tcp:send(Socket, Payload) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - ok = gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(State)}; - ok -> - _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), - NewFetch = #fetch{correlation_id = CorrelationId, - server_ref = ServerRef, - topic = Topic, - partition = Partition, - %options = [scheduled | Options]}, - options = Options}, - NewScheduledFetches = case KeyTakeResult of - false -> ScheduledFetches; - {_, _, List} -> List - end, - {reply, ok, NewState#state{fetches = [NewFetch | Fetches], - scheduled_fetches = NewScheduledFetches}} - end - end. + % An scheduled fetch we can't identify? We ignore it + {_, false, true} -> + lager:warning("ignoring unknown scheduled fetch"), + {reply, ok, State}; + % We are already fetching that topic/partition pair + {#fetch{}, _, false} -> + {reply, {error, fetch_in_progress}, State}; + % We have a scheduled fetch for that topic/partition pair and this is not an + % scheduled fetch + {not_found, Tuple, false} when is_tuple(Tuple) -> + {reply, {error, fetch_in_progress}, State}; + % We have a valid fetch request! + {not_found, KeyTakeResult, Scheduled} -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; + ok -> + _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + %options = [scheduled | Options]}, + options = Options}, + NewScheduledFetches = case KeyTakeResult of + false -> ScheduledFetches; + {_, _, List} -> List + end, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches], + scheduled_fetches = NewScheduledFetches}} + end + end. handle_stop_fetch(Topic, Partition, State) -> - % Cancel any timers we have for scheduled fetches - case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of - false -> - NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches}}; - {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> - _ = timer:cancel(TRef), - NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches, - scheduled_fetches = NewScheduledFetches}} - end. 
+ % Cancel any timers we have for scheduled fetches + case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of + false -> + NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}; + {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> + _ = timer:cancel(TRef), + NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches, + scheduled_fetches = NewScheduledFetches}} + end. remove_fetch(Topic, Partition, Force, CurrentFetches) -> - remove_fetch(Topic, Partition, Force, CurrentFetches, []). + remove_fetch(Topic, Partition, Force, CurrentFetches, []). remove_fetch(_Topic, _Partition, _Force, [], Acc) -> - Acc; + Acc; remove_fetch(Topic, Partition, force, - [#fetch{topic = Topic, partition = Partition} | T], Acc) -> - % If we are forcing the removal, just remove the fetch - Acc ++ T; + [#fetch{topic = Topic, partition = Partition} | T], Acc) -> + % If we are forcing the removal, just remove the fetch + Acc ++ T; remove_fetch(Topic, Partition, _, - [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> - % Clearing the fetch options ensures this fetch will stop sending any messages - % since there is no consumer. This also removes the fetch_interval so it won't - % be requested again. - % Simply removing the fetch here doesn't work since we will still get a server - % response, but we won't be able to handle it. - [Fetch#fetch{options = []} | Acc] ++ T; + [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> + % Clearing the fetch options ensures this fetch will stop sending any messages + % since there is no consumer. This also removes the fetch_interval so it won't + % be requested again. + % Simply removing the fetch here doesn't work since we will still get a server + % response, but we won't be able to handle it. + [Fetch#fetch{options = []} | Acc] ++ T; remove_fetch(Topic, Partition, Force, [H | T], Acc) -> - remove_fetch(Topic, Partition, Force, T, [H | Acc]). + remove_fetch(Topic, Partition, Force, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, - tcp_options = TCPOpts, - max_retries = MaxRetries, - address = Address, - name = Name}) -> - Params = [self(), Name, TCPOpts, Address, RetryInterval, MaxRetries], - _Pid = spawn_link(?MODULE, connect, Params), - State#state{socket = undefined}. + tcp_options = TCPOpts, + max_retries = MaxRetries, + address = Address, + name = Name}) -> + Params = [self(), Name, TCPOpts, Address, RetryInterval, MaxRetries], + _Pid = spawn_link(?MODULE, connect, Params), + State#state{socket = undefined}. handle_tcp_data(Bin, State = #state{fetches = Fetches, - current_fetch = CurrentFetch}) -> - {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), - case get_fetch(CorrelationId, Fetches) of - Fetch = #fetch{} -> - handle_fetch_response(Bin, Fetch, State); - _ -> - handle_produce_response(Bin, State) - end. + current_fetch = CurrentFetch}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), + case get_fetch(CorrelationId, Fetches) of + Fetch = #fetch{} -> + handle_fetch_response(Bin, Fetch, State); + _ -> + handle_produce_response(Bin, State) + end. 
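
handle_tcp_data/2 above picks the fetch or the produce parser by looking the
reply's correlation id up among the in-flight fetches. A stand-alone sketch of
that dispatch; it assumes the reply binary starts with a 32-bit big-endian
correlation id (the 4-byte length prefix having been stripped by the socket's
{packet, 4} option):

-module(dispatch_sketch).
-export([classify/2]).

-spec classify(binary(), [integer()]) -> fetch_response | produce_response.
classify(<<CorrelationId:32/unsigned-integer, _Rest/binary>>, InFlightFetchIds) ->
    case lists:member(CorrelationId, InFlightFetchIds) of
        true  -> fetch_response;    % a fetch we issued: use the fetch parser
        false -> produce_response   % otherwise treat it as a produce reply
    end.

%% Example: classify(<<42:32, "payload">>, [42]) -> fetch_response.
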
handle_fetch_response(Bin, Fetch, - State = #state{fetches = Fetches, - scheduled_fetches = ScheduledFetches}) -> - Options = Fetch#fetch.options, - Consumer = proplists:get_value(consumer, Options), - case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of - {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> - % The messages can be empty, for example when there are no new messages in - % this partition, if that happens, don't send anything and end the fetch. - ok = send_messages(Consumer, - case Messages of - [] -> []; - _ -> [{consumed, Messages}, {offset, Offset}] - end), - NewFetches = lists:delete(Fetch, Fetches), - NewState = State#state{current_fetch = void, fetches = NewFetches}, - case proplists:get_value(fetch_interval, Options, false) of - false -> - {ok, NewState}; - Interval -> - NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, - {offset, Offset}]), - Topic = Fetch#fetch.topic, - Partition = Fetch#fetch.partition, - ServerRef = Fetch#fetch.server_ref, - Arguments = [ServerRef, Topic, Partition, NewOptions], - {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), - NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], - {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} - end; - {incomplete, CorrelationId, Data, NewFetchState} -> - ok = case Data of - [{_, [{_, Messages = [_ | _]}]}] -> - send_messages(Consumer, {consumed, Messages}); - _ -> - % On some cases, kafka will return an incomplete response with no - % messages, but we shouldn't send the empty message list. - ok - end, - {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | - lists:delete(Fetch, Fetches)], - current_fetch = CorrelationId}}; - Error -> - ok = send_messages(Consumer, Error), - NewFetches = lists:delete(Fetch, Fetches), - {ok, State#state{current_fetch = void, fetches = NewFetches}} - end. + State = #state{fetches = Fetches, + scheduled_fetches = ScheduledFetches}) -> + Options = Fetch#fetch.options, + Consumer = proplists:get_value(consumer, Options), + case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of + {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> + % The messages can be empty, for example when there are no new messages in + % this partition, if that happens, don't send anything and end the fetch. + ok = send_messages(Consumer, + case Messages of + [] -> []; + _ -> [{consumed, Messages}, {offset, Offset}] + end), + NewFetches = lists:delete(Fetch, Fetches), + NewState = State#state{current_fetch = void, fetches = NewFetches}, + case proplists:get_value(fetch_interval, Options, false) of + false -> + {ok, NewState}; + Interval -> + NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, + {offset, Offset}]), + Topic = Fetch#fetch.topic, + Partition = Fetch#fetch.partition, + ServerRef = Fetch#fetch.server_ref, + Arguments = [ServerRef, Topic, Partition, NewOptions], + {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], + {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} + end; + {incomplete, CorrelationId, Data, NewFetchState} -> + ok = case Data of + [{_, [{_, Messages = [_ | _]}]}] -> + send_messages(Consumer, {consumed, Messages}); + _ -> + % On some cases, kafka will return an incomplete response with no + % messages, but we shouldn't send the empty message list. 
+ ok + end, + {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | + lists:delete(Fetch, Fetches)], + current_fetch = CorrelationId}}; + Error -> + ok = send_messages(Consumer, Error), + NewFetches = lists:delete(Fetch, Fetches), + {ok, State#state{current_fetch = void, fetches = NewFetches}} + end. handle_produce_response(Bin, State = #state{name = Name, ets = EtsName}) -> - case kafkerl_protocol:parse_produce_response(Bin) of - {ok, CorrelationId, Topics} -> - case ets:lookup(EtsName, CorrelationId) of - [{CorrelationId, Messages}] -> - ets:delete(EtsName, CorrelationId), - {Errors, Successes} = split_errors_and_successes(Topics), - % First, send the offsets and messages that were delivered - _ = spawn(fun() -> - notify_success(Successes, Messages) - end), - % Then handle the errors - case handle_errors(Errors, Messages, Name) of - ignore -> - {ok, State}; - {request_metadata, MessagesToResend} -> - kafkerl_connector:request_metadata(), - ok = resend_messages(MessagesToResend), - {ok, State} - end; - _ -> - _ = lager:warning("~p was unable to get produce response", [Name]), - {error, invalid_produce_response} - end; - Other -> - _ = lager:critical("~p got unexpected response when parsing message: ~p", - [Name, Other]), - {ok, State} - end. + case kafkerl_protocol:parse_produce_response(Bin) of + {ok, CorrelationId, Topics} -> + case ets:lookup(EtsName, CorrelationId) of + [{CorrelationId, Messages}] -> + ets:delete(EtsName, CorrelationId), + {Errors, Successes} = split_errors_and_successes(Topics), + % First, send the offsets and messages that were delivered + _ = spawn(fun() -> + notify_success(Successes, Messages) + end), + % Then handle the errors + case handle_errors(Errors, Messages, Name) of + ignore -> + {ok, State}; + {request_metadata, MessagesToResend} -> + kafkerl_connector:request_metadata(), + ok = resend_messages(MessagesToResend), + {ok, State} + end; + _ -> + _ = lager:warning("~p was unable to get produce response", [Name]), + {error, invalid_produce_response} + end; + Other -> + _ = lager:critical("~p got unexpected response when parsing message: ~p", + [Name, Other]), + {ok, State} + end. %%============================================================================== %% Utils %%============================================================================== resend_messages(Messages) -> - F = fun(M) -> kafkerl_connector:send(M) end, - lists:foreach(F, Messages). + F = fun(M) -> kafkerl_connector:send(M) end, + lists:foreach(F, Messages). notify_success([], _Messages) -> - ok; + ok; notify_success([{Topic, Partition, Offset} | T], Messages) -> - MergedMessages = kafkerl_utils:merge_messages(Messages), - Partitions = partitions_in_topic(Topic, MergedMessages), - M = messages_in_partition(Partition, Partitions), - kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), - notify_success(T, Messages). - + MergedMessages = kafkerl_utils:merge_messages(Messages), + Partitions = partitions_in_topic(Topic, MergedMessages), + M = messages_in_partition(Partition, Partitions), + kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), + notify_success(T, Messages). + partitions_in_topic(Topic, Messages) -> - lists:flatten([P || {T, P} <- Messages, T =:= Topic]). + lists:flatten([P || {T, P} <- Messages, T =:= Topic]). messages_in_partition(Partition, Messages) -> - lists:flatten([M || {P, M} <- Messages, P =:= Partition]). + lists:flatten([M || {P, M} <- Messages, P =:= Partition]). 
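partitions_in_topic/2 and messages_in_partition/2 assume the merged messages have the shape [{Topic, [{Partition, Payloads}]}], so picking the acknowledged payloads for one topic and partition is two comprehensions plus a flatten. A quick shell illustration over made-up data:

    1> Merged = [{<<"a">>, [{0, [<<"m1">>, <<"m2">>]}, {1, [<<"m3">>]}]}].
    [{<<"a">>,[{0,[<<"m1">>,<<"m2">>]},{1,[<<"m3">>]}]}]
    2> Partitions = lists:flatten([P || {T, P} <- Merged, T =:= <<"a">>]).
    [{0,[<<"m1">>,<<"m2">>]},{1,[<<"m3">>]}]
    3> lists:flatten([M || {P, M} <- Partitions, P =:= 0]).
    [<<"m1">>,<<"m2">>]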
build_correlation_id(State = #state{request_number = RequestNumber, - conn_idx = ConnIdx}) -> - % CorrelationIds are 32 bit integers, of those, the first 10 bits are used for - % the connectionId (hence the 1023 limit on it) and the other 22 bits are used - % for the sequential numbering, this magic number down here is actually 2^10-1 - NextRequest = case RequestNumber > 4194303 of - true -> 0; - false -> RequestNumber + 1 - end, - CorrelationId = (ConnIdx bsl 22) bor NextRequest, - {ok, CorrelationId, State#state{request_number = NextRequest}}. + conn_idx = ConnIdx}) -> + % CorrelationIds are 32 bit integers, of those, the first 10 bits are used for + % the connectionId (hence the 1023 limit on it) and the other 22 bits are used + % for the sequential numbering, this magic number down here is actually 2^10-1 + NextRequest = case RequestNumber > 4194303 of + true -> 0; + false -> RequestNumber + 1 + end, + CorrelationId = (ConnIdx bsl 22) bor NextRequest, + {ok, CorrelationId, State#state{request_number = NextRequest}}. split_errors_and_successes(Topics) -> - split_errors_and_successes(Topics, {[], []}). + split_errors_and_successes(Topics, {[], []}). split_errors_and_successes([], Acc) -> - Acc; + Acc; split_errors_and_successes([{Topic, Partitions} | T], Acc) -> - F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> - {E, [{Topic, Partition, Offset} | S]}; - ({Partition, Error, _}, {E, S}) -> - {[{Topic, Partition, Error} | E], S} - end, - split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). + F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> + {E, [{Topic, Partition, Offset} | S]}; + ({Partition, Error, _}, {E, S}) -> + {[{Topic, Partition, Error} | E], S} + end, + split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). handle_errors([], _Messages, _Name) -> - ignore; + ignore; handle_errors(Errors, Messages, Name) -> - F = fun(E) -> handle_error(E, Messages, Name) end, - case lists:filtermap(F, Errors) of - [] -> ignore; - L -> {request_metadata, L} - end. + lager:warning("error ~p in handling errors", [Errors]), + F = fun(E) -> handle_error(E, Messages, Name) end, + case lists:filtermap(F, Errors) of + [] -> ignore; + L -> + lager:warning("found errors when sending ~p, requesting metadata", [L]), + {request_metadata, L} + end. handle_error({Topic, Partition, Error}, Messages, Name) - when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse - Error =:= ?NOT_LEADER_FOR_PARTITION orelse - Error =:= ?LEADER_NOT_AVAILABLE -> - case get_message_for_error(Topic, Partition, Messages, Name) of - undefined -> false; - Message -> {true, Message} - end; + when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse + Error =:= ?NOT_LEADER_FOR_PARTITION orelse + Error =:= ?LEADER_NOT_AVAILABLE -> + case get_message_for_error(Topic, Partition, Messages, Name) of + undefined -> false; + Message -> {true, Message} + end; handle_error({Topic, Partition, Error}, _Messages, Name) -> - ErrorName = kafkerl_error:get_error_name(Error), - _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, ErrorName, Topic, Partition]), - false. + ErrorName = kafkerl_error:get_error_name(Error), + _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", + [Name, ErrorName, Topic, Partition]), + false. 
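One clarification on the comment in build_correlation_id/1: the wrap-around constant 4194303 is 2^22 - 1 (the sequence-number mask), while 2^10 - 1 = 1023 is the limit on the connection index that goes in the top 10 bits. A shell sketch of packing and unpacking a correlation id under that split:

    1> ConnIdx = 5.
    5
    2> CorrelationId = (ConnIdx bsl 22) bor 100.
    20971620
    3> {CorrelationId bsr 22, CorrelationId band 4194303}.
    {5,100}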
get_message_for_error(Topic, Partition, SavedMessages, Name) -> - case lists:keyfind(Topic, 1, SavedMessages) of - false -> - _ = lager:error("~p found no messages for topic ~p, partition ~p", - [Name, Topic, Partition]), - undefined; - {Topic, Partitions} -> - case lists:keyfind(Partition, 1, Partitions) of - false -> - _ = lager:error("~p found no messages for topic ~p, partition ~p", - [Name, Topic, Partition]), - undefined; - {Partition, Messages} -> - {Topic, Partition, Messages} - end - end. + case lists:keyfind(Topic, 1, SavedMessages) of + false -> + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), + undefined; + {Topic, Partitions} -> + case lists:keyfind(Partition, 1, Partitions) of + false -> + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), + undefined; + {Partition, Messages} -> + {Topic, Partition, Messages} + end + end. connect(Pid, Name, _TCPOpts, {Host, Port} = _Address, _Timeout, 0) -> - _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), - Pid ! connection_timeout; + _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), + Pid ! connection_timeout; connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> - _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), - case gen_tcp:connect(Host, Port, TCPOpts, 5000) of - {ok, Socket} -> - _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), - gen_tcp:controlling_process(Socket, Pid), - Pid ! {connected, Socket}; - {error, Reason} -> - NewRetries = Retries - 1, - _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p + _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), + case gen_tcp:connect(Host, Port, TCPOpts, 5000) of + {ok, Socket} -> + _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), + gen_tcp:controlling_process(Socket, Pid), + Pid ! {connected, Socket}; + {error, Reason} -> + NewRetries = Retries - 1, + _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p (~p retries left)", - [Name, Host, Port, Reason, NewRetries]), - timer:sleep(Timeout), - connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) - end. + [Name, Host, Port, Reason, NewRetries]), + timer:sleep(Timeout), + connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) + end. queue_flush(Time) -> - timer:send_after(Time * 1000, {flush, Time}). + timer:send_after(Time * 1000, {flush, Time}). get_all_messages(Buffers) -> - get_all_messages(Buffers, []). + get_all_messages(Buffers, []). get_all_messages([], Acc) -> - Acc; + Acc; get_all_messages([H | T], Acc) -> - get_all_messages(T, Acc ++ get_messages_from(H, 20)). + get_all_messages(T, Acc ++ get_messages_from(H, 20)). get_messages_from(Ets, Retries) -> - case ets_buffer:read_all(Ets) of - L when is_list(L) -> - L; - _Error when Retries > 0 -> - get_messages_from(Ets, Retries - 1); - _Error -> - _ = lager:warning("giving up on reading from the ETS buffer"), - [] - end. + case ets_buffer:read_all(Ets) of + L when is_list(L) -> + L; + _Error when Retries > 0 -> + get_messages_from(Ets, Retries - 1); + _Error -> + _ = lager:warning("giving up on reading from the ETS buffer"), + [] + end. parse_correlation_id(Bin, void) -> - kafkerl_protocol:parse_correlation_id(Bin); + kafkerl_protocol:parse_correlation_id(Bin); parse_correlation_id(Bin, CorrelationId) -> - {ok, CorrelationId, Bin}. + {ok, CorrelationId, Bin}. 
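connect/6 reports back to its owner purely by message: {connected, Socket} on success (socket ownership has already been transferred with gen_tcp:controlling_process/2) or the bare atom connection_timeout once the retry budget runs out. A minimal owner-side receive for those two outcomes, with an illustrative module name and an arbitrary 10-second local wait:

    -module(connect_owner_sketch).
    -export([wait_for_connection/0]).

    %% Counterpart to the messages sent by connect/6 above.
    wait_for_connection() ->
      receive
        {connected, Socket} -> {ok, Socket};
        connection_timeout  -> {error, no_connection}
      after 10000 ->
        {error, still_connecting}
      end.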
get_fetch(_CorrelationId, []) -> - not_found; + not_found; get_fetch(CorrelationId, [H = #fetch{correlation_id = CorrelationId} | _T]) -> - H; + H; get_fetch(CorrelationId, [_H | T]) -> - get_fetch(CorrelationId, T). + get_fetch(CorrelationId, T). get_fetch(_Topic, _Partition, []) -> - not_found; + not_found; get_fetch(Topic, Partition, [H = #fetch{topic = Topic, - partition = Partition} | _T]) -> - H; + partition = Partition} | _T]) -> + H; get_fetch(Topic, Partition, [_H | T]) -> - get_fetch(Topic, Partition, T). + get_fetch(Topic, Partition, T). send_messages(_Consumer, []) -> - ok; + ok; send_messages(Consumer, [Event | T]) -> - case send_messages(Consumer, Event) of - ok -> send_messages(Consumer, T); - Error -> Error - end; + case send_messages(Consumer, Event) of + ok -> send_messages(Consumer, T); + Error -> Error + end; send_messages(Consumer, Event) -> - kafkerl_utils:send_event(Consumer, Event). + kafkerl_utils:send_event(Consumer, Event). ets_options() -> - [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. \ No newline at end of file + [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. \ No newline at end of file diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index 58c9f90..4ee5be4 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -11,20 +11,21 @@ -export([make_request/3]). %% gen_fsm -export([start_link/1, init/1, handle_info/3, terminate/3, code_change/4, - handle_event/3, handle_sync_event/4]). + handle_event/3, handle_sync_event/4]). -include("kafkerl.hrl"). - --record(state, {config = [] :: [{atom(), any()}], - client_id = <<>> :: kafkerl_protocol:client_id(), - brokers = [] :: [kafkerl_connector:address()], - max_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - cooldown = 0 :: integer(), - known_topics = [] :: [kafkerl:topic()], - next_topics = [] :: [kafkerl:topic()], - broker_connections = [], - connection_index = 0}). +-type broker_id() :: non_neg_integer(). +-type broker_connection() :: {broker_id(), {kafkerl:topic(), kafkerl:partition()}, atom()}. +-record(state, {config = [] :: [{atom(), any()}], + client_id = <<>> :: kafkerl_protocol:client_id(), + brokers = [] :: [kafkerl_connector:address()], + max_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + cooldown = 0 :: integer(), + known_topics = [] :: [kafkerl:topic()], + next_topics = [] :: [kafkerl:topic()], + broker_connections = [] :: [broker_connection()], + connection_index = 0 :: non_neg_integer()}). -type state() :: #state{}. %%============================================================================== @@ -32,80 +33,82 @@ %%============================================================================== -spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Config) -> - gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []). + gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []). -spec request_metadata([kafkerl:topic()]) -> ok. request_metadata(Topics) -> - gen_fsm:send_event(?MODULE, {request, Topics}). + gen_fsm:send_event(?MODULE, {request, Topics}). -spec get_known_topics() -> ok. get_known_topics() -> - gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics). + gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics). 
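The new broker_connection() type spells out what the handler accumulates: the broker id, the {Topic, Partition} it leads, and the registered name of the kafkerl_broker_connection process serving it. An illustrative value (topic and process names are placeholders, not the library's actual naming scheme):

    [{0, {<<"test1">>, 0}, kafkerl_broker_connection_0},
     {0, {<<"test1">>, 1}, kafkerl_broker_connection_0},
     {1, {<<"test2">>, 0}, kafkerl_broker_connection_1}]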
%%============================================================================== %% States %%============================================================================== -spec idle(any(), state()) -> {next_state, atom(), state()}. idle({request, Topics}, State = #state{known_topics = KnownTopics}) -> - % Add the requested topics to the state - SortedTopics = lists:usort(KnownTopics), - NewKnownTopics = lists:umerge(Topics, SortedTopics), - NewState = State#state{known_topics = NewKnownTopics}, - % Make the request - ok = schedule_metadata_request(NewState), - % And move the the requesting state - {next_state, requesting, NewState}. + % Add the requested topics to the state + SortedTopics = lists:usort(KnownTopics), + NewKnownTopics = lists:umerge(Topics, SortedTopics), + NewState = State#state{known_topics = NewKnownTopics}, + % Make the request + ok = schedule_metadata_request(NewState), + % And move the the requesting state + {next_state, requesting, NewState}. -spec requesting(any(), state()) -> {next_state, atom(), state()}. % Handle a new metadata request while there's one in progress requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> - SortedTopics = lists:usort(NewTopics), % This also removes repeated entries - % If the request is for known topics, then we can safely ignore it, otherwise, - % queue a metadata request - NewState = case SortedTopics -- KnownTopics of - [] -> State; - _ -> request_metadata([]), - State#state{known_topics = lists:umerge(KnownTopics, - SortedTopics)} - end, - {next_state, requesting, NewState}; + SortedTopics = lists:usort(NewTopics), % This also removes repeated entries + % If the request is for known topics, then we can safely ignore it, otherwise, + % queue a metadata request + NewState = case SortedTopics -- KnownTopics of + [] -> State; + _ -> request_metadata([]), + State#state{known_topics = lists:umerge(KnownTopics, + SortedTopics)} + end, + {next_state, requesting, NewState}; % Handle the updated metadata requesting({metadata_updated, RawMapping}, State) -> - % Create the topic mapping (this also starts the broker connections) + % Create the topic mapping (this also starts the broker connections) {N, TopicMapping} = get_broker_mapping(RawMapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [TopicMapping]), - + OldMapping = State#state.broker_connections, + %%OldMapping -- TopicMapping =/= [] andalso + lager:warning("sync broker mappings old ~p new ~p", + [OldMapping, TopicMapping]), NewMapping2 = [{{Topic, Partition}, Conn} || - {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], - _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), - ok = kafkerl_connector:topic_mapping_updated(NewMapping2), - {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; + {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], + lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), + ok = kafkerl_connector:topic_mapping_updated(NewMapping2), + {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; % If we have no more retries left, go on cooldown requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> - Params = [?MODULE, on_timer], - {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params), - {next_state, on_cooldown, State}; + Params = [?MODULE, on_timer], + {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params), + {next_state, on_cooldown, State}; % If we have more retries to do, schedule a new 
retry requesting({metadata_retry, Retries}, State) -> - ok = schedule_metadata_request(Retries, State), - {next_state, requesting, State}. + ok = schedule_metadata_request(Retries, State), + {next_state, requesting, State}. -spec on_cooldown(any(), state()) -> {next_state, atom(), state()}. on_cooldown({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> - % Since we are on cooldown (the time between consecutive requests) we only add - % the topics to the scheduled next request - SortedTopics = lists:usort(NewTopics), - State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}; + % Since we are on cooldown (the time between consecutive requests) we only add + % the topics to the scheduled next request + SortedTopics = lists:usort(NewTopics), + State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}; on_cooldown(on_timer, State) -> - ok = schedule_metadata_request(State), - {next_state, requesting, State}. + ok = schedule_metadata_request(State), + {next_state, requesting, State}. %%============================================================================== %% Events %%============================================================================== handle_sync_event(get_known_topics, _From, StateName, State) -> - Reply = State#state.known_topics, - {reply, Reply, StateName, State}. + Reply = State#state.known_topics, + {reply, Reply, StateName, State}. %%============================================================================== %% gen_fsm boilerplate @@ -126,15 +129,15 @@ handle_info(Message, StateName, State) -> -spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. code_change(_OldVsn, StateName, StateData, _Extra) -> - {ok, StateName, StateData}. + {ok, StateName, StateData}. -spec terminate(any(), atom(), state()) -> ok. terminate(_Reason, _StateName, _StateData) -> - ok. + ok. -spec handle_event(any(), atom(), state()) -> {next_state, atom(), state()}. handle_event(_Event, StateName, StateData) -> - {next_state, StateName, StateData}. + {next_state, StateName, StateData}. %-spec handle_sync_event(any(), any(), atom(), state()) -> % {next_state, atom(), state()}. @@ -145,195 +148,208 @@ handle_event(_Event, StateName, StateData) -> %% Handlers %%============================================================================== init([Config]) -> - Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, - {metadata_tcp_timeout, positive_integer, {default, 1500}}, - {metadata_request_cooldown, positive_integer, {default, 333}}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, - {brokers, [{string, {integer, {1, 65535}}}], required}, - {topics, [binary], required}], - case normalizerl:normalize_proplist(Schema, Config) of - {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> - State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - cooldown = Cooldown, - client_id = ClientId, - max_retries = MaxRetries, - retry_interval = RetryInterval}, - process_flag(trap_exit, true), - {ok, idle, State}; - {errors, Errors} -> - ok = lists:foreach(fun(E) -> - _ = lager:critical("Metadata config error ~p", [E]) - end, Errors), - {stop, bad_config} - end. 
+ Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, + {metadata_tcp_timeout, positive_integer, {default, 1500}}, + {metadata_request_cooldown, positive_integer, {default, 333}}, + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, + {brokers, [{string, {integer, {1, 65535}}}], required}, + {topics, [binary], required}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + cooldown = Cooldown, + client_id = ClientId, + max_retries = MaxRetries, + retry_interval = RetryInterval}, + process_flag(trap_exit, true), + {ok, idle, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("Metadata config error ~p", [E]) + end, Errors), + {stop, bad_config} + end. %%============================================================================== %% Request logic %%============================================================================== schedule_metadata_request(State) -> - schedule_metadata_request(undefined, State). + schedule_metadata_request(undefined, State). schedule_metadata_request(Retries, State = #state{brokers = Brokers, - max_retries = MaxRetries, - known_topics = Topics, - retry_interval = Interval}) -> - Request = metadata_request(State, Topics), - case Retries of - undefined -> - Params = [Brokers, Request, MaxRetries], - _ = spawn(?MODULE, make_request, Params); - _ -> - Params = [Brokers, Request, Retries], - {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) - end, - ok. + max_retries = MaxRetries, + known_topics = Topics, + retry_interval = Interval}) -> + Request = metadata_request(State, Topics), + case Retries of + undefined -> + Params = [Brokers, Request, MaxRetries], + _ = spawn(?MODULE, make_request, Params); + _ -> + Params = [Brokers, Request, Retries], + {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) + end, + ok. make_request(Brokers, Request, Retries) -> - case do_request_metadata(Brokers, Request) of - {ok, TopicMapping} -> - gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); - Error -> - _ = lager:debug("Metadata request error: ~p", [Error]), - NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, - gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) - end. + case do_request_metadata(Brokers, Request) of + {ok, TopicMapping} -> + gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); + Error -> + _ = lager:debug("Metadata request error: ~p", [Error]), + NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, + gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) + end. 
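Of the schema above only brokers and topics are required; everything else falls back to the defaults shown. A config proplist that would pass normalizerl validation (hosts, ports and topic names are placeholders):

    [{brokers, [{"localhost", 9092}, {"localhost", 9093}]},
     {topics, [<<"test1">>, <<"test2">>]},
     {client_id, <<"kafkerl_client">>},
     {metadata_tcp_timeout, 1500},
     {metadata_request_cooldown, 333},
     {max_metadata_retries, -1}]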
do_request_metadata([], _Request) -> - {error, all_down}; + {error, all_down}; do_request_metadata([{Host, Port} = _Broker | T], Request) -> - _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of - {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of + log_metadata_request_error(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. + log_metadata_request_error(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. %%============================================================================== %% Utils %%============================================================================== get_metadata_tcp_options() -> - kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). + kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). log_metadata_request_error(Host, Port, Reason) -> - _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> - kafkerl_protocol:build_metadata_request([], 0, ClientId); + kafkerl_protocol:build_metadata_request([], 0, ClientId); metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, - NewTopics) -> - AllTopics = lists:umerge(KnownTopics, NewTopics), - kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). 
+ NewTopics) -> + AllTopics = lists:umerge(KnownTopics, NewTopics), + kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). %%============================================================================== %% Topic/broker mapping %%============================================================================== get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; + {true, {Topic, Partitions}}; expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - _ = lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; + % Replica not available can be ignored, still, show a warning + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; expand_topic({Error, Topic, _Partitions}) -> - _ = lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. expand_partitions(Metadata) -> - expand_partitions(Metadata, []). + expand_partitions(Metadata, []). 
expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; + {true, Acc}; expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); + _, _} | T]}, Acc) -> + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, State#state.connection_index, - State#state.broker_connections). + get_broker_mapping(TopicMapping, State, State#state.connection_index, + State#state.broker_connections). get_broker_mapping([], _State, N, Acc) -> + %% ACC is [{ConnId, {Topic, Partition}, ProcessName}] {N, lists:usort(Acc)}; -get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], - State = #state{config = Config}, N, Acc) -> - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = kafkerl_buffer:create_buffer(Buffer, fifo), - {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of - false -> - {start_broker_connection(N, Address, Config), N + 1}; - {ConnId, _, BrokerConnection} -> - {BrokerConnection, N} - end, - - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = kafkerl_buffer:create_buffer(Buffer, fifo), - kafkerl_broker_connection:add_buffer(Conn, Buffer), - - NewMapping = {ConnId, {Topic, Partition}, Conn}, - get_broker_mapping(T, State, NewN, [NewMapping | Acc]). 
+get_broker_mapping([{{Topic, Partition, BrokerId}, Address} | T], + State = #state{config = Config}, N, Acc) -> + %% generate the topic/partions buffer name + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), + {Conn, NewN} = + case lists:keyfind(BrokerId, 1, Acc) of + false -> + %% no connection for current broker id + %% start broker connection + {start_broker_connection(N, Address, Config), N + 1}; + {BrokerId, _, BrokerConnectionProcess} -> + %% there is already connection for broker id, just continue; + {BrokerConnectionProcess, N} + end, + NewMapping = {BrokerId, {Topic, Partition}, Conn}, + kafkerl_broker_connection:add_buffer(Conn, Buffer), + NewAcc = + case lists:keyfind({Topic, Partition}, 2, Acc) of + false -> + %% the topic is not bind to any other processes, just continue + Acc; + {_BrokerId, {Topic, Partition}, ConnectionProcess} -> + % the topic is bind to other processes, ask those process to remove the binding + kafkerl_broker_connection:delete_buffer(ConnectionProcess, Buffer), + % delete current binding from Acc + lists:keydelete({Topic, Partition}, 2, Acc) + end, + get_broker_mapping(T, State, NewN, [NewMapping | NewAcc]). start_broker_connection(N, Address, Config) -> - case kafkerl_broker_connection:start_link(N, Address, Config) of - {ok, Name, _Pid} -> - Name; - {error, {already_started, Pid}} -> - kafkerl_broker_connection:clear_buffers(Pid), - Pid - end. \ No newline at end of file + case kafkerl_broker_connection:start_link(N, Address, Config) of + {ok, Name, _Pid} -> + Name; + {error, {already_started, Pid}} -> + kafkerl_broker_connection:clear_buffers(Pid), + Pid + end. diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index f05b396..57224e1 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -196,7 +196,7 @@ build_produce_request({Topic, Partition, Messages}, Compression) -> {Size, MessageSet} = build_message_set(Messages, Compression), {Size + TopicSize + 24, [<<-1:?SHORT, - -1:?INT, % Timeout + -1:?INT, % Timeout %% TODO: get timeout error from kafka when this values is set to -1, after changing it to 1000, error disappers. need to double check if this value is updated in latest kafka protocol 1:?UINT, % TopicCount TopicSize:?USHORT>>, Topic, From 5e56fd884c9a7c550bff8efc7582b98ee3ce3a11 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Thu, 5 Mar 2015 16:22:46 -0300 Subject: [PATCH 38/72] updated the epocxy version --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index f7bd45a..fb71da9 100644 --- a/rebar.config +++ b/rebar.config @@ -19,4 +19,4 @@ [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "1.0.1", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.1"}}}, - {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. + {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. 
\ No newline at end of file From 9e367a085edfafab98049e7030a2c7826cddf2c3 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Wed, 29 Jul 2015 14:39:52 -0300 Subject: [PATCH 39/72] added the kafka consumer and simplified the producer API, bumped version to 2.0.0.a --- include/kafkerl.hrl | 7 +- rel/kafkerl.app.config | 5 +- src/kafkerl.erl | 154 ++++++++++++++---------------- src/kafkerl_broker_connection.erl | 98 +++++++++++++++++-- src/kafkerl_connector.erl | 81 +++++++++++----- src/kafkerl_protocol.erl | 33 +++++-- src/kafkerl_utils.erl | 20 ++++ 7 files changed, 271 insertions(+), 127 deletions(-) diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index b78cb13..64964c3 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -57,8 +57,9 @@ -type produce_response() :: {ok, correlation_id(), [produce_topic()]}. %% Fetch responses --type messages() :: [{topic(), [{{partition(), integer()}, [binary()]}]}]. --type fetch_state() :: {binary(), integer(), [any()]}. +-type messages() :: [{topic(), [{{partition(), integer()}, + [binary() | {binary(), binary()}]}]}]. +-type fetch_state() :: {binary(), integer(), [any()]} | void. -type fetch_response() :: {ok, integer(), messages()} | {incomplete, integer(), messages(), fetch_state()} | error(). @@ -91,4 +92,4 @@ -define(OFFSETS_LOAD_IN_PROGRESS_CODE, 14). -define(CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE, 15). -define(NOT_COORDINATOR_FOR_CONSUMER_CODE, 16). --define(UNKNOWN, -1). +-define(UNKNOWN, -1). \ No newline at end of file diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index a514299..2542515 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -13,7 +13,8 @@ {metadata_tcp_timeout, 1000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds - {metadata_request_cooldown, 1500} % In milliseconds - ]}, + {metadata_request_cooldown, 1500}, % In milliseconds + {consumer_min_bytes, 1}, + {consumer_max_wait, 1500}]}, {topics, [test1, test2, test3]}, {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index cbf7430..320aa23 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -2,24 +2,26 @@ -author('hernanrivasacosta@gmail.com'). -export([start/0, start/2]). --export([version/0, - produce/1, produce/2, produce_messages_from_file/1, - produce_messages_from_file/2, produce_messages_from_file/3, - get_partitions/0, get_partitions/1, - subscribe/1, subscribe/2, subscribe/3, - unsubscribe/1, unsubscribe/2, +-export([produce/3, produce/4, produce/5, + consume/2, consume/3, consume/4, request_metadata/0, request_metadata/1, request_metadata/2, - valid_message/1]). + partitions/0, partitions/1]). +-export([version/0]). -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). %% Types --type produce_option() :: {buffer_size, integer() | infinity} | - {dump_location, string()}. --type produce_options() :: [produce_option()]. +-type option() :: {buffer_size, integer() | infinity} | + {dump_location, string()} | + {consumer, callback()} | + {min_bytes, integer()} | + {max_wait, integer()} | + {offset, integer()}. +-type options() :: [option()]. +-type server_ref() :: atom() | pid(). --export_type([produce_options/0]). +-export_type([options/0, server_ref/0]). 
%%============================================================================== %% API @@ -34,79 +36,71 @@ start(_StartType, _StartArgs) -> %%============================================================================== %% Access API %%============================================================================== --spec version() -> {integer(), integer(), integer()}. -version() -> - {1, 1, 2}. - --spec produce(basic_message()) -> ok. -produce(Message) -> - produce(?MODULE, Message). --spec produce(atom(), basic_message()) -> ok; - (basic_message(), produce_options()) -> ok. -produce(Message, Options) when is_tuple(Message) -> - produce(?MODULE, Message, Options); -produce(Name, Message) -> - produce(Name, Message, []). --spec produce(atom(), basic_message(), produce_options()) -> ok. -produce(Name, Message, Options) -> - kafkerl_connector:send(Name, Message, Options). - --spec produce_messages_from_file(string()) -> ok. -produce_messages_from_file(Filename) -> - produce_messages_from_file(?MODULE, Filename). --spec produce_messages_from_file(atom(), basic_message()) -> ok; - (string(), produce_options()) -> ok. -produce_messages_from_file(Filename, Options) when is_list(Filename) -> - produce_messages_from_file(?MODULE, Filename, Options); -produce_messages_from_file(Name, Filename) -> - produce_messages_from_file(Name, Filename, []). --spec produce_messages_from_file(atom(), string(), produce_options()) -> ok. -produce_messages_from_file(Name, Filename, Options) -> - {ok, Bin} = file:read_file(Filename), - Messages = binary_to_term(Bin), - [produce(Name, M, Options) || M <- Messages], - ok. - --spec get_partitions() -> [{topic(), [partition()]}] | error(). -get_partitions() -> - get_partitions(?MODULE). --spec get_partitions(atom()) -> [{topic(), [partition()]}] | error(). -get_partitions(Name) -> - kafkerl_connector:get_partitions(Name). - --spec subscribe(callback()) -> ok. -subscribe(Callback) -> - subscribe(?MODULE, Callback). --spec subscribe(atom(), callback()) -> ok. -subscribe(Callback, all = Filter) -> - subscribe(?MODULE, Callback, Filter); -subscribe(Callback, Filter) when is_list(Filter) -> - subscribe(?MODULE, Callback, Filter); -subscribe(Name, Callback) -> - subscribe(Name, Callback, all). --spec subscribe(atom(), callback(), filters()) -> ok. -subscribe(Name, Callback, Filter) -> - kafkerl_connector:subscribe(Name, Callback, Filter). - --spec unsubscribe(callback()) -> ok. -unsubscribe(Callback) -> - unsubscribe(?MODULE, Callback). --spec unsubscribe(atom(), callback()) -> ok. -unsubscribe(Name, Callback) -> - kafkerl_connector:unsubscribe(Name, Callback). +%% Produce API +-spec produce(topic(), partition(), payload()) -> ok. +produce(Topic, Partition, Message) -> + produce(?MODULE, Topic, Partition, Message, []). + +-spec produce(server_ref(), topic(), partition(), payload()) -> ok; + (topic(), partition(), payload(), options()) -> ok. +produce(Topic, Partition, Message, Options) when is_list(Options) -> + produce(?MODULE, {Topic, Partition, Message}, Options); +produce(ServerRef, Topic, Partition, Message) -> + produce(ServerRef, {Topic, Partition, Message}, []). + +-spec produce(server_ref(), topic(), partition(), payload(), options()) -> ok. +produce(ServerRef, Topic, Partition, Message, Options) -> + kafkerl_connector:send(ServerRef, {Topic, Partition, Message}, Options). + +%% Consume API +-spec consume(topic(), partition()) -> ok | error(). +consume(Topic, Partition) -> + consume(?MODULE, Topic, Partition, []). 
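The producer change is mostly a re-shaping of the arguments: the old single basic_message() tuple is unpacked into separate topic, partition and payload parameters. Assuming the application is started and its connector is registered under the default name, the before/after looks roughly like this (the old call is shown for comparison only):

    %% kafkerl 1.x style (removed by this patch):
    %%   kafkerl:produce({<<"test1">>, 0, <<"payload">>}).
    %% kafkerl 2.0 style:
    ok = kafkerl:produce(<<"test1">>, 0, <<"payload">>).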
+ +-spec consume(topic(), partition(), options()) -> ok | [binary()] | error(); + (server_ref(), topic(), partition()) -> ok | error(). +consume(Topic, Partition, Options) when is_list(Options) -> + consume(?MODULE, Topic, Partition, Options); +consume(ServerRef, Topic, Partition) -> + consume(ServerRef, Topic, Partition, []). + +-spec consume(server_ref(), topic(), partition(), options()) -> + ok | [binary()] | error(). +consume(ServerRef, Topic, Partition, Options) -> + case lists:keyfind(consumer, 1, Options) of + false -> + NewOptions = [{consumer, self()} | Options], + kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), + kafkerl_utils:gather_consume_responses(); + _ -> + kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) + end. +%% Metadata API -spec request_metadata() -> ok. request_metadata() -> request_metadata(?MODULE). + -spec request_metadata(atom() | [topic()]) -> ok. -request_metadata(Name) when is_atom(Name) -> - kafkerl_connector:request_metadata(Name); -request_metadata(Topics) -> - request_metadata(?MODULE, Topics). +request_metadata(Topics) when is_list(Topics) -> + request_metadata(?MODULE, Topics); +request_metadata(ServerRef) -> + kafkerl_connector:request_metadata(ServerRef). + -spec request_metadata(atom(), [topic()]) -> ok. -request_metadata(Name, Topics) -> - kafkerl_connector:request_metadata(Name, Topics). +request_metadata(ServerRef, Topics) -> + kafkerl_connector:request_metadata(ServerRef, Topics). + +%% Partitions +-spec partitions() -> [{topic(), [partition()]}] | error(). +partitions() -> + partitions(?MODULE). --spec valid_message(any()) -> boolean(). -valid_message(Any) -> - kafkerl_utils:valid_message(Any). \ No newline at end of file +-spec partitions(server_ref()) -> [{topic(), [partition()]}] | error(). +partitions(ServerRef) -> + kafkerl_connector:get_partitions(ServerRef). + +%% Utils +-spec version() -> {integer(), integer(), integer()}. +version() -> + {2, 0, 0}. \ No newline at end of file diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 686cbfd..98921d1 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,7 +4,7 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1]). +-export([add_buffer/2, clear_buffers/1, fetch/4]). % Only for internal use -export([connect/6]). % Supervisors @@ -14,6 +14,7 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). +-include("kafkerl_consumers.hrl"). -type server_ref() :: atom() | pid(). -type conn_idx() :: 0..1023. @@ -33,7 +34,11 @@ request_number = 0 :: integer(), pending_requests = [] :: [integer()], max_time_queued = 0 :: integer(), - ets = undefined :: atom()}). + ets = undefined :: atom(), + fetching = void :: integer() | void, + fetches = [] :: [{correlation_id(), + callback(), + fetch_state()}]}). -type state() :: #state{}. %%============================================================================== @@ -60,6 +65,10 @@ add_buffer(ServerRef, Buffer) -> clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). +-spec fetch(server_ref(), topic(), partition(), kafkerl:options()) -> ok. +fetch(ServerRef, Topic, Partition, Options) -> + gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). 
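Taken together, the new surface reduces the common round trip to two calls: produce/3 pushes a message through the connector's buffers, and consume/3, when no {consumer, Pid} option is given, registers the caller as the consumer and blocks in kafkerl_utils:gather_consume_responses/0 until the fetch completes. A usage sketch assuming the kafkerl application is already configured and started and the topic exists:

    -module(kafkerl_usage_sketch).
    -export([roundtrip/0]).

    %% Produce one message and read the partition back from offset 0.
    roundtrip() ->
      ok = kafkerl:produce(<<"test1">>, 0, <<"hello">>),
      %% Blocks and returns the fetched messages, or an error tuple such as
      %% {error, {no_response, PartialMessages}} on timeout.
      kafkerl:consume(<<"test1">>, 0, [{offset, 0}]).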
+ %%============================================================================== %% gen_server callbacks %%============================================================================== @@ -67,7 +76,9 @@ clear_buffers(ServerRef) -> handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> {reply, ok, State#state{buffers = [Buffer| Buffers]}}; handle_call({clear_buffers}, _From, State) -> - {reply, ok, State#state{buffers = []}}. + {reply, ok, State#state{buffers = []}}; +handle_call({fetch, Topic, Partition, Options}, _From, State) -> + handle_fetch(Topic, Partition, Options, State). -spec handle_info(any(), state()) -> {noreply, state()}. handle_info({connected, Socket}, State) -> @@ -163,6 +174,30 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, end end. +handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, + socket = Socket, + name = Name}) -> + Offset = proplists:get_value(offset, Options, 0), + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + {ok, CorrelationId, NewState} = build_fetch_correlation_id(Options, State), + Request = {Topic, {Partition, Offset, 2147483647}}, + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(NewState)}; + ok -> + lager:debug("~p sent request ~p", [Name, CorrelationId]), + {reply, ok, NewState} + end. + % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, tcp_options = TCPOpts, @@ -173,8 +208,39 @@ handle_tcp_close(State = #state{retry_interval = RetryInterval, _Pid = spawn_link(?MODULE, connect, Params), State#state{socket = undefined}. -handle_tcp_data(Bin, State = #state{connector = Connector, ets = EtsName, - name = Name}) -> +handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, State), + case lists:keytake(CorrelationId, 1, Fetches) of + {value, {CorrelationId, Consumer, FetchState}, NewFetches} -> + NewState = State#state{fetches = NewFetches}, + handle_fetch_response(Bin, Consumer, FetchState, NewState); + false -> + handle_produce_response(Bin, State) + end. + +handle_fetch_response(Bin, Consumer, FetchState, State) -> + case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of + {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> + send_messages(Consumer, {message_count, MessagesInPartition}, true), + send_messages(Consumer, {consume_done, Messages}, true), + {ok, State#state{fetching = void}}; + {incomplete, CorrelationId, Topics, NewFetchState} -> + _ = case Topics of + [{_, [{_, Messages}]}] -> + send_messages(Consumer, {consumed, Messages}, false); + _ -> + ignore + end, + Fetches = State#state.fetches, + NewFetches = [{CorrelationId, Consumer, NewFetchState} | Fetches], + {ok, State#state{fetches = NewFetches, fetching = CorrelationId}}; + Error -> + kafkerl_utils:send_event(Consumer, Error), + {ok, State#state{fetching = void}} + end. 
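handle_fetch/4 pins the per-partition MaxBytes to 2147483647 (2^31 - 1, the largest value the signed 32-bit protocol field allows) and takes offset, max_wait and min_bytes from the options. A sketch of the same payload construction with placeholder topic, partition, client id and correlation id:

    -module(fetch_request_sketch).
    -export([build/3]).

    %% Mirrors the request built in handle_fetch/4; the tuple layout is
    %% {Topic, {Partition, Offset, MaxBytes}}.
    build(Offset, MaxWait, MinBytes) ->
      Request = {<<"test1">>, {0, Offset, 2147483647}},
      kafkerl_protocol:build_fetch_request(Request, <<"kafkerl_client">>, 1,
                                           MaxWait, MinBytes).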
+ +handle_produce_response(Bin, State = #state{connector = Connector, name = Name, + ets = EtsName}) -> case kafkerl_protocol:parse_produce_response(Bin) of {ok, CorrelationId, Topics} -> case ets:lookup(EtsName, CorrelationId) of @@ -237,6 +303,12 @@ build_correlation_id(State = #state{request_number = RequestNumber, CorrelationId = (ConnIdx bsl 22) bor NextRequest, {ok, CorrelationId, State#state{request_number = NextRequest}}. +build_fetch_correlation_id(Options, State = #state{fetches = Fetches}) -> + Consumer = proplists:get_value(consumer, Options), + {ok, CorrelationId, NewState} = build_correlation_id(State), + NewFetches = [{CorrelationId, Consumer, void} | Fetches], + {ok, CorrelationId, NewState#state{fetches = NewFetches}}. + % TODO: Refactor this function, it is not sufficiently clear what it does separate_errors(Topics) -> separate_errors(Topics, {[], []}). @@ -262,8 +334,8 @@ handle_errors(Errors, Messages, Name) -> handle_error({Topic, Partition, Error}, Messages, Name) when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse - Error =:= ?LEADER_NOT_AVAILABLE orelse - Error =:= ?NOT_LEADER_FOR_PARTITION -> + Error =:= ?NOT_LEADER_FOR_PARTITION orelse + Error =:= ?LEADER_NOT_AVAILABLE -> case get_message_for_error(Topic, Partition, Messages, Name) of undefined -> false; Message -> {true, Message} @@ -328,4 +400,14 @@ get_messages_from(Ets, Retries) -> _Error -> lager:warning("giving up on reading from the ETS buffer"), [] - end. \ No newline at end of file + end. + +parse_correlation_id(Bin, #state{fetching = void}) -> + {ok, _CorrelationId, _NewBin} = kafkerl_protocol:parse_correlation_id(Bin); +parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> + {ok, CorrelationId, Bin}. + +send_messages(_Consumer, {_EventType, []}, false = _SendEmptyMessages) -> + ok; +send_messages(Consumer, Event, _SendEmptyMessages) -> + kafkerl_utils:send_event(Consumer, Event). \ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index c5ee8bb..69f3533 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -4,8 +4,15 @@ -behaviour(gen_server). %% API --export([send/3, request_metadata/1, request_metadata/2, request_metadata/3, - subscribe/2, subscribe/3, get_partitions/1, unsubscribe/2]). +% Metadata +-export([request_metadata/1, request_metadata/2, request_metadata/3, + get_partitions/1]). +% Produce +-export([send/3]). +% Consume +-export([fetch/4]). +% Common +-export([subscribe/2, subscribe/3, unsubscribe/2]). % Only for internal use -export([do_request_metadata/6, make_metadata_request/1]). % Only for broker connections @@ -19,9 +26,8 @@ -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). --type server_ref() :: atom() | pid(). -type broker_mapping_key() :: {topic(), partition()}. --type broker_mapping() :: {broker_mapping_key(), server_ref()}. +-type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. -record(state, {brokers = [] :: [socket_address()], broker_mapping = void :: [broker_mapping()] | void, @@ -35,7 +41,8 @@ pending = [] :: [basic_message()], last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), - last_dump_name = {"", 0} :: {string(), integer()}}). + last_dump_name = {"", 0} :: {string(), integer()}, + default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. %%============================================================================== @@ -45,7 +52,7 @@ start_link(Name, Config) -> gen_server:start_link({local, Name}, ?MODULE, [Config], []). 
--spec send(server_ref(), basic_message(), kafkerl:produce_option()) -> +-spec send(kafkerl:server_ref(), basic_message(), kafkerl:options()) -> ok | error(). send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), @@ -62,7 +69,13 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> gen_server:call(ServerRef, {send, Message}) end. --spec get_partitions(server_ref()) -> [{topic(), [partition()]}] | error(). +-spec fetch(kafkerl:server_ref(), topic(), partition(), kafkerl:options()) -> + ok. +fetch(ServerRef, Topic, Partition, Options) -> + gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). + +-spec get_partitions(kafkerl:server_ref()) -> + [{topic(), [partition()]}] | error(). get_partitions(ServerRef) -> case gen_server:call(ServerRef, {get_partitions}) of {ok, Mapping} -> @@ -71,29 +84,29 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(server_ref(), callback()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), callback()) -> ok | error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). --spec subscribe(server_ref(), callback(), filters()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), callback(), filters()) -> ok | error(). subscribe(ServerRef, Callback, Filter) -> gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). --spec unsubscribe(server_ref(), callback()) -> ok. +-spec unsubscribe(kafkerl:server_ref(), callback()) -> ok. unsubscribe(ServerRef, Callback) -> gen_server:call(ServerRef, {unsubscribe, Callback}). --spec request_metadata(server_ref()) -> ok. +-spec request_metadata(kafkerl:server_ref()) -> ok. request_metadata(ServerRef) -> gen_server:call(ServerRef, {request_metadata}). --spec request_metadata(server_ref(), [topic()] | boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [topic()] | boolean()) -> ok. request_metadata(ServerRef, TopicsOrForced) -> gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). --spec request_metadata(server_ref(), [topic()], boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [topic()], boolean()) -> ok. request_metadata(ServerRef, Topics, Forced) -> gen_server:call(ServerRef, {request_metadata, Topics, Forced}). --spec produce_succeeded(server_ref(), +-spec produce_succeeded(kafkerl:server_ref(), [{topic(), partition(), [binary()], integer()}]) -> ok. produce_succeeded(ServerRef, Messages) -> gen_server:cast(ServerRef, {produce_succeeded, Messages}). 
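The new fetch/4 entry point is a plain gen_server:call/2; the handler further down in this patch resolves the broker connection that owns {Topic, Partition} and forwards the call, appending the connector-wide defaults after the caller's options so caller-supplied values win in proplists lookups. A condensed sketch of that dispatch (module and function names are illustrative):

    -module(fetch_dispatch_sketch).
    -export([dispatch/5]).

    %% Resolve the owning broker connection and forward the fetch.
    dispatch(Topic, Partition, Options, Mapping, Defaults) ->
      case lists:keyfind({Topic, Partition}, 1, Mapping) of
        false ->
          {error, {no_broker, {Topic, Partition}}};
        {_, Broker} ->
          kafkerl_broker_connection:fetch(Broker, Topic, Partition,
                                          Options ++ Defaults)
      end.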
@@ -114,6 +127,8 @@ handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> {reply, ok, State#state{last_dump_name = DumpName}}; handle_call({send, Message}, _From, State) -> handle_send(Message, State); +handle_call({fetch, Topic, Partition, Options}, _From, State) -> + {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({request_metadata}, _From, State) -> {reply, ok, handle_request_metadata(State, [])}; handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> @@ -191,18 +206,22 @@ init([Config]) -> {topics, [binary], required}, {metadata_tcp_timeout, positive_integer, {default, 1500}}, {assume_autocreate_topics, boolean, {default, false}}, - {metadata_request_cooldown, positive_integer, {default, 333}}], + {metadata_request_cooldown, positive_integer, {default, 333}}, + {consumer_min_bytes, positive_integer, {default, 1}}, + {consumer_max_wait, positive_integer, {default, 1500}}], case normalizerl:normalize_proplist(Schema, Config) of {ok, [Brokers, MaxMetadataRetries, ClientId, Topics, RetryInterval, - AutocreateTopics, MetadataRequestCooldown]} -> - State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - client_id = ClientId, - retry_interval = RetryInterval, - autocreate_topics = AutocreateTopics, - max_metadata_retries = MaxMetadataRetries, - metadata_request_cd = MetadataRequestCooldown}, + AutocreateTopics, MetadataRequestCooldown, MinBytes, MaxWait]} -> + DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + client_id = ClientId, + retry_interval = RetryInterval, + autocreate_topics = AutocreateTopics, + max_metadata_retries = MaxMetadataRetries, + metadata_request_cd = MetadataRequestCooldown, + default_fetch_options = DefaultFetchOptions}, {_Pid, _Ref} = make_metadata_request(State), {ok, State}; {errors, Errors} -> @@ -248,12 +267,22 @@ handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, end end. +handle_fetch(_Topic, _Partition, _Options, #state{broker_mapping = void}) -> + {error, not_connected}; +handle_fetch(Topic, Partition, Options, State) -> + case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of + false -> + {error, {no_broker, {Topic, Partition}}}; + {_, Broker} -> + NewOptions = Options ++ State#state.default_fetch_options, + kafkerl_broker_connection:fetch(Broker, Topic, Partition, NewOptions) + end. + handle_get_partitions(#state{broker_mapping = void}) -> {error, not_available}; handle_get_partitions(#state{broker_mapping = Mapping}) -> {ok, Mapping}. - handle_request_metadata(State, Topics) -> handle_request_metadata(State, Topics, false). @@ -313,7 +342,7 @@ do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> do_request_metadata([], _TCPOpts, _Request) -> {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T] = _Brokers, TCPOpts, Request) -> +do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), % Connect to the Broker case gen_tcp:connect(Host, Port, TCPOpts) of diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index deceb09..11b18a9 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -4,7 +4,8 @@ -export([build_produce_request/4, build_fetch_request/5, build_metadata_request/3]). 
--export([parse_produce_response/1, parse_fetch_response/1, +-export([parse_correlation_id/1, + parse_produce_response/1, parse_fetch_response/1, parse_fetch_response/2, parse_metadata_response/1]). -include("kafkerl.hrl"). @@ -32,6 +33,12 @@ build_metadata_request(Topics, CorrelationId, ClientId) -> [build_request_header(ClientId, ?METADATA_KEY, CorrelationId), Request]. % Message parsing +-spec parse_correlation_id(binary()) -> {ok, integer(), binary()}. +parse_correlation_id(<<_Size:32/unsigned-integer, + CorrelationId:32/unsigned-integer, + Remainder/binary>>) -> + {ok, CorrelationId, Remainder}. + -spec parse_produce_response(binary()) -> produce_response(). parse_produce_response(<<_Size:32/unsigned-integer, CorrelationId:32/unsigned-integer, @@ -49,12 +56,16 @@ parse_fetch_response(<<_Size:32/unsigned-integer, {ok, Topics} -> {ok, CorrelationId, Topics}; {incomplete, Topics, {Bin, Steps}} -> - {incomplete, CorrelationId, Topics, {Bin, CorrelationId, Steps}} + {incomplete, CorrelationId, Topics, {Bin, CorrelationId, Steps}}; + {error, _Reason} = Error -> + Error end; parse_fetch_response(_Other) -> {error, unexpected_binary}. -spec parse_fetch_response(binary(), fetch_state()) -> fetch_response(). +parse_fetch_response(Bin, void) -> + parse_fetch_response(Bin); parse_fetch_response(Bin, {Remainder, CorrelationId, Steps}) -> NewBin = <>, parse_steps(NewBin, CorrelationId, Steps). @@ -68,10 +79,10 @@ parse_metadata_response(< {ok, CorrelationId, {Brokers, Metadata}}; - Error -> + {error, _Reason} = Error -> Error end; - Error -> + {error, _Reason} = Error -> Error end; parse_metadata_response(_Other) -> @@ -361,7 +372,9 @@ parse_topics(Count, Bin, Acc) -> Step = {topics, Count}, {incomplete, lists:reverse(Acc, [Topic]), {Remainder, Steps ++ [Step]}}; incomplete -> - {incomplete, lists:reverse(Acc), {Bin, [{topics, Count}]}} + {incomplete, lists:reverse(Acc), {Bin, [{topics, Count}]}}; + {error, _Reason} = Error -> + Error end. parse_topic(< Step = {topic, TopicName}, - {incomplete, {TopicName, Partitions}, {Bin, Steps ++ [Step]}} + {incomplete, {TopicName, Partitions}, {Bin, Steps ++ [Step]}}; + {error, _Reason} = Error -> + Error end; parse_topic(_Bin) -> incomplete. @@ -393,7 +408,9 @@ parse_partitions(Count, Bin, Acc) -> {incomplete, lists:reverse(Acc, [Partition]), NewState}; incomplete -> Step = {partitions, Count}, - {incomplete, lists:reverse(Acc), {Bin, [Step]}} + {incomplete, lists:reverse(Acc), {Bin, [Step]}}; + {error, _Reason} = Error -> + Error end. parse_partition(<> -> - {no_key, Value} + Value end, % 12 is the size of the offset plus the size of the MessageSize int {ok, {KV, MessageSize + 12}, Remainder}; diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index bbcd149..267f369 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -5,6 +5,7 @@ -export([get_tcp_options/1]). -export([merge_messages/1, split_messages/1, valid_message/1]). -export([buffer_name/2]). +-export([gather_consume_responses/0, gather_consume_responses/1]). -include("kafkerl.hrl"). -include("kafkerl_consumers.hrl"). @@ -129,3 +130,22 @@ is_partition({Partition, Messages}) -> (is_binary(Messages) orelse is_list_of_binaries(Messages)); is_partition(_Any) -> false. + +gather_consume_responses() -> + gather_consume_responses(2500). +gather_consume_responses(Timeout) -> + gather_consume_responses(Timeout, []). 
+gather_consume_responses(Timeout, Acc) -> + receive + {message_count, _} -> + % Ignore this one + gather_consume_responses(Acc); + {consumed, Messages} -> + gather_consume_responses(Acc ++ Messages); + {consume_done, Messages} -> + Acc ++ Messages; + {error, _Reason} = Error -> + Error + after Timeout -> + {error, {no_response, Acc}} + end. \ No newline at end of file From 80bd2f37d40532a98be347119ecd204e7c6987a0 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Tue, 4 Aug 2015 11:08:35 -0300 Subject: [PATCH 40/72] fixed bad arity on a function call --- src/kafkerl_utils.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 267f369..b7d8f70 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -139,9 +139,9 @@ gather_consume_responses(Timeout, Acc) -> receive {message_count, _} -> % Ignore this one - gather_consume_responses(Acc); + gather_consume_responses(Timeout, Acc); {consumed, Messages} -> - gather_consume_responses(Acc ++ Messages); + gather_consume_responses(Timeout, Acc ++ Messages); {consume_done, Messages} -> Acc ++ Messages; {error, _Reason} = Error -> From c1fbaaf37cb13532a1248f1e191e1f64840e5281 Mon Sep 17 00:00:00 2001 From: Hernan Rivas Acosta Date: Tue, 4 Aug 2015 11:13:58 -0300 Subject: [PATCH 41/72] removed an unnecesary parameter on send_messages --- src/kafkerl_broker_connection.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 98921d1..713fff7 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -221,13 +221,13 @@ handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> handle_fetch_response(Bin, Consumer, FetchState, State) -> case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> - send_messages(Consumer, {message_count, MessagesInPartition}, true), - send_messages(Consumer, {consume_done, Messages}, true), + send_messages(Consumer, {message_count, MessagesInPartition}), + send_messages(Consumer, {consume_done, Messages}), {ok, State#state{fetching = void}}; {incomplete, CorrelationId, Topics, NewFetchState} -> _ = case Topics of - [{_, [{_, Messages}]}] -> - send_messages(Consumer, {consumed, Messages}, false); + [{_, [{_, [_ | _] = Messages}]}] -> + send_messages(Consumer, {consumed, Messages}); _ -> ignore end, @@ -407,7 +407,5 @@ parse_correlation_id(Bin, #state{fetching = void}) -> parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> {ok, CorrelationId, Bin}. -send_messages(_Consumer, {_EventType, []}, false = _SendEmptyMessages) -> - ok; -send_messages(Consumer, Event, _SendEmptyMessages) -> +send_messages(Consumer, Event) -> kafkerl_utils:send_event(Consumer, Event). 
\ No newline at end of file From 60cf62981cf351f46f85a766802cd54ea15ddd1e Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Thu, 21 Jan 2016 14:37:56 -0300 Subject: [PATCH 42/72] consumer improvements, removed types from .hrl files, improved API --- bin/stop_all_brokers.sh | 2 +- bin/stop_zk.sh | 2 +- include/kafkerl.hrl | 77 --------- include/kafkerl_consumers.hrl | 9 - src/kafkerl.erl | 30 +++- src/kafkerl_broker_connection.erl | 254 +++++++++++++++++----------- src/kafkerl_connector.erl | 50 +++--- src/kafkerl_protocol.erl | 269 +++++++++++++++++++----------- src/kafkerl_sup.erl | 2 - src/kafkerl_utils.erl | 22 ++- 10 files changed, 398 insertions(+), 319 deletions(-) delete mode 100644 include/kafkerl_consumers.hrl diff --git a/bin/stop_all_brokers.sh b/bin/stop_all_brokers.sh index 47bee29..51cec34 100755 --- a/bin/stop_all_brokers.sh +++ b/bin/stop_all_brokers.sh @@ -1 +1 @@ -ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -15 \ No newline at end of file +ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -9 diff --git a/bin/stop_zk.sh b/bin/stop_zk.sh index a63f1fc..b93e032 100755 --- a/bin/stop_zk.sh +++ b/bin/stop_zk.sh @@ -1 +1 @@ -ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}' | xargs kill -15 \ No newline at end of file +ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}' | xargs kill -9 diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index 64964c3..bf4955c 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -1,80 +1,3 @@ -%% Constants -% Misc --define(ETS_BUFFER, ets_buffer). --define(DEFAULT_TCP_OPTS, lists:sort([{mode, binary}, {packet, 0}])). -% Compression --define(COMPRESSION_NONE, none). --define(COMPRESSION_GZIP, gzip). --define(COMPRESSION_SNAPPY, snappy). --define(KAFKERL_COMPRESSION_TYPES, [?COMPRESSION_NONE, - ?COMPRESSION_GZIP, - ?COMPRESSION_SNAPPY]). -% API keys --define(PRODUCE_KEY, 0). --define(FETCH_KEY, 1). --define(OFFSET_KEY, 2). --define(METADATA_KEY, 3). - -%% Common --type error_code() :: -1..16. --type correlation_id() :: non_neg_integer(). - -%% Connection --type address_host() :: string(). --type address_port() :: 1..65535. --type socket_address() :: {address_host(), address_port()}. --type broker_id() :: integer(). --type broker() :: {broker_id(), socket_address()}. - -%% Configuration --type compression() :: ?COMPRESSION_NONE | - ?COMPRESSION_GZIP | - ?COMPRESSION_SNAPPY. - -%% Requests --type client_id() :: binary(). --type topic() :: binary(). --type partition() :: integer(). --type payload() :: binary() | [binary()]. --type basic_message() :: {topic(), partition(), payload()}. --type merged_message() :: basic_message() | - {topic(), [{partition(), payload()}]} | - [merged_message()]. - --type fetch_offset() :: integer(). --type fetch_max_bytes() :: integer(). --type fetch_partition() :: {partition(), fetch_offset(), fetch_max_bytes()} | - [fetch_partition()]. --type fetch_request() :: {topic(), fetch_partition()} | - [fetch_request()]. - -%% Reponses --type error() :: {error, atom() | {atom(), any()}}. - -%% Produce responses --type produce_partition() :: {partition(), error_code(), integer()}. --type produce_topic() :: {topic(), [produce_partition()]}. --type produce_response() :: {ok, correlation_id(), [produce_topic()]}. - -%% Fetch responses --type messages() :: [{topic(), [{{partition(), integer()}, - [binary() | {binary(), binary()}]}]}]. --type fetch_state() :: {binary(), integer(), [any()]} | void. 
--type fetch_response() :: {ok, integer(), messages()} | - {incomplete, integer(), messages(), fetch_state()} | - error(). - -%% Metadata responses --type leader() :: integer(). --type replica() :: integer(). --type isr() :: integer(). --type partition_metadata() :: {error_code(), partition(), broker_id(), - [replica()], [isr()]}. --type topic_metadata() :: {error_code(), topic(), [partition_metadata()]}. --type metadata() :: {[broker()], [topic_metadata()]}. --type metadata_response() :: {ok, correlation_id(), metadata()} | - error(). - %% Error codes -define(NO_ERROR, 0). -define(OFFSET_OUT_OF_RANGE, 1). diff --git a/include/kafkerl_consumers.hrl b/include/kafkerl_consumers.hrl deleted file mode 100644 index 6b66190..0000000 --- a/include/kafkerl_consumers.hrl +++ /dev/null @@ -1,9 +0,0 @@ -% Pid, M:F or M:F(A1, A2, ..., An) --type callback() :: pid() | - fun() | - {atom(), atom()} | - {atom(), atom(), [any()]}. - --type filters() :: all | [atom()]. - --type message_metadata() :: {done | incomplete, integer(), integer()}. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 320aa23..32fb06e 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -3,25 +3,35 @@ -export([start/0, start/2]). -export([produce/3, produce/4, produce/5, - consume/2, consume/3, consume/4, + consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, request_metadata/0, request_metadata/1, request_metadata/2, partitions/0, partitions/1]). -export([version/0]). --include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). - %% Types +-type callback() :: pid() | + fun() | + {atom(), atom()} | + {atom(), atom(), [any()]}. -type option() :: {buffer_size, integer() | infinity} | {dump_location, string()} | {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | - {offset, integer()}. + {offset, integer()} | + {fetch_interval, false | integer()}. -type options() :: [option()]. -type server_ref() :: atom() | pid(). --export_type([options/0, server_ref/0]). +-type error() :: {error, atom() | {atom(), any()}}. + +-type topic() :: binary(). +-type partition() :: integer(). +-type payload() :: binary() | [binary()]. +-type basic_message() :: {topic(), partition(), payload()}. + +-export_type([server_ref/0, error/0, options/0, callback/0, + topic/0, partition/0, payload/0, basic_message/0]). %%============================================================================== %% API @@ -76,6 +86,14 @@ consume(ServerRef, Topic, Partition, Options) -> kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) end. +-spec stop_consuming(topic(), partition()) -> ok. +stop_consuming(Topic, Partition) -> + stop_consuming(?MODULE, Topic, Partition). + +-spec stop_consuming(server_ref(), topic(), partition()) -> ok. +stop_consuming(ServerRef, Topic, Partition) -> + kafkerl_connector:stop_fetch(ServerRef, Topic, Partition). + %% Metadata API -spec request_metadata() -> ok. request_metadata() -> diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 713fff7..0cccbfb 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,7 +4,7 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1, fetch/4]). +-export([add_buffer/2, clear_buffers/1, fetch/4, stop_fetch/3]). % Only for internal use -export([connect/6]). % Supervisors @@ -14,20 +14,25 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). --type server_ref() :: atom() | pid(). 
-type conn_idx() :: 0..1023. -type start_link_response() :: {ok, atom(), pid()} | ignore | {error, any()}. +-record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), + server_ref = undefined :: kafkerl:server_ref(), + topic = undefined :: kafkerl:topic(), + partition = undefined :: kafkerl:partition(), + options = undefined :: kafkerl:options(), + state = void :: kafkerl_protocol:fetch_state()}). + -record(state, {name = undefined :: atom(), buffers = [] :: [atom()], conn_idx = undefined :: conn_idx(), client_id = undefined :: binary(), - socket = undefined :: undefined | port(), - address = undefined :: undefined | socket_address(), - connector = undefined :: undefined | pid(), - tref = undefined :: undefined | any(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + connector = undefined :: pid(), + tref = undefined :: any(), tcp_options = [] :: [any()], max_retries = 0 :: integer(), retry_interval = 0 :: integer(), @@ -35,16 +40,14 @@ pending_requests = [] :: [integer()], max_time_queued = 0 :: integer(), ets = undefined :: atom(), - fetching = void :: integer() | void, - fetches = [] :: [{correlation_id(), - callback(), - fetch_state()}]}). + fetches = [] :: [#fetch{}], + current_fetch = void :: void | kafkerl_protocol:correlation_id()}). -type state() :: #state{}. %%============================================================================== %% API %%============================================================================== --spec start_link(conn_idx(), pid(), socket_address(), any()) -> +-spec start_link(conn_idx(), pid(), kafkerl_connector:address(), any()) -> start_link_response(). start_link(Id, Connector, Address, Config) -> NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), @@ -57,17 +60,21 @@ start_link(Id, Connector, Address, Config) -> Other end. --spec add_buffer(server_ref(), atom()) -> ok. +-spec add_buffer(kafkerl:server_ref(), atom()) -> ok. add_buffer(ServerRef, Buffer) -> gen_server:call(ServerRef, {add_buffer, Buffer}). --spec clear_buffers(server_ref()) -> ok. +-spec clear_buffers(kafkerl:server_ref()) -> ok. clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). --spec fetch(server_ref(), topic(), partition(), kafkerl:options()) -> ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). + gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). + +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> ok. +stop_fetch(ServerRef, Topic, Partition) -> + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). %%============================================================================== %% gen_server callbacks @@ -77,8 +84,10 @@ handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> {reply, ok, State#state{buffers = [Buffer| Buffers]}}; handle_call({clear_buffers}, _From, State) -> {reply, ok, State#state{buffers = []}}; -handle_call({fetch, Topic, Partition, Options}, _From, State) -> - handle_fetch(Topic, Partition, Options, State). +handle_call({fetch, ServerRef, Topic, Partition, Options}, _From, State) -> + handle_fetch(ServerRef, Topic, Partition, Options, State); +handle_call({stop_fetch, Topic, Partition}, _From, State) -> + handle_stop_fetch(Topic, Partition, State). -spec handle_info(any(), state()) -> {noreply, state()}. 
handle_info({connected, Socket}, State) -> @@ -87,7 +96,7 @@ handle_info(connection_timeout, State) -> {stop, {error, unable_to_connect}, State}; handle_info({tcp_closed, _Socket}, State = #state{name = Name, address = {Host, Port}}) -> - lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), + _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), NewState = handle_tcp_close(State), {noreply, NewState}; handle_info({tcp, _Socket, Bin}, State) -> @@ -99,7 +108,7 @@ handle_info({flush, Time}, State) -> {ok, _Tref} = queue_flush(Time), handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), + _ = lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), {noreply, State}. % Boilerplate @@ -135,7 +144,7 @@ init([Id, Connector, Address, Config, Name]) -> {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> - lager:critical("broker connection config error ~p", [E]) + _ = lager:critical("broker connection config error ~p", [E]) end, Errors), {stop, bad_config} end. @@ -156,32 +165,31 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, MergedMessages -> Request = kafkerl_protocol:build_produce_request(MergedMessages, ClientId, - CorrelationId, - ?COMPRESSION_NONE), + CorrelationId), true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), - lager:debug("~p sending ~p", [Name, Request]), + _ = lager:debug("~p sending ~p", [Name, Request]), case gen_tcp:send(Socket, Request) of {error, Reason} -> - lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), gen_tcp:close(Socket), ets:delete_all_objects(EtsName, CorrelationId), ok = resend_messages(MergedMessages, Connector), {noreply, handle_tcp_close(NewState)}; ok -> - lager:debug("~p sent message ~p", [Name, CorrelationId]), + _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), {noreply, NewState} end end. -handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, - socket = Socket, - name = Name}) -> +handle_fetch(ServerRef, Topic, Partition, Options, + State = #state{fetches = Fetches, client_id = ClientId, + socket = Socket, name = Name}) -> + {ok, CorrelationId, NewState} = build_correlation_id(State), Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, MaxWait = proplists:get_value(max_wait, Options), MinBytes = proplists:get_value(min_bytes, Options), - {ok, CorrelationId, NewState} = build_fetch_correlation_id(Options, State), - Request = {Topic, {Partition, Offset, 2147483647}}, Payload = kafkerl_protocol:build_fetch_request(Request, ClientId, CorrelationId, @@ -189,14 +197,41 @@ handle_fetch(Topic, Partition, Options, State = #state{client_id = ClientId, MinBytes), case gen_tcp:send(Socket, Payload) of {error, Reason} -> - lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(NewState)}; + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; ok -> - lager:debug("~p sent request ~p", [Name, CorrelationId]), - {reply, ok, NewState} - end. 
+ _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + options = Options}, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches]}} + end; +handle_fetch(_ServerRef, _Topic, _Partition, _Options, State) -> + {reply, {error, fetch_in_progress}, State}. + +handle_stop_fetch(Topic, Partition, State) -> + % Leave current fetch as it is + NewFetches = remove_fetch(Topic, Partition, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}. + +remove_fetch(Topic, Partition, CurrentFetches) -> + remove_fetch(Topic, Partition, CurrentFetches, []). +remove_fetch(_Topic, _Partition, [], Acc) -> + Acc; +remove_fetch(Topic, Partition, + [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> + % Clearing the fetch options ensures this fetch will stop sending any messages + % since there is no consumer. This also removes the fetch_interval so it won't + % be requested again. + % Simply removing the fetch here doesn't work since we will still get a server + % response, but we will not be able to properly handle it. + [Fetch#fetch{options = []} | Acc] ++ T; +remove_fetch(Topic, Partition, [H | T], Acc) -> + remove_fetch(Topic, Partition, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, @@ -208,35 +243,55 @@ handle_tcp_close(State = #state{retry_interval = RetryInterval, _Pid = spawn_link(?MODULE, connect, Params), State#state{socket = undefined}. -handle_tcp_data(Bin, State = #state{fetches = Fetches}) -> - {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, State), - case lists:keytake(CorrelationId, 1, Fetches) of - {value, {CorrelationId, Consumer, FetchState}, NewFetches} -> - NewState = State#state{fetches = NewFetches}, - handle_fetch_response(Bin, Consumer, FetchState, NewState); - false -> +handle_tcp_data(Bin, State = #state{fetches = Fetches, + current_fetch = CurrentFetch}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), + case get_fetch(CorrelationId, Fetches) of + Fetch = #fetch{} -> + handle_fetch_response(Bin, Fetch, State); + _ -> handle_produce_response(Bin, State) end. -handle_fetch_response(Bin, Consumer, FetchState, State) -> - case kafkerl_protocol:parse_fetch_response(Bin, FetchState) of - {ok, _CorrelationId, [{_, [{{_, MessagesInPartition}, Messages}]}]} -> - send_messages(Consumer, {message_count, MessagesInPartition}), - send_messages(Consumer, {consume_done, Messages}), - {ok, State#state{fetching = void}}; - {incomplete, CorrelationId, Topics, NewFetchState} -> - _ = case Topics of - [{_, [{_, [_ | _] = Messages}]}] -> - send_messages(Consumer, {consumed, Messages}); - _ -> - ignore - end, - Fetches = State#state.fetches, - NewFetches = [{CorrelationId, Consumer, NewFetchState} | Fetches], - {ok, State#state{fetches = NewFetches, fetching = CorrelationId}}; +handle_fetch_response(Bin, Fetch, State = #state{name = Name, + fetches = Fetches}) -> + Options = Fetch#fetch.options, + Consumer = proplists:get_value(consumer, Options), + case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of + {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> + % The messages can be empty, for example when there are no new messages in + % this partition, if that happens, don't send anything and end the fetch. 
+ ok = send_messages(Consumer, + case Messages of + [] -> []; + _ -> [{consumed, Messages}, {offset, Offset}] + end), + case proplists:get_value(fetch_interval, Options, false) of + false -> {ok, State#state{current_fetch = void}}; + Interval -> + NewOptions = lists:keyreplace(offset, 1, Options, {offset, Offset}), + Arguments = [Fetch#fetch.server_ref, Fetch#fetch.topic, + Fetch#fetch.partition, NewOptions], + _ = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + {ok, State#state{current_fetch = void, + fetches = lists:delete(Fetch, Fetches)}} + end; + {incomplete, CorrelationId, Data, NewFetchState} -> + ok = case Data of + [{_, [{_, Messages}]}] -> + send_messages(Consumer, {consumed, Messages}); + _ -> + % On some cases, kafka will return an incomplete response with no + % messages, in this case since we don't have anything to send, we + % just need to update the fetch state. + ok + end, + {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | + lists:delete(Fetch, Fetches)], + current_fetch = CorrelationId}}; Error -> - kafkerl_utils:send_event(Consumer, Error), - {ok, State#state{fetching = void}} + ok = send_messages(Consumer, Error), + {ok, State#state{current_fetch = void, fetches = lists:delete(Fetch, Fetches)}} end. handle_produce_response(Bin, State = #state{connector = Connector, name = Name, @@ -246,7 +301,7 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, case ets:lookup(EtsName, CorrelationId) of [{CorrelationId, Messages}] -> ets:delete(EtsName, CorrelationId), - {Errors, Successes} = separate_errors(Topics), + {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered spawn(fun() -> notify_success_to_connector(Successes, Messages, Connector) @@ -260,13 +315,13 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {ok, State} end; _ -> - lager:warning("~p was unable to properly process produce response", - [Name]), + _ = lager:warning("~p was unable to properly process produce response", + [Name]), {error, invalid_produce_response} end; Other -> - lager:critical("~p got unexpected response when parsing message: ~p", - [Name, Other]), + _ = lager:critical("~p got unexpected response when parsing message: ~p", + [Name, Other]), {ok, State} end. @@ -303,25 +358,18 @@ build_correlation_id(State = #state{request_number = RequestNumber, CorrelationId = (ConnIdx bsl 22) bor NextRequest, {ok, CorrelationId, State#state{request_number = NextRequest}}. -build_fetch_correlation_id(Options, State = #state{fetches = Fetches}) -> - Consumer = proplists:get_value(consumer, Options), - {ok, CorrelationId, NewState} = build_correlation_id(State), - NewFetches = [{CorrelationId, Consumer, void} | Fetches], - {ok, CorrelationId, NewState#state{fetches = NewFetches}}. - -% TODO: Refactor this function, it is not sufficiently clear what it does -separate_errors(Topics) -> - separate_errors(Topics, {[], []}). +split_errors_and_successes(Topics) -> + split_errors_and_successes(Topics, {[], []}). -separate_errors([], Acc) -> +split_errors_and_successes([], Acc) -> Acc; -separate_errors([{Topic, Partitions} | T], Acc) -> +split_errors_and_successes([{Topic, Partitions} | T], Acc) -> F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> {E, [{Topic, Partition, Offset} | S]}; ({Partition, Error, _}, {E, S}) -> {[{Topic, Partition, Error} | E], S} end, - separate_errors(T, lists:foldl(F, Acc, Partitions)). 
+ split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). handle_errors([], _Messages, _Name) -> ignore; @@ -341,21 +389,21 @@ handle_error({Topic, Partition, Error}, Messages, Name) Message -> {true, Message} end; handle_error({Topic, Partition, Error}, _Messages, Name) -> - lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", + [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), false. get_message_for_error(Topic, Partition, SavedMessages, Name) -> case lists:keyfind(Topic, 1, SavedMessages) of false -> - lager:error("~p found no saved messages for topic ~p, partition ~p", - [Name, Topic, Partition]), + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), undefined; {Topic, Partitions} -> case lists:keyfind(Partition, 1, Partitions) of false -> - lager:error("~p found no saved messages for topic ~p, partition ~p", - [Name, Topic, Partition]), + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), undefined; {Partition, Messages} -> {Topic, Partition, Messages} @@ -363,19 +411,19 @@ get_message_for_error(Topic, Partition, SavedMessages, Name) -> end. connect(Pid, Name, _TCPOpts, {Host, Port} = _Address, _Timeout, 0) -> - lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), + _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), Pid ! connection_timeout; connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> - lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), + _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), case gen_tcp:connect(Host, Port, TCPOpts, 5000) of {ok, Socket} -> - lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), + _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), gen_tcp:controlling_process(Socket, Pid), Pid ! {connected, Socket}; {error, Reason} -> NewRetries = Retries - 1, - lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", - [Name, Host, Port, Reason, NewRetries]), + _ = lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", + [Name, Host, Port, Reason, NewRetries]), timer:sleep(Timeout), connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) end. @@ -398,14 +446,28 @@ get_messages_from(Ets, Retries) -> _Error when Retries > 0 -> get_messages_from(Ets, Retries - 1); _Error -> - lager:warning("giving up on reading from the ETS buffer"), + _ = lager:warning("giving up on reading from the ETS buffer"), [] end. -parse_correlation_id(Bin, #state{fetching = void}) -> - {ok, _CorrelationId, _NewBin} = kafkerl_protocol:parse_correlation_id(Bin); -parse_correlation_id(Bin, #state{fetching = CorrelationId}) -> +parse_correlation_id(Bin, void) -> + kafkerl_protocol:parse_correlation_id(Bin); +parse_correlation_id(Bin, CorrelationId) -> {ok, CorrelationId, Bin}. +get_fetch(_CorrelationId, []) -> + not_found; +get_fetch(CorrelationId, [Fetch = #fetch{correlation_id = CorrelationId} | _T]) -> + Fetch; +get_fetch(CorrelationId, [_ | T]) -> + get_fetch(CorrelationId, T). + +send_messages(_Consumer, []) -> + ok; +send_messages(Consumer, [Event | T]) -> + case send_messages(Consumer, Event) of + ok -> send_messages(Consumer, T); + Error -> Error + end; send_messages(Consumer, Event) -> kafkerl_utils:send_event(Consumer, Event). 
\ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 69f3533..089818e 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -24,36 +24,43 @@ handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). --type broker_mapping_key() :: {topic(), partition()}. +-type kafler_host() :: string(). +-type kafler_port() :: 1..65535. +-type address() :: {kafler_host(), kafler_port()}. + +-type filters() :: all | [atom()]. + +-type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [socket_address()], +-record(state, {brokers = [] :: [address()], broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: client_id(), + client_id = <<>> :: kafkerl_protocol:client_id(), max_metadata_retries = -1 :: integer(), retry_interval = 1 :: non_neg_integer(), config = [] :: {atom(), any()}, autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), callback()}], + callbacks = [] :: [{filters(), kafkerl:callback()}], known_topics = [] :: [binary()], - pending = [] :: [basic_message()], + pending = [] :: [kafkerl:basic_message()], last_metadata_request = 0 :: integer(), metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. +-export_type([address/0]). + %%============================================================================== %% API %%============================================================================== --spec start_link(atom(), any()) -> {ok, pid()} | ignore | error(). +-spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Name, Config) -> gen_server:start_link({local, Name}, ?MODULE, [Config], []). --spec send(kafkerl:server_ref(), basic_message(), kafkerl:options()) -> - ok | error(). +-spec send(kafkerl:server_ref(), kafkerl:basic_message(), kafkerl:options()) -> + ok | kafkerl:error(). send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of @@ -69,13 +76,13 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> gen_server:call(ServerRef, {send, Message}) end. --spec fetch(kafkerl:server_ref(), topic(), partition(), kafkerl:options()) -> +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). -spec get_partitions(kafkerl:server_ref()) -> - [{topic(), [partition()]}] | error(). + [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). get_partitions(ServerRef) -> case gen_server:call(ServerRef, {get_partitions}) of {ok, Mapping} -> @@ -84,13 +91,14 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(kafkerl:server_ref(), callback()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok | kafkerl:error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). --spec subscribe(kafkerl:server_ref(), callback(), filters()) -> ok | error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> + ok | kafkerl:error(). subscribe(ServerRef, Callback, Filter) -> gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). 
--spec unsubscribe(kafkerl:server_ref(), callback()) -> ok. +-spec unsubscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok. unsubscribe(ServerRef, Callback) -> gen_server:call(ServerRef, {unsubscribe, Callback}). @@ -98,16 +106,20 @@ unsubscribe(ServerRef, Callback) -> request_metadata(ServerRef) -> gen_server:call(ServerRef, {request_metadata}). --spec request_metadata(kafkerl:server_ref(), [topic()] | boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()] | boolean()) -> + ok. request_metadata(ServerRef, TopicsOrForced) -> gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). --spec request_metadata(kafkerl:server_ref(), [topic()], boolean()) -> ok. +-spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()], boolean()) -> + ok. request_metadata(ServerRef, Topics, Forced) -> gen_server:call(ServerRef, {request_metadata, Topics, Forced}). --spec produce_succeeded(kafkerl:server_ref(), - [{topic(), partition(), [binary()], integer()}]) -> ok. +-spec produce_succeeded(kafkerl:server_ref(), [{kafkerl:topic(), + kafkerl:partition(), + [binary()], + integer()}]) -> ok. produce_succeeded(ServerRef, Messages) -> gen_server:cast(ServerRef, {produce_succeeded, Messages}). @@ -511,7 +523,7 @@ make_metadata_request(State = #state{brokers = Brokers, spawn_monitor(?MODULE, do_request_metadata, Params). get_timestamp() -> - {A, B, C} = erlang:now(), + {A, B, C} = erlang:timestamp(), (A * 1000000 + B) * 1000 + C div 1000. %%============================================================================== diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 11b18a9..763d719 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -1,19 +1,89 @@ -module(kafkerl_protocol). -author('hernanrivasacosta@gmail.com'). --export([build_produce_request/4, build_fetch_request/5, +-export([build_produce_request/3, build_produce_request/4, + build_fetch_request/5, build_metadata_request/3]). -export([parse_correlation_id/1, parse_produce_response/1, parse_fetch_response/1, parse_fetch_response/2, parse_metadata_response/1]). --include("kafkerl.hrl"). +%% Common +-type error_code() :: -1..16. +-type correlation_id() :: non_neg_integer(). +-type broker_id() :: integer(). +-type broker() :: {broker_id(), kafkerl_connector:address()}. + +%% Requests +-type client_id() :: binary(). +-type merged_message() :: kafkerl:basic_message() | + {kafkerl:topic(), [{kafkerl:partition(), kafkerl:payload()}]} | + [merged_message()]. +-type fetch_offset() :: integer(). +-type fetch_max_bytes() :: integer(). +-type fetch_partition() :: {kafkerl:partition(), fetch_offset(), fetch_max_bytes()} | + [fetch_partition()]. +-type fetch_request() :: {kafkerl:topic(), fetch_partition()} | + [fetch_request()]. + +%% Responses +-type produce_partition() :: {kafkerl:partition(), error_code(), integer()}. +-type produce_topic() :: {kafkerl:topic(), [produce_partition()]}. +-type produce_response() :: {ok, correlation_id(), [produce_topic()]}. +-type replica() :: integer(). +-type isr() :: integer(). +-type partition_metadata() :: {error_code(), kafkerl:partition(), broker_id(), + [replica()], [isr()]}. +-type topic_metadata() :: {error_code(), kafkerl:topic(), [partition_metadata()]}. +-type metadata() :: {[broker()], [topic_metadata()]}. +-type metadata_response() :: {ok, correlation_id(), metadata()} | + kafkerl:error(). +-type messages() :: [{kafkerl:topic(), + [{{kafkerl:partition(), integer()}, + [binary() | {binary(), binary()}]}]}]. 
+-type fetch_state() :: {binary(), integer(), [any()]} | void. +-type fetch_response() :: {ok, integer(), messages()} | + {incomplete, integer(), messages(), fetch_state()} | + kafkerl:error(). + +% Compression +-define(COMPRESSION_NONE, none). +-define(COMPRESSION_GZIP, gzip). +-define(COMPRESSION_SNAPPY, snappy). +-define(KAFKERL_COMPRESSION_TYPES, [?COMPRESSION_NONE, + ?COMPRESSION_GZIP, + ?COMPRESSION_SNAPPY]). + +%% Configuration +-type compression() :: ?COMPRESSION_NONE | + ?COMPRESSION_GZIP | + ?COMPRESSION_SNAPPY. + +% API keys +-define(PRODUCE_KEY, 0). +-define(FETCH_KEY, 1). +-define(OFFSET_KEY, 2). +-define(METADATA_KEY, 3). + +% C style binary types +-define(SHORT, 16/signed-integer). +-define(INT, 32/signed-integer). +-define(UCHAR, 8/unsigned-integer). +-define(USHORT, 16/unsigned-integer). +-define(UINT, 32/unsigned-integer). +-define(ULONG, 64/unsigned-integer). + +% Type exports +-export_type([merged_message/0, client_id/0, correlation_id/0, fetch_state/0]). %%============================================================================== %% API %%============================================================================== % Message building +-spec build_produce_request(merged_message(), client_id(), correlation_id()) -> iodata(). +build_produce_request(Data, ClientId, CorrelationId) -> + build_produce_request(Data, ClientId, CorrelationId, ?COMPRESSION_NONE). -spec build_produce_request(merged_message(), client_id(), correlation_id(), compression()) -> iodata(). build_produce_request(Data, ClientId, CorrelationId, Compression) -> @@ -26,7 +96,8 @@ build_fetch_request(Data, ClientId, CorrelationId, MaxWait, MinBytes) -> {Size, Request} = build_fetch_request(Data, MaxWait, MinBytes), [build_request_header(ClientId, ?FETCH_KEY, CorrelationId, Size), Request]. --spec build_metadata_request(topic() | [topic()], correlation_id(), +-spec build_metadata_request(kafkerl:topic() | [kafkerl:topic()], + correlation_id(), client_id()) -> iodata(). build_metadata_request(Topics, CorrelationId, ClientId) -> {_Size, Request} = build_metadata_request(Topics), @@ -34,23 +105,23 @@ build_metadata_request(Topics, CorrelationId, ClientId) -> % Message parsing -spec parse_correlation_id(binary()) -> {ok, integer(), binary()}. -parse_correlation_id(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, +parse_correlation_id(<<_Size:?UINT, + CorrelationId:?UINT, Remainder/binary>>) -> {ok, CorrelationId, Remainder}. -spec parse_produce_response(binary()) -> produce_response(). -parse_produce_response(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, - TopicCount:32/unsigned-integer, +parse_produce_response(<<_Size:?UINT, + CorrelationId:?UINT, + TopicCount:?UINT, TopicsBin/binary>>) -> {ok, Topics} = parse_produced_topics(TopicCount, TopicsBin), {ok, CorrelationId, Topics}. -spec parse_fetch_response(binary()) -> fetch_response(). -parse_fetch_response(<<_Size:32/unsigned-integer, - CorrelationId:32/unsigned-integer, - TopicCount:32/unsigned-integer, +parse_fetch_response(<<_Size:?UINT, + CorrelationId:?UINT, + TopicCount:?UINT, TopicsBin/binary>>) -> case parse_topics(TopicCount, TopicsBin) of {ok, Topics} -> @@ -71,11 +142,11 @@ parse_fetch_response(Bin, {Remainder, CorrelationId, Steps}) -> parse_steps(NewBin, CorrelationId, Steps). -spec parse_metadata_response(binary()) -> metadata_response(). 
-parse_metadata_response(<>) -> case parse_brokers(BrokerCount, BrokersBin) of - {ok, Brokers, <>} -> + {ok, Brokers, <>} -> case parse_topic_metadata(TopicCount, TopicsBin) of {ok, Metadata} -> {ok, CorrelationId, {Brokers, Metadata}}; @@ -95,16 +166,16 @@ build_request_header(ClientId, ApiKey, CorrelationId) -> % Build the header (http://goo.gl/5SNNTV) ApiVersion = 0, % The version should be 0, it's not a placeholder ClientIdSize = byte_size(ClientId), - [<>, + [<>, ClientId]. build_request_header(ClientId, ApiKey, CorrelationId, RequestSize) -> % 10 is the size of the header MessageSize = byte_size(ClientId) + RequestSize + 10, - [<>, + [<>, build_request_header(ClientId, ApiKey, CorrelationId)]. %% PRODUCE REQUEST @@ -119,14 +190,14 @@ build_produce_request({Topic, Partition, Messages}, Compression) -> TopicSize = byte_size(Topic), {Size, MessageSet} = build_message_set(Messages, Compression), {Size + TopicSize + 24, - [<<-1:16/signed-integer, - -1:32/signed-integer, % Timeout - 1:32/unsigned-integer, % TopicCount - TopicSize:16/unsigned-integer>>, + [<<-1:?SHORT, + -1:?INT, % Timeout + 1:?UINT, % TopicCount + TopicSize:?USHORT>>, Topic, - <<1:32/unsigned-integer, % PartitionCount - Partition:32/unsigned-integer, - Size:32/unsigned-integer>>, + <<1:?UINT, % PartitionCount + Partition:?UINT, + Size:?UINT>>, MessageSet]}; build_produce_request(Data, Compression) -> % Build the body of the request with multiple topics/partitions @@ -135,9 +206,9 @@ build_produce_request(Data, Compression) -> {TopicsSize, Topics} = build_topics(Data, Compression), % 10 is the size of the header {TopicsSize + 10, - [<<-1:16/signed-integer, % RequiredAcks - -1:32/signed-integer, % Timeout - TopicCount:32/unsigned-integer>>, + [<<-1:?SHORT, % RequiredAcks + -1:?INT, % Timeout + TopicCount:?UINT>>, Topics]}. build_topics(Topics, Compression) -> @@ -157,9 +228,9 @@ build_topic({Topic, Partitions}, Compression) -> {Size, BuiltPartitions} = build_partitions(Partitions, Compression), % 6 is the size of both the partition count int and the topic size int {Size + TopicSize + 6, - [<>, + PartitionCount:?UINT>>, BuiltPartitions]}. build_partitions(Partitions, Compression) -> @@ -177,8 +248,8 @@ build_partition({Partition, Messages}, Compression) -> {Size, MessageSet} = build_message_set(Messages, Compression), % 8 is the size of the header, 4 bytes of the partition and 4 for the size {Size + 8, - [<>, + [<>, MessageSet]}. % Docs at http://goo.gl/4W7J0r @@ -207,18 +278,18 @@ build_message(Bin) -> Crc = erlang:crc32(Message), % 12 is the size of the offset plus the size int itself {Size + 12, - [<>, + [<>, Message]}. get_message_header(MessageSize, Compression) -> MagicByte = 0, % Version id Attributes = compression_to_int(Compression), - <>. + <>. 
compression_to_int(?COMPRESSION_NONE) -> 0; compression_to_int(?COMPRESSION_GZIP) -> 1; @@ -241,26 +312,26 @@ build_fetch_request({Topic, {Partition, Offset, MaxBytes}}, MaxWait, MinBytes) -> TopicSize = byte_size(Topic), {TopicSize + 38, - [<<-1:32/signed-integer, % ReplicaId - MaxWait:32/unsigned-integer, - MinBytes:32/unsigned-integer, - 1:32/unsigned-integer, % TopicCount - TopicSize:16/unsigned-integer>>, + [<<-1:?INT, % ReplicaId + MaxWait:?UINT, + MinBytes:?UINT, + 1:?UINT, % TopicCount + TopicSize:?USHORT>>, Topic, - <<1:32/unsigned-integer, % PartitionCount - Partition:32/unsigned-integer, - Offset:64/unsigned-integer, - MaxBytes:32/unsigned-integer>>]}; + <<1:?UINT, % PartitionCount + Partition:?UINT, + Offset:?ULONG, + MaxBytes:?UINT>>]}; build_fetch_request(Data, MaxWait, MinBytes) -> ReplicaId = -1, % This should always be -1 TopicCount = length(Data), {TopicSize, Topics} = build_fetch_topics(Data), % 16 is the size of the header {TopicSize + 16, - [<>, + [<>, Topics]}. build_fetch_topics(Topics) -> @@ -280,9 +351,9 @@ build_fetch_topic({Topic, Partitions}) -> {Size, BuiltPartitions} = build_fetch_partitions(Partitions), % 6 is the size of the topicSize's 16 bytes + 32 from the partition count {Size + TopicSize + 6, - [<>, + PartitionCount:?UINT>>, BuiltPartitions]}. build_fetch_partitions(Partitions) -> @@ -296,20 +367,20 @@ build_fetch_partitions([H | T] = _Partitions, {OldSize, IOList}) -> build_fetch_partition({Partition, Offset, MaxBytes}) -> {16, - <>}. + <>}. build_metadata_request([]) -> % Builds an empty metadata request that returns all topics and partitions - {4, <<0:32/unsigned-integer>>}; + {4, <<0:?UINT>>}; build_metadata_request(Topic) when is_binary(Topic) -> build_metadata_request([Topic]); build_metadata_request(Topics) -> TopicCount = length(Topics), {Size, BuiltTopics} = build_metadata_topics(Topics), {Size + 4, - [<>, + [<>, BuiltTopics]}. build_metadata_topics(Topics) -> @@ -319,7 +390,7 @@ build_metadata_topics([] = _Topics, {Size, IOList}) -> {Size, lists:reverse(IOList)}; build_metadata_topics([H | T] = _Partitions, {OldSize, IOList}) -> Size = byte_size(H), - Topic = [<>, H], + Topic = [<>, H], build_metadata_topics(T, {OldSize + Size + 2, [Topic | IOList]}). %%============================================================================== @@ -334,9 +405,9 @@ parse_produced_topics(Count, <<>>, Acc) when Count =< 0 -> parse_produced_topics(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing produce response, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_produced_topics(Count, <>, Acc) -> {ok, Partitions, Remainder} = parse_produced_partitions(PartitionCount, PartitionsBin), @@ -347,9 +418,9 @@ parse_produced_partitions(Count, Bin) -> parse_produced_partitions(Count, Bin, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Bin}; -parse_produced_partitions(Count, <>, Acc) -> PartitionData = {Partition, ErrorCode, Offset}, parse_produced_partitions(Count - 1, Remainder, [PartitionData | Acc]). @@ -377,9 +448,9 @@ parse_topics(Count, Bin, Acc) -> Error end. -parse_topic(<>) -> case parse_partitions(PartitionCount, PartitionsBin) of {ok, Partitions, Remainder} -> @@ -413,10 +484,10 @@ parse_partitions(Count, Bin, Acc) -> Error end. 
-parse_partition(<>) -> Partition = {PartitionId, HighwaterMarkOffset}, case parse_message_set(MessageSetSize, MessageSetBin) of @@ -426,8 +497,8 @@ parse_partition(<>) -> kafkerl_error:get_error_tuple(ErrorCode); parse_partition(<<>>) -> @@ -446,21 +517,21 @@ parse_message_set(RemainingSize, Bin, Acc) -> {incomplete, lists:reverse(Acc), {Bin, [{message_set, RemainingSize}]}} end. -parse_message(<<_Offset:64/unsigned-integer, - MessageSize:32/signed-integer, +parse_message(<<_Offset:?ULONG, + MessageSize:?INT, Message:MessageSize/binary, Remainder/binary>>) -> - <<_Crc:32/unsigned-integer, - _MagicByte:8/unsigned-integer, - _Attributes:8/unsigned-integer, + <<_Crc:?UINT, + _MagicByte:?UCHAR, + _Attributes:?UCHAR, KeyValue/binary>> = Message, KV = case KeyValue of - <> -> + <> -> {Key, Value}; % 4294967295 is -1 and it signifies an empty Key http://goo.gl/Ssl4wq - <<4294967295:32/unsigned-integer, - ValueSize:32/unsigned-integer, Value:ValueSize/binary>> -> + <<4294967295:?UINT, + ValueSize:?UINT, Value:ValueSize/binary>> -> Value end, % 12 is the size of the offset plus the size of the MessageSize int @@ -474,10 +545,10 @@ parse_brokers(Count, Bin) -> parse_brokers(Count, Bin, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Bin}; -parse_brokers(Count, <>, Acc) -> HostStr = binary_to_list(Host), parse_brokers(Count - 1, Remainder, [{Id, {HostStr, Port}} | Acc]). @@ -490,18 +561,18 @@ parse_topic_metadata(Count, <<>>, Acc) when Count =< 0 -> parse_topic_metadata(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing topic metadata, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_topic_metadata(Count, <<0:16/signed-integer, - TopicSize:16/unsigned-integer, +parse_topic_metadata(Count, <<0:?SHORT, + TopicSize:?USHORT, TopicName:TopicSize/binary, - PartitionCount:32/unsigned-integer, + PartitionCount:?UINT, PartitionsBin/binary>>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, PartitionsBin), TopicMetadata = {0, TopicName, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]); -parse_topic_metadata(Count, <>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(0, Remainder), TopicMetadata = {ErrorCode, <<"unkown">>, PartitionsMetadata}, @@ -512,13 +583,13 @@ parse_partition_metadata(Count, Bin) -> parse_partition_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_partition_metadata(Count, <>, Acc) -> {ok, Replicas, Remainder} = parse_replica_metadata(ReplicaCount, ReplicasBin), - <> = Remainder, + <> = Remainder, {ok, Isr, IsrRemainder} = parse_isr_metadata(IsrCount, IsrBin), PartitionMetadata = {ErrorCode, Partition, Leader, Replicas, Isr}, parse_partition_metadata(Count - 1, IsrRemainder, [PartitionMetadata | Acc]). @@ -528,7 +599,7 @@ parse_replica_metadata(Count, Bin) -> parse_replica_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_replica_metadata(Count, <>, Acc) -> parse_replica_metadata(Count - 1, Remainder, [Replica | Acc]). @@ -537,7 +608,7 @@ parse_isr_metadata(Count, Bin) -> parse_isr_metadata(Count, Remainder, Acc) when Count =< 0 -> {ok, lists:reverse(Acc), Remainder}; -parse_isr_metadata(Count, <>, Acc) -> parse_isr_metadata(Count - 1, Remainder, [Isr | Acc]). diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index b720d83..fd989b4 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -5,8 +5,6 @@ -export([start_link/0, init/1]). --include("kafkerl.hrl"). 
- -define(SERVER, ?MODULE). -type restart_strategy() :: {supervisor:strategy(), diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index b7d8f70..6b7b515 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -7,17 +7,14 @@ -export([buffer_name/2]). -export([gather_consume_responses/0, gather_consume_responses/1]). --include("kafkerl.hrl"). --include("kafkerl_consumers.hrl"). - %%============================================================================== %% API %%============================================================================== --spec send_error(callback(), any()) -> ok. +-spec send_error(kafkerl:callback(), any()) -> ok. send_error(Callback, Reason) -> send_event(Callback, {error, Reason}). --spec send_event(callback(), any()) -> ok | {error, {bad_callback, any()}}. +-spec send_event(kafkerl:callback(), any()) -> ok | {error, {bad_callback, any()}}. send_event({M, F}, Data) -> spawn(fun() -> M:F(Data) end), ok; @@ -33,18 +30,24 @@ send_event(Function, Data) when is_function(Function, 1) -> send_event(BadCallback, _Data) -> {error, {bad_callback, BadCallback}}. +default_tcp_options() -> + % This list has to be sorted + [{mode, binary}, {packet, 0}]. get_tcp_options(Options) -> % TODO: refactor - lists:ukeymerge(1, lists:sort(proplists:unfold(Options)), ?DEFAULT_TCP_OPTS). + UnfoldedOptions = proplists:unfold(Options), + lists:ukeymerge(1, lists:sort(UnfoldedOptions), default_tcp_options()). % This is rather costly, and for obvious reasons does not maintain the order of % the partitions or topics, but it does keep the order of the messages within a % specific topic-partition pair --spec merge_messages([basic_message()]) -> merged_message(). +-spec merge_messages([kafkerl_protocol:basic_message()]) -> + kafkerl_protocol:merged_message(). merge_messages(Topics) -> merge_topics(Topics). % Not as costly, but still avoid this in a place where performance is critical --spec split_messages(merged_message()) -> [basic_message()]. +-spec split_messages(kafkerl_protocol:merged_message()) -> + [kafkerl_protocol:basic_message()]. split_messages({Topic, {Partition, Messages}}) -> {Topic, Partition, Messages}; split_messages({Topic, Partitions}) -> @@ -64,7 +67,8 @@ valid_message(L) when is_list(L) -> valid_message(_Any) -> false. --spec buffer_name(topic(), partition()) -> atom(). +-spec buffer_name(kafkerl_protocol:topic(), kafkerl_protocol:partition()) -> + atom(). buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). From fc11c597450f3a04c2d141a8f33a73c800136c4e Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Thu, 21 Jan 2016 14:40:39 -0300 Subject: [PATCH 43/72] minor spec fixes --- src/kafkerl_protocol.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 763d719..6c80855 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -41,10 +41,10 @@ kafkerl:error(). -type messages() :: [{kafkerl:topic(), [{{kafkerl:partition(), integer()}, - [binary() | {binary(), binary()}]}]}]. + [binary() | {binary(), binary()}]}]}]. -type fetch_state() :: {binary(), integer(), [any()]} | void. --type fetch_response() :: {ok, integer(), messages()} | - {incomplete, integer(), messages(), fetch_state()} | +-type fetch_response() :: {ok, correlation_id(), messages()} | + {incomplete, correlation_id(), messages(), fetch_state()} | kafkerl:error(). 
% Compression From 677ffefbada59c511674b0bbc4b03fa9f6cd5f7e Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 25 Jan 2016 00:26:26 -0300 Subject: [PATCH 44/72] fixes and improvements --- src/kafkerl.erl | 15 +- src/kafkerl_broker_connection.erl | 234 +++++++++++++++++++----------- src/kafkerl_connector.erl | 135 ++++++++++------- src/kafkerl_protocol.erl | 15 +- src/kafkerl_utils.erl | 21 ++- 5 files changed, 262 insertions(+), 158 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 32fb06e..ba92fc9 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -9,6 +9,8 @@ -export([version/0]). %% Types +-type offset() :: integer(). + -type callback() :: pid() | fun() | {atom(), atom()} | @@ -18,7 +20,7 @@ {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | - {offset, integer()} | + {offset, offset()} | {fetch_interval, false | integer()}. -type options() :: [option()]. -type server_ref() :: atom() | pid(). @@ -67,7 +69,7 @@ produce(ServerRef, Topic, Partition, Message, Options) -> consume(Topic, Partition) -> consume(?MODULE, Topic, Partition, []). --spec consume(topic(), partition(), options()) -> ok | [binary()] | error(); +-spec consume(topic(), partition(), options()) -> ok | error(); (server_ref(), topic(), partition()) -> ok | error(). consume(Topic, Partition, Options) when is_list(Options) -> consume(?MODULE, Topic, Partition, Options); @@ -75,13 +77,16 @@ consume(ServerRef, Topic, Partition) -> consume(ServerRef, Topic, Partition, []). -spec consume(server_ref(), topic(), partition(), options()) -> - ok | [binary()] | error(). + ok | {[payload()], offset()} | error(). consume(ServerRef, Topic, Partition, Options) -> - case lists:keyfind(consumer, 1, Options) of - false -> + case {proplists:get_value(consumer, Options, undefined), + proplists:get_value(fetch_interval, Options, false)} of + {undefined, false} -> NewOptions = [{consumer, self()} | Options], kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), kafkerl_utils:gather_consume_responses(); + {undefined, _} -> + {error, fetch_interval_specified_with_no_consumer}; _ -> kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) end. diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 0cccbfb..f9bb48a 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -25,23 +25,27 @@ options = undefined :: kafkerl:options(), state = void :: kafkerl_protocol:fetch_state()}). --record(state, {name = undefined :: atom(), - buffers = [] :: [atom()], - conn_idx = undefined :: conn_idx(), - client_id = undefined :: binary(), - socket = undefined :: port(), - address = undefined :: kafkerl_connector:address(), - connector = undefined :: pid(), - tref = undefined :: any(), - tcp_options = [] :: [any()], - max_retries = 0 :: integer(), - retry_interval = 0 :: integer(), - request_number = 0 :: integer(), - pending_requests = [] :: [integer()], - max_time_queued = 0 :: integer(), - ets = undefined :: atom(), - fetches = [] :: [#fetch{}], - current_fetch = void :: void | kafkerl_protocol:correlation_id()}). 
+-record(state, {name = undefined :: atom(), + buffers = [] :: [atom()], + conn_idx = undefined :: conn_idx(), + client_id = undefined :: binary(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + connector = undefined :: pid(), + tref = undefined :: any(), + tcp_options = [] :: [any()], + max_retries = 0 :: integer(), + retry_interval = 0 :: integer(), + request_number = 0 :: integer(), + pending_requests = [] :: [integer()], + max_time_queued = 0 :: integer(), + ets = undefined :: atom(), + fetches = [] :: [#fetch{}], + current_fetch = void :: kafkerl_protocol:correlation_id() | + void, + scheduled_fetches = [] :: [{{kafkerl:topic(), + kafkerl:partition()}, + timer:tref()}]}). -type state() :: #state{}. %%============================================================================== @@ -68,11 +72,13 @@ add_buffer(ServerRef, Buffer) -> clear_buffers(ServerRef) -> gen_server:call(ServerRef, {clear_buffers}). --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). --spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> ok. +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> + ok. stop_fetch(ServerRef, Topic, Partition) -> gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). @@ -108,7 +114,7 @@ handle_info({flush, Time}, State) -> {ok, _Tref} = queue_flush(Time), handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - _ = lager:notice("~p received unexpected info message: ~p on ~p", [Name, Msg]), + _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), {noreply, State}. % Boilerplate @@ -143,9 +149,9 @@ init([Id, Connector, Address, Config, Name]) -> {ok, _Tref} = queue_flush(MaxTimeQueued), {ok, State}; {errors, Errors} -> - lists:foreach(fun(E) -> - _ = lager:critical("broker connection config error ~p", [E]) - end, Errors), + ok = lists:foreach(fun(E) -> + _ = lager:critical("configuration error: ~p", [E]) + end, Errors), {stop, bad_config} end. 
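The fetch/4 and stop_fetch/3 entry points added above are plain synchronous gen_server calls against one broker connection; the connector is the piece that resolves which connection owns a given topic/partition pair. A hypothetical direct call, assuming a connection registered as kafkerl_broker_connection_0 (the naming scheme used in start_link) and the calling process acting as consumer:

    ok = kafkerl_broker_connection:fetch(kafkerl_broker_connection_0, <<"test1">>, 0,
                                         [{consumer, self()}, {offset, 0}]),
    ok = kafkerl_broker_connection:stop_fetch(kafkerl_broker_connection_0, <<"test1">>, 0).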
@@ -184,54 +190,89 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, handle_fetch(ServerRef, Topic, Partition, Options, State = #state{fetches = Fetches, client_id = ClientId, - socket = Socket, name = Name}) -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - Offset = proplists:get_value(offset, Options, 0), - Request = {Topic, {Partition, Offset, 2147483647}}, - MaxWait = proplists:get_value(max_wait, Options), - MinBytes = proplists:get_value(min_bytes, Options), - Payload = kafkerl_protocol:build_fetch_request(Request, - ClientId, - CorrelationId, - MaxWait, - MinBytes), - case gen_tcp:send(Socket, Payload) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - ok = gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(State)}; - ok -> - _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), - NewFetch = #fetch{correlation_id = CorrelationId, - server_ref = ServerRef, - topic = Topic, - partition = Partition, - options = Options}, - {reply, ok, NewState#state{fetches = [NewFetch | Fetches]}} - end; -handle_fetch(_ServerRef, _Topic, _Partition, _Options, State) -> - {reply, {error, fetch_in_progress}, State}. + socket = Socket, name = Name, + scheduled_fetches = ScheduledFetches}) -> + Scheduled = proplists:get_bool(scheduled, Options), + case {get_fetch(Topic, Partition, Fetches), + lists:keytake({Topic, Partition}, 1, ScheduledFetches), + Scheduled} of + % An scheduled fetch we can't identify? We ignore it + {_, false, true} -> + lager:warning("ignoring unknown scheduled fetch"), + {reply, ok, State}; + % We are already fetching that topic/partition pair + {#fetch{}, _, false} -> + {reply, {error, fetch_in_progress}, State}; + % We have a scheduled fetch for that topic/partition pair and this is not an + % scheduled fetch + {not_found, Tuple, false} when is_tuple(Tuple) -> + {reply, {error, fetch_in_progress}, State}; + % We have a valid fetch request! + {not_found, KeyTakeResult, Scheduled} -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; + ok -> + _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + options = Options}, + NewScheduledFetches = case KeyTakeResult of + false -> ScheduledFetches; + {_, _, List} -> List + end, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches], + scheduled_fetches = NewScheduledFetches}} + end + end. handle_stop_fetch(Topic, Partition, State) -> - % Leave current fetch as it is - NewFetches = remove_fetch(Topic, Partition, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches}}. 
+ % Cancel any timers we have for scheduled fetches + case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of + false -> + NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}; + {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> + _ = timer:cancel(TRef), + NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches, + scheduled_fetches = NewScheduledFetches}} + end. -remove_fetch(Topic, Partition, CurrentFetches) -> - remove_fetch(Topic, Partition, CurrentFetches, []). -remove_fetch(_Topic, _Partition, [], Acc) -> +remove_fetch(Topic, Partition, Force, CurrentFetches) -> + remove_fetch(Topic, Partition, Force, CurrentFetches, []). + +remove_fetch(_Topic, _Partition, _Force, [], Acc) -> Acc; -remove_fetch(Topic, Partition, +remove_fetch(Topic, Partition, force, + [#fetch{topic = Topic, partition = Partition} | T], Acc) -> + % If we are forcing the removal, just remove the fetch + Acc ++ T; +remove_fetch(Topic, Partition, _, [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> % Clearing the fetch options ensures this fetch will stop sending any messages % since there is no consumer. This also removes the fetch_interval so it won't % be requested again. % Simply removing the fetch here doesn't work since we will still get a server - % response, but we will not be able to properly handle it. + % response, but we won't be able to handle it. [Fetch#fetch{options = []} | Acc] ++ T; -remove_fetch(Topic, Partition, [H | T], Acc) -> - remove_fetch(Topic, Partition, T, [H | Acc]). +remove_fetch(Topic, Partition, Force, [H | T], Acc) -> + remove_fetch(Topic, Partition, Force, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, @@ -253,8 +294,9 @@ handle_tcp_data(Bin, State = #state{fetches = Fetches, handle_produce_response(Bin, State) end. 
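The forced removal above is only used once a scheduled fetch's timer has been cancelled, so the record can be dropped outright; the unforced path keeps the record with empty options because the broker will still answer the in-flight request and that response has to be matched to something. A self-contained shell sketch of the same list surgery, using plain {Topic, Partition, Options} tuples instead of the module's #fetch{} record:

    1> Remove = fun R(_T, _P, _Mode, [], Acc)                -> Acc;
                    R(T, P, force, [{T, P, _O} | Rest], Acc) -> Acc ++ Rest;
                    R(T, P, _Mode, [{T, P, _O} | Rest], Acc) -> [{T, P, []} | Acc] ++ Rest;
                    R(T, P, Mode, [H | Rest], Acc)           -> R(T, P, Mode, Rest, [H | Acc])
                end.
    2> Remove(<<"t">>, 0, force, [{<<"t">>, 0, [{consumer, self()}]}], []).
    []
    3> Remove(<<"t">>, 0, keep, [{<<"t">>, 0, [{consumer, self()}]}], []).
    [{<<"t">>,0,[]}]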
-handle_fetch_response(Bin, Fetch, State = #state{name = Name, - fetches = Fetches}) -> +handle_fetch_response(Bin, Fetch, + State = #state{fetches = Fetches, + scheduled_fetches = ScheduledFetches}) -> Options = Fetch#fetch.options, Consumer = proplists:get_value(consumer, Options), case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of @@ -266,24 +308,29 @@ handle_fetch_response(Bin, Fetch, State = #state{name = Name, [] -> []; _ -> [{consumed, Messages}, {offset, Offset}] end), + NewFetches = lists:delete(Fetch, Fetches), + NewState = State#state{current_fetch = void, fetches = NewFetches}, case proplists:get_value(fetch_interval, Options, false) of - false -> {ok, State#state{current_fetch = void}}; - Interval -> - NewOptions = lists:keyreplace(offset, 1, Options, {offset, Offset}), - Arguments = [Fetch#fetch.server_ref, Fetch#fetch.topic, - Fetch#fetch.partition, NewOptions], - _ = timer:apply_after(Interval, ?MODULE, fetch, Arguments), - {ok, State#state{current_fetch = void, - fetches = lists:delete(Fetch, Fetches)}} + false -> + {ok, NewState}; + Interval -> + NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, + {offset, Offset}]), + Topic = Fetch#fetch.topic, + Partition = Fetch#fetch.partition, + ServerRef = Fetch#fetch.server_ref, + Arguments = [ServerRef, Topic, Partition, NewOptions], + {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], + {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} end; {incomplete, CorrelationId, Data, NewFetchState} -> ok = case Data of - [{_, [{_, Messages}]}] -> + [{_, [{_, Messages = [_ | _]}]}] -> send_messages(Consumer, {consumed, Messages}); _ -> % On some cases, kafka will return an incomplete response with no - % messages, in this case since we don't have anything to send, we - % just need to update the fetch state. + % messages, but we shouldn't send the empty message list. ok end, {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | @@ -291,7 +338,8 @@ handle_fetch_response(Bin, Fetch, State = #state{name = Name, current_fetch = CorrelationId}}; Error -> ok = send_messages(Consumer, Error), - {ok, State#state{current_fetch = void, fetches = lists:delete(Fetch, Fetches)}} + NewFetches = lists:delete(Fetch, Fetches), + {ok, State#state{current_fetch = void, fetches = NewFetches}} end. handle_produce_response(Bin, State = #state{connector = Connector, name = Name, @@ -303,9 +351,10 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, ets:delete(EtsName, CorrelationId), {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered - spawn(fun() -> - notify_success_to_connector(Successes, Messages, Connector) - end), + _ = spawn(fun() -> + notify_success(Successes, Messages, Connector) + end), + % Then handle the errors case handle_errors(Errors, Messages, Name) of ignore -> {ok, State}; @@ -315,8 +364,7 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {ok, State} end; _ -> - _ = lager:warning("~p was unable to properly process produce response", - [Name]), + _ = lager:warning("~p was unable to get produce response", [Name]), {error, invalid_produce_response} end; Other -> @@ -332,14 +380,14 @@ resend_messages(Messages, Connector) -> F = fun(M) -> kafkerl_connector:send(Connector, M, []) end, lists:foreach(F, Messages). 
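When fetch_interval is set, handle_fetch_response/3 above re-arms the fetch with timer:apply_after/4, carrying the last offset forward and tagging the options as scheduled, so a consumer registers once and then keeps receiving events. A usage sketch with assumed topic, partition and interval (it also assumes the broker mapping is already available):

    ok = kafkerl:consume(<<"test1">>, 0, [{consumer, self()},
                                          {fetch_interval, 5000},
                                          {offset, 0}]),
    %% the consumer process then receives the events sent by the broker connection:
    receive
      {consumed, Messages} -> io:format("got ~p~n", [Messages]);
      {offset, NextOffset} -> io:format("next offset: ~p~n", [NextOffset])
    end.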
-notify_success_to_connector([], _Messages, _Pid) -> +notify_success([], _Messages, _Pid) -> ok; -notify_success_to_connector([{Topic, Partition, Offset} | T], Messages, Pid) -> +notify_success([{Topic, Partition, Offset} | T], Messages, Pid) -> MergedMessages = kafkerl_utils:merge_messages(Messages), Partitions = partitions_in_topic(Topic, MergedMessages), M = messages_in_partition(Partition, Partitions), kafkerl_connector:produce_succeeded(Pid, {Topic, Partition, M, Offset}), - notify_success_to_connector(T, Messages, Pid). + notify_success(T, Messages, Pid). partitions_in_topic(Topic, Messages) -> lists:flatten([P || {T, P} <- Messages, T =:= Topic]). @@ -389,8 +437,9 @@ handle_error({Topic, Partition, Error}, Messages, Name) Message -> {true, Message} end; handle_error({Topic, Partition, Error}, _Messages, Name) -> + ErrorName = kafkerl_error:get_error_name(Error), _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, kafkerl_error:get_error_name(Error), Topic, Partition]), + [Name, ErrorName, Topic, Partition]), false. get_message_for_error(Topic, Partition, SavedMessages, Name) -> @@ -422,7 +471,8 @@ connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> Pid ! {connected, Socket}; {error, Reason} -> NewRetries = Retries - 1, - _ = lager:warning("~p can't connect to ~p:~p. Reason: ~p, ~p retries left", + _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p + (~p retries left)", [Name, Host, Port, Reason, NewRetries]), timer:sleep(Timeout), connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) @@ -457,11 +507,19 @@ parse_correlation_id(Bin, CorrelationId) -> get_fetch(_CorrelationId, []) -> not_found; -get_fetch(CorrelationId, [Fetch = #fetch{correlation_id = CorrelationId} | _T]) -> - Fetch; -get_fetch(CorrelationId, [_ | T]) -> +get_fetch(CorrelationId, [H = #fetch{correlation_id = CorrelationId} | _T]) -> + H; +get_fetch(CorrelationId, [_H | T]) -> get_fetch(CorrelationId, T). +get_fetch(_Topic, _Partition, []) -> + not_found; +get_fetch(Topic, Partition, [H = #fetch{topic = Topic, + partition = Partition} | _T]) -> + H; +get_fetch(Topic, Partition, [_H | T]) -> + get_fetch(Topic, Partition, T). + send_messages(_Consumer, []) -> ok; send_messages(Consumer, [Event | T]) -> diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 089818e..e15e23b 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -10,7 +10,7 @@ % Produce -export([send/3]). % Consume --export([fetch/4]). +-export([fetch/4, stop_fetch/3]). % Common -export([subscribe/2, subscribe/3, unsubscribe/2]). % Only for internal use @@ -34,20 +34,20 @@ -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [address()], - broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: kafkerl_protocol:client_id(), - max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - config = [] :: {atom(), any()}, - autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), kafkerl:callback()}], - known_topics = [] :: [binary()], - pending = [] :: [kafkerl:basic_message()], - last_metadata_request = 0 :: integer(), - metadata_request_cd = 0 :: integer(), - last_dump_name = {"", 0} :: {string(), integer()}, - default_fetch_options = [] :: kafkerl:options()}). 
+-record(state, {brokers = [] :: [address()], + broker_mapping = void :: [broker_mapping()] | void, + client_id = <<>> :: kafkerl_protocol:client_id(), + max_metadata_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + config = [] :: {atom(), any()}, + autocreate_topics = false :: boolean(), + callbacks = [] :: [{filters(), kafkerl:callback()}], + known_topics = [] :: [binary()], + pending = [] :: [kafkerl:basic_message()], + last_metadata_request = 0 :: integer(), + metadata_request_cd = 0 :: integer(), + last_dump_name = {"", 0} :: {string(), integer()}, + default_fetch_options = [] :: kafkerl:options()}). -type state() :: #state{}. -export_type([address/0]). @@ -72,15 +72,20 @@ send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> ok end; Error -> - lager:debug("unable to send message to ~p, reason: ~p", [Buffer, Error]), + _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), gen_server:call(ServerRef, {send, Message}) end. --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> - ok. +-spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). +-spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> + ok. +stop_fetch(ServerRef, Topic, Partition) -> + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). + -spec get_partitions(kafkerl:server_ref()) -> [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). get_partitions(ServerRef) -> @@ -91,7 +96,8 @@ get_partitions(ServerRef) -> Error end. --spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok | kafkerl:error(). +-spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> + ok | kafkerl:error(). subscribe(ServerRef, Callback) -> subscribe(ServerRef, Callback, all). 
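subscribe/2,3 above accepts the same callback shapes that kafkerl_utils:send_event/2 understands: a pid, a fun of arity 1, or an {M, F} / {M, F, Args} tuple. A sketch, assuming the connector is registered as kafkerl (the name the kafkerl module itself passes along) and that a my_consumer:handle_event/1 function exists:

    ok = kafkerl_connector:subscribe(kafkerl, self()),
    ok = kafkerl_connector:subscribe(kafkerl, fun(Event) -> io:format("~p~n", [Event]) end),
    ok = kafkerl_connector:subscribe(kafkerl, {my_consumer, handle_event}).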
-spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> @@ -132,15 +138,17 @@ handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> {DumpNameStr, _} = DumpName = get_ets_dump_name(State#state.last_dump_name), AllMessages = ets_buffer:read_all(Buffer), FilePath = proplists:get_value(dump_location, Options, "") ++ DumpNameStr, - ok = case file:write_file(FilePath, term_to_binary(AllMessages)) of - ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); - Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) - end, + _ = case file:write_file(FilePath, term_to_binary(AllMessages)) of + ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); + Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) + end, {reply, ok, State#state{last_dump_name = DumpName}}; handle_call({send, Message}, _From, State) -> handle_send(Message, State); handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; +handle_call({stop_fetch, Topic, Partition}, _From, State) -> + {reply, handle_stop_fetch(Topic, Partition, State), State}; handle_call({request_metadata}, _From, State) -> {reply, ok, handle_request_metadata(State, [])}; handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> @@ -170,7 +178,7 @@ handle_info({metadata_updated, []}, State) -> handle_info({metadata_updated, Mapping}, State) -> % Create the topic mapping (this also starts the broker connections) NewBrokerMapping = get_broker_mapping(Mapping, State), - lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), + _ = lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), % Get the partition data to send to the subscribers and send it PartitionData = get_partitions_from_mapping(NewBrokerMapping), Callbacks = State#state.callbacks, @@ -178,7 +186,7 @@ handle_info({metadata_updated, Mapping}, State) -> % Add to the list of known topics NewTopics = lists:sort([T || {T, _P} <- PartitionData]), NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), - lager:debug("Known topics: ~p", [NewKnownTopics]), + _ = lager:debug("Known topics: ~p", [NewKnownTopics]), % Reverse the pending messages and try to send them again RPending = lists:reverse(State#state.pending), ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), @@ -189,11 +197,11 @@ handle_info({'DOWN', Ref, process, _, normal}, State) -> true = demonitor(Ref), {noreply, State}; handle_info({'DOWN', Ref, process, _, Reason}, State) -> - lager:error("metadata request failed, reason: ~p", [Reason]), + _ = lager:error("metadata request failed, reason: ~p", [Reason]), true = demonitor(Ref), {noreply, handle_request_metadata(State, [], true)}; handle_info(Msg, State) -> - lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), + _ = lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), {noreply, State}. -spec handle_cast(any(), state()) -> {noreply, state()}. @@ -238,44 +246,51 @@ init([Config]) -> {ok, State}; {errors, Errors} -> lists:foreach(fun(E) -> - lager:critical("Connector config error ~p", [E]) + _ = lager:critical("Connector config error ~p", [E]) end, Errors), {stop, bad_config} end. 
handle_send(Message, State = #state{autocreate_topics = false}) -> + lager:critical("a.1 ~p", [Message]), % The topic didn't exist, ignore {Topic, _Partition, Payload} = Message, - lager:error("Dropping ~p sent to non existing topic ~p", [Payload, Topic]), - {reply, ok, State}; + _ = lager:error("Dropped ~p sent to non existing topic ~p", [Payload, Topic]), + {reply, {error, non_existing_topic}, State}; handle_send(Message, State = #state{broker_mapping = void, pending = Pending}) -> - % Maybe have a new buffer + lager:critical("b.1 ~p", [Message]), + % We should consider saving this to a new buffer instead of using the state. {reply, ok, State#state{pending = [Message | Pending]}}; handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, known_topics = KnownTopics}) -> + lager:critical("c.1 ~p", [Message]), {Topic, Partition, Payload} = Message, case lists:any(fun({K, _}) -> K =:= {Topic, Partition} end, Mapping) of true -> - % We need to check if the topic/partition pair exists, this is because the - % ets takes some time to start, so some messages could be lost. - % Therefore if we have the topic/partition, just send it again (the order - % will suffer though) - send(self(), Message, []), + % The ets takes some time to be available after being created, so we check + % if the topic/partition pair is in the mapping and if it does, we know we + % just need to send it again. The order is not guaranteed in this case, so + % if that's a concern, don't rely on autocreate_topics (besides, don't use + % autocreate_topics on production since it opens another can of worms). + ok = send(self(), Message, []), {reply, ok, State}; false -> - % Now, if the topic/partition was not valid, we need to check if the topic - % exists, if it does, just drop the message as we can assume no partitions - % are created. + % However, if the topic/partition pair does not exist, we need to check if + % the topic exists. If the topic exists, we drop the message because kafka + % can't add partitions on the fly. case lists:any(fun({{T, _}, _}) -> T =:= Topic end, Mapping) of true -> - lager:error("Dropping ~p sent to topic ~p, partition ~p", - [Payload, Topic, Partition]), - {reply, ok, State}; + _ = lager:error("Dropped ~p sent to topic ~p, partition ~p", + [Payload, Topic, Partition]), + {reply, {error, bad_partition}, State}; false -> NewKnownTopics = lists:umerge([Topic], KnownTopics), NewState = State#state{pending = [Message | Pending]}, - {reply, ok, handle_request_metadata(NewState, NewKnownTopics)} + lager:critical("X"), + R={reply, ok, handle_request_metadata(NewState, NewKnownTopics)}, + lager:critical("X2"), + R end end. @@ -290,6 +305,18 @@ handle_fetch(Topic, Partition, Options, State) -> kafkerl_broker_connection:fetch(Broker, Topic, Partition, NewOptions) end. +handle_stop_fetch(_Topic, _Partition, #state{broker_mapping = void}) -> + % Ignore, there's no fetch in progress + ok; +handle_stop_fetch(Topic, Partition, State) -> + case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of + false -> + % Ignore, there's no fetch in progress + ok; + {_, Broker} -> + kafkerl_broker_connection:stop_fetch(Broker, Topic, Partition) + end. 
+ handle_get_partitions(#state{broker_mapping = void}) -> {error, not_available}; handle_get_partitions(#state{broker_mapping = Mapping}) -> @@ -355,7 +382,7 @@ do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> do_request_metadata([], _TCPOpts, _Request) -> {error, all_down}; do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> - lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), % Connect to the Broker case gen_tcp:connect(Host, Port, TCPOpts) of {error, Reason} -> @@ -435,12 +462,12 @@ expand_topic({?NO_ERROR, Topic, Partitions}) -> {true, {Topic, Partitions}}; expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> % Replica not available can be ignored, still, show a warning - lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), {true, {Topic, Partitions}}; expand_topic({Error, Topic, _Partitions}) -> - lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), {true, {Topic, []}}. expand_partitions(Metadata) -> @@ -453,13 +480,13 @@ expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, _, _} | T]}, Acc) -> - lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), ExpandedPartition = {{Topic, Partition}, Leader}, expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> @@ -530,5 +557,5 @@ get_timestamp() -> %% Error handling %%============================================================================== warn_metadata_request(Host, Port, Reason) -> - lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). \ No newline at end of file diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 6c80855..5d5d482 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -18,11 +18,13 @@ %% Requests -type client_id() :: binary(). -type merged_message() :: kafkerl:basic_message() | - {kafkerl:topic(), [{kafkerl:partition(), kafkerl:payload()}]} | + {kafkerl:topic(), + [{kafkerl:partition(), kafkerl:payload()}]} | [merged_message()]. -type fetch_offset() :: integer(). -type fetch_max_bytes() :: integer(). --type fetch_partition() :: {kafkerl:partition(), fetch_offset(), fetch_max_bytes()} | +-type fetch_partition() :: {kafkerl:partition(), fetch_offset(), + fetch_max_bytes()} | [fetch_partition()]. 
-type fetch_request() :: {kafkerl:topic(), fetch_partition()} | [fetch_request()]. @@ -35,7 +37,8 @@ -type isr() :: integer(). -type partition_metadata() :: {error_code(), kafkerl:partition(), broker_id(), [replica()], [isr()]}. --type topic_metadata() :: {error_code(), kafkerl:topic(), [partition_metadata()]}. +-type topic_metadata() :: {error_code(), kafkerl:topic(), + [partition_metadata()]}. -type metadata() :: {[broker()], [topic_metadata()]}. -type metadata_response() :: {ok, correlation_id(), metadata()} | kafkerl:error(). @@ -44,7 +47,8 @@ [binary() | {binary(), binary()}]}]}]. -type fetch_state() :: {binary(), integer(), [any()]} | void. -type fetch_response() :: {ok, correlation_id(), messages()} | - {incomplete, correlation_id(), messages(), fetch_state()} | + {incomplete, correlation_id(), messages(), + fetch_state()} | kafkerl:error(). % Compression @@ -81,7 +85,8 @@ %% API %%============================================================================== % Message building --spec build_produce_request(merged_message(), client_id(), correlation_id()) -> iodata(). +-spec build_produce_request(merged_message(), client_id(), correlation_id()) -> + iodata(). build_produce_request(Data, ClientId, CorrelationId) -> build_produce_request(Data, ClientId, CorrelationId, ?COMPRESSION_NONE). -spec build_produce_request(merged_message(), client_id(), correlation_id(), diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 6b7b515..8eac465 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -6,6 +6,7 @@ -export([merge_messages/1, split_messages/1, valid_message/1]). -export([buffer_name/2]). -export([gather_consume_responses/0, gather_consume_responses/1]). +-export([proplists_set/2]). %%============================================================================== %% API @@ -73,6 +74,17 @@ buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). +-type proplist_value() :: {atom(), any()}. +-type proplist() :: [proplist_value]. +-spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> + proplist(). +proplists_set(Proplist, {K, _V} = NewValue) -> + lists:keyreplace(K, 1, Proplist, NewValue); +proplists_set(Proplist, []) -> + Proplist; +proplists_set(Proplist, [H | T]) -> + proplists_set(proplists_set(Proplist, H), T). + %%============================================================================== %% Utils %%============================================================================== @@ -141,15 +153,12 @@ gather_consume_responses(Timeout) -> gather_consume_responses(Timeout, []). gather_consume_responses(Timeout, Acc) -> receive - {message_count, _} -> - % Ignore this one - gather_consume_responses(Timeout, Acc); {consumed, Messages} -> gather_consume_responses(Timeout, Acc ++ Messages); - {consume_done, Messages} -> - Acc ++ Messages; + {offset, Offset} -> + {Acc, Offset}; {error, _Reason} = Error -> Error after Timeout -> - {error, {no_response, Acc}} + [] end. 
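Without a consumer option, kafkerl:consume/3 now registers the calling process and blocks in gather_consume_responses/1, which accumulates the {consumed, ...} events and finishes when the {offset, ...} event arrives, returning both so the caller knows where to resume. A sketch with assumed topic and partition:

    case kafkerl:consume(<<"test1">>, 0, [{offset, 0}]) of
      [] ->
        io:format("timed out waiting for the fetch response~n");
      {error, Reason} ->
        io:format("fetch failed: ~p~n", [Reason]);
      {Messages, NextOffset} ->
        io:format("~p messages, resume at offset ~p~n", [length(Messages), NextOffset])
    end.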
\ No newline at end of file From 1090f34741f35e7c57f7a0aae4efa6b7d569742a Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 25 Jan 2016 12:21:52 -0300 Subject: [PATCH 45/72] updated readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f870605..5e437ff 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -kafkerl v1.0.2 +kafkerl v2.0.0 ============== [![Gitter](https://badges.gitter.im/Join Chat.svg)](https://gitter.im/HernanRivasAcosta/kafkerl?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -Apache Kafka 0.8 high performance producer for erlang. -Developed thanks to the support and sponsorship of [TigerText](http://www.tigertext.com/). +Apache Kafka 0.8.2 high performance producer/consumer for erlang. +Developed thanks to the support and sponsorship of [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/). ##Features (aka, why kafkerl?) - Fast binary creation. @@ -12,12 +12,12 @@ Developed thanks to the support and sponsorship of [TigerText](http://www.tigert - Messages are not lost but cached before sending to kafka. - Handles server side errors and broker/leadership changes. - Flexible API allows consumer of messages to define pids, funs or M:F pairs as callbacks for the received messages. + - Simple yet flexible consumer API to retrieve the messages from Kafka. ##Missing features (aka, what I am working on but haven't finished yet) - - Though the library can parse kafka messages, the consumers are not implemented in this version. - There is no communication with Zookeeper. - Tests suites. -Special thanks to [@nitzanharel](https://github.com/nitzanharel) who found some really nasty bugs and helped me understand the subtleties of kafka's design and to the rest of the [TigerText](http://www.tigertext.com/) team for their support and code reviews. +Special thanks to [@nitzanharel](https://github.com/nitzanharel) who found some really nasty bugs and helped me understand the subtleties of kafka's design and to the rest of the [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/) teams for their support and code reviews. 
\ No newline at end of file From b486bc1768b20f110acf46d19e8d9040d299d17c Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 8 Apr 2016 11:30:17 -0300 Subject: [PATCH 46/72] fixed a parsing error for some metadata responses --- src/kafkerl_protocol.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 5d5d482..f05b396 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -566,21 +566,22 @@ parse_topic_metadata(Count, <<>>, Acc) when Count =< 0 -> parse_topic_metadata(Count, Bin, Acc) when Count =< 0 -> lager:warning("Finished parsing topic metadata, ignoring bytes: ~p", [Bin]), {ok, lists:reverse(Acc)}; -parse_topic_metadata(Count, <<0:?SHORT, - TopicSize:?USHORT, - TopicName:TopicSize/binary, +parse_topic_metadata(Count, <>, Acc) -> {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, PartitionsBin), - TopicMetadata = {0, TopicName, PartitionsMetadata}, + TopicMetadata = {ErrorCode, <<"unknown">>, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]); parse_topic_metadata(Count, <>, Acc) -> - {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(0, Remainder), - TopicMetadata = {ErrorCode, <<"unkown">>, PartitionsMetadata}, + TopicSize:?USHORT, + TopicName:TopicSize/binary, + PartitionCount:?UINT, + PartitionsBin/binary>>, Acc) -> + {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, + PartitionsBin), + TopicMetadata = {ErrorCode, TopicName, PartitionsMetadata}, parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]). parse_partition_metadata(Count, Bin) -> From cabd9e1d97339d509ffbcc93b7abcf1581be14a8 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Tue, 5 Apr 2016 16:45:30 -0300 Subject: [PATCH 47/72] Revamped the way kafkerl handles messages that need to be written to disk --- rel/kafkerl.app.config | 8 +- src/kafkerl.erl | 22 +--- src/kafkerl_connector.erl | 213 ++++++++++++++++++------------- src/kafkerl_metadata_handler.erl | 58 +++++++++ src/kafkerl_utils.erl | 6 +- 5 files changed, 196 insertions(+), 111 deletions(-) create mode 100644 src/kafkerl_metadata_handler.erl diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 2542515..241a2b1 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -10,11 +10,15 @@ {broker_tcp_timeout, 1000}, {max_metadata_retries, -1}, {assume_autocreate_topics, true}, - {metadata_tcp_timeout, 1000}, + {metadata_tcp_timeout, 5000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds {metadata_request_cooldown, 1500}, % In milliseconds {consumer_min_bytes, 1}, - {consumer_max_wait, 1500}]}, + {consumer_max_wait, 1500}, + {dump_location, "dumps/"}, + {flush_to_disk_every, 20000}, % In milliseconds + {max_buffer_size, 5000}, + {save_messages_for_bad_topics, true}]}, {topics, [test1, test2, test3]}, {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index ba92fc9..27238e2 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -2,7 +2,7 @@ -author('hernanrivasacosta@gmail.com'). -export([start/0, start/2]). --export([produce/3, produce/4, produce/5, +-export([produce/3, consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, request_metadata/0, request_metadata/1, request_metadata/2, partitions/0, partitions/1]). @@ -16,7 +16,6 @@ {atom(), atom()} | {atom(), atom(), [any()]}. 
-type option() :: {buffer_size, integer() | infinity} | - {dump_location, string()} | {consumer, callback()} | {min_bytes, integer()} | {max_wait, integer()} | @@ -51,19 +50,8 @@ start(_StartType, _StartArgs) -> %% Produce API -spec produce(topic(), partition(), payload()) -> ok. produce(Topic, Partition, Message) -> - produce(?MODULE, Topic, Partition, Message, []). - --spec produce(server_ref(), topic(), partition(), payload()) -> ok; - (topic(), partition(), payload(), options()) -> ok. -produce(Topic, Partition, Message, Options) when is_list(Options) -> - produce(?MODULE, {Topic, Partition, Message}, Options); -produce(ServerRef, Topic, Partition, Message) -> - produce(ServerRef, {Topic, Partition, Message}, []). - --spec produce(server_ref(), topic(), partition(), payload(), options()) -> ok. -produce(ServerRef, Topic, Partition, Message, Options) -> - kafkerl_connector:send(ServerRef, {Topic, Partition, Message}, Options). - + kafkerl_connector:send({Topic, Partition, Message}). + %% Consume API -spec consume(topic(), partition()) -> ok | error(). consume(Topic, Partition) -> @@ -104,13 +92,13 @@ stop_consuming(ServerRef, Topic, Partition) -> request_metadata() -> request_metadata(?MODULE). --spec request_metadata(atom() | [topic()]) -> ok. +-spec request_metadata(server_ref() | [topic()]) -> ok. request_metadata(Topics) when is_list(Topics) -> request_metadata(?MODULE, Topics); request_metadata(ServerRef) -> kafkerl_connector:request_metadata(ServerRef). --spec request_metadata(atom(), [topic()]) -> ok. +-spec request_metadata(server_ref(), [topic()]) -> ok. request_metadata(ServerRef, Topics) -> kafkerl_connector:request_metadata(ServerRef, Topics). diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index e15e23b..0aeab59 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -8,7 +8,7 @@ -export([request_metadata/1, request_metadata/2, request_metadata/3, get_partitions/1]). % Produce --export([send/3]). +-export([send/1]). % Consume -export([fetch/4, stop_fetch/3]). % Common @@ -37,17 +37,16 @@ -record(state, {brokers = [] :: [address()], broker_mapping = void :: [broker_mapping()] | void, client_id = <<>> :: kafkerl_protocol:client_id(), - max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), config = [] :: {atom(), any()}, autocreate_topics = false :: boolean(), callbacks = [] :: [{filters(), kafkerl:callback()}], known_topics = [] :: [binary()], - pending = [] :: [kafkerl:basic_message()], - last_metadata_request = 0 :: integer(), - metadata_request_cd = 0 :: integer(), last_dump_name = {"", 0} :: {string(), integer()}, - default_fetch_options = [] :: kafkerl:options()}). + default_fetch_options = [] :: kafkerl:options(), + dump_location = "" :: string(), + max_buffer_size = 0 :: integer(), + save_bad_messages = false :: boolean(), + metadata_handler = void :: atom()}). -type state() :: #state{}. -export_type([address/0]). @@ -57,23 +56,25 @@ %%============================================================================== -spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Name, Config) -> - gen_server:start_link({local, Name}, ?MODULE, [Config], []). + gen_server:start_link({local, Name}, ?MODULE, [Config, Name], []). --spec send(kafkerl:server_ref(), kafkerl:basic_message(), kafkerl:options()) -> +-spec send(kafkerl:basic_message()) -> ok | kafkerl:error(). 
-send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> +send({Topic, Partition, _Payload} = Message) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of NewSize when is_integer(NewSize) -> - case lists:keyfind(buffer_size, 1, Options) of - {buffer_size, MaxSize} when NewSize > MaxSize -> - gen_server:call(ServerRef, {dump_buffer_to_disk, Buffer, Options}); - _ -> - ok - end; + ok; Error -> _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), - gen_server:call(ServerRef, {send, Message}) + case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of + NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> + ok; + _ -> + _ = lager:critical("unable to write to default buffer, reason: ~p", + [Error]), + ok + end end. -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), @@ -134,17 +135,6 @@ produce_succeeded(ServerRef, Messages) -> %%============================================================================== -spec handle_call(any(), any(), state()) -> {reply, ok, state()} | {reply, {error, any()}, state()}. -handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> - {DumpNameStr, _} = DumpName = get_ets_dump_name(State#state.last_dump_name), - AllMessages = ets_buffer:read_all(Buffer), - FilePath = proplists:get_value(dump_location, Options, "") ++ DumpNameStr, - _ = case file:write_file(FilePath, term_to_binary(AllMessages)) of - ok -> lager:debug("Dumped unsent messages at ~p", [FilePath]); - Error -> lager:critical("Unable to save messages, reason: ~p", [Error]) - end, - {reply, ok, State#state{last_dump_name = DumpName}}; -handle_call({send, Message}, _From, State) -> - handle_send(Message, State); handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({stop_fetch, Topic, Partition}, _From, State) -> @@ -170,6 +160,10 @@ handle_call({unsubscribe, Callback}, _From, State) -> NewCallbacks = lists:keydelete(Callback, 2, State#state.callbacks), {reply, ok, State#state{callbacks = NewCallbacks}}. +-spec handle_info(any(), state()) -> {noreply, state()} | + {stop, {error, any()}, state()}. +handle_info(dump_buffer_tick, State) -> + {noreply, handle_dump_buffer_to_disk(State)}; handle_info(metadata_timeout, State) -> {stop, {error, unable_to_retrieve_metadata}, State}; handle_info({metadata_updated, []}, State) -> @@ -187,10 +181,8 @@ handle_info({metadata_updated, Mapping}, State) -> NewTopics = lists:sort([T || {T, _P} <- PartitionData]), NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), _ = lager:debug("Known topics: ~p", [NewKnownTopics]), - % Reverse the pending messages and try to send them again - RPending = lists:reverse(State#state.pending), - ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), - {noreply, State#state{broker_mapping = NewBrokerMapping, pending = [], + % TODO: Maybe retry from the dumps + {noreply, State#state{broker_mapping = NewBrokerMapping, callbacks = NewCallbacks, known_topics = NewKnownTopics}}; handle_info({'DOWN', Ref, process, _, normal}, State) -> @@ -219,81 +211,50 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
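The send/1 rewrite above means a produce call no longer goes through the connector process at all: the message is written to the per topic/partition ets_buffer, and only if that buffer is missing does it land in the shared default buffer, which the dump_buffer_tick interval later retries or dumps to disk. A sketch of the write path, with the topic and partition assumed to exist:

    %% kafkerl:produce/3 now reduces to this single call:
    ok = kafkerl_connector:send({<<"test1">>, 0, <<"hello">>}),
    %% the broker connection flushes from a buffer named after the topic/partition pair:
    Buffer = kafkerl_utils:buffer_name(<<"test1">>, 0).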
%%============================================================================== %% Handlers %%============================================================================== -init([Config]) -> +init([Config, Name]) -> + % The schema indicates what is expected of the configuration, it validates and + % normalizes the configuration Schema = [{brokers, [{string, {integer, {1, 65535}}}], required}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, {client_id, binary, {default, <<"kafkerl_client">>}}, {topics, [binary], required}, - {metadata_tcp_timeout, positive_integer, {default, 1500}}, {assume_autocreate_topics, boolean, {default, false}}, - {metadata_request_cooldown, positive_integer, {default, 333}}, {consumer_min_bytes, positive_integer, {default, 1}}, - {consumer_max_wait, positive_integer, {default, 1500}}], + {consumer_max_wait, positive_integer, {default, 1500}}, + {dump_location, string, {default, ""}}, + {max_buffer_size, positive_integer, {default, 500}}, + {save_messages_for_bad_topics, boolean, {default, true}}, + {flush_to_disk_every, positive_integer, {default, 10000}}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [Brokers, MaxMetadataRetries, ClientId, Topics, RetryInterval, - AutocreateTopics, MetadataRequestCooldown, MinBytes, MaxWait]} -> + {ok, [Brokers, ClientId, Topics, AutocreateTopics, MinBytes, MaxWait, + DumpLocation, MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> + % Start the metadata request handler + MetadataHandlerName = metadata_handler_name(Name), + {ok, _} = kafkerl_metadata_handler:start(MetadataHandlerName, Config), + % Build the default fetch options DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], State = #state{config = Config, known_topics = Topics, brokers = Brokers, client_id = ClientId, - retry_interval = RetryInterval, + dump_location = DumpLocation, + max_buffer_size = MaxBufferSize, + save_bad_messages = SaveBadMessages, autocreate_topics = AutocreateTopics, - max_metadata_retries = MaxMetadataRetries, - metadata_request_cd = MetadataRequestCooldown, - default_fetch_options = DefaultFetchOptions}, + default_fetch_options = DefaultFetchOptions, + metadata_handler = MetadataHandlerName}, + % Create a buffer to hold unsent messages + _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), + % Start the interval that manages the buffers holding unsent messages + {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), {_Pid, _Ref} = make_metadata_request(State), {ok, State}; {errors, Errors} -> - lists:foreach(fun(E) -> - _ = lager:critical("Connector config error ~p", [E]) - end, Errors), + ok = lists:foreach(fun(E) -> + _ = lager:critical("Connector config error ~p", [E]) + end, Errors), {stop, bad_config} end. -handle_send(Message, State = #state{autocreate_topics = false}) -> - lager:critical("a.1 ~p", [Message]), - % The topic didn't exist, ignore - {Topic, _Partition, Payload} = Message, - _ = lager:error("Dropped ~p sent to non existing topic ~p", [Payload, Topic]), - {reply, {error, non_existing_topic}, State}; -handle_send(Message, State = #state{broker_mapping = void, - pending = Pending}) -> - lager:critical("b.1 ~p", [Message]), - % We should consider saving this to a new buffer instead of using the state. 
- {reply, ok, State#state{pending = [Message | Pending]}}; -handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, - known_topics = KnownTopics}) -> - lager:critical("c.1 ~p", [Message]), - {Topic, Partition, Payload} = Message, - case lists:any(fun({K, _}) -> K =:= {Topic, Partition} end, Mapping) of - true -> - % The ets takes some time to be available after being created, so we check - % if the topic/partition pair is in the mapping and if it does, we know we - % just need to send it again. The order is not guaranteed in this case, so - % if that's a concern, don't rely on autocreate_topics (besides, don't use - % autocreate_topics on production since it opens another can of worms). - ok = send(self(), Message, []), - {reply, ok, State}; - false -> - % However, if the topic/partition pair does not exist, we need to check if - % the topic exists. If the topic exists, we drop the message because kafka - % can't add partitions on the fly. - case lists:any(fun({{T, _}, _}) -> T =:= Topic end, Mapping) of - true -> - _ = lager:error("Dropped ~p sent to topic ~p, partition ~p", - [Payload, Topic, Partition]), - {reply, {error, bad_partition}, State}; - false -> - NewKnownTopics = lists:umerge([Topic], KnownTopics), - NewState = State#state{pending = [Message | Pending]}, - lager:critical("X"), - R={reply, ok, handle_request_metadata(NewState, NewKnownTopics)}, - lager:critical("X2"), - R - end - end. - handle_fetch(_Topic, _Partition, _Options, #state{broker_mapping = void}) -> {error, not_connected}; handle_fetch(Topic, Partition, Options, State) -> @@ -345,9 +306,76 @@ handle_request_metadata(State, NewTopics, _) -> State#state{broker_mapping = void, known_topics = NewKnownTopics, last_metadata_request = LastMetadataUpdate}. +handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, + last_dump_name = LastDumpName}) -> + % Get the buffer name and all the messages from it + Buffer = kafkerl_utils:default_buffer_name(), + MessagesInBuffer = ets_buffer:read_all(Buffer), + % Split them between the ones that should be retried and those that don't + {ToDump, ToRetry} = split_message_dump(MessagesInBuffer, State), + % Retry the messages on an async function (to avoid locking this gen_server) + ok = retry_messages(ToRetry), + % And dump the messages that need to be dumped into a file + case ToDump of + [_ | _] = Messages -> + % Get the name of the file we want to write to + {DumpNameStr, _} = NewDumpName = get_ets_dump_name(LastDumpName), + % Build the location + WorkingDirectory = case file:get_cwd() of + {ok, Path} -> Path; + {error, _} -> "" + end, + FilePath = filename:join([WorkingDirectory, DumpLocation, DumpNameStr]), + % Write to disk + _ = case file:write_file(FilePath, term_to_binary(Messages)) of + ok -> + lager:info("Dumped unsent messages at ~p", [FilePath]); + Error -> + lager:critical("Unable to save messages, reason: ~p", [Error]) + end, + State#state{last_dump_name = NewDumpName}; + _ -> + State + end. + %%============================================================================== %% Utils %%============================================================================== +retry_messages([]) -> + ok; +retry_messages(Messages) -> + _Pid = spawn(fun() -> [send(M) || M <- Messages] end), + ok. + +split_message_dump(Messages, #state{known_topics = KnownTopics, + max_buffer_size = MaxBufferSize, + save_bad_messages = SaveBadMessages}) + when is_list(Messages) -> + + % Split messages between for topics kafkerl knows exist and those that do not. 
+ {Known, Unknown} = lists:partition(fun({Topic, _Partition, _Payload}) -> + lists:member(Topic, KnownTopics) + end, Messages), + % The messages to be dumped are those from unkown topics (if the settings call + % for it) and those from known topics if the buffer size is too large. + % The messages to be retried are those from the known topics, as long as their + % number does not exceed the MaxBufferSize. + case {SaveBadMessages, length(Known) >= MaxBufferSize} of + {true, true} -> + {Unknown ++ Known, []}; + {false, true} -> + {Known, []}; + {true, false} -> + {Unknown, Known}; + {false, false} -> + {[], Known} + end; +% If the messages are not a list, then it's an ets error, report it and move on. +% And yes, those messages are gone forever +split_message_dump(Error, _State) -> + lager:error("Unable to get messages from buffer, reason: ~p", [Error]), + {[], []}. + get_ets_dump_name({OldName, Counter}) -> {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(), Ts = io_lib:format("~4.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B_", @@ -355,9 +383,9 @@ get_ets_dump_name({OldName, Counter}) -> PartialNewName = "kafkerl_messages_" ++ lists:flatten(Ts), case lists:prefix(PartialNewName, OldName) of true -> - {PartialNewName ++ integer_to_list(Counter + 1) ++ ".dump", Counter + 1}; + {PartialNewName ++ integer_to_list(Counter) ++ ".dump", Counter + 1}; _ -> - {PartialNewName ++ "0.dump", 0} + {PartialNewName ++ "0.dump", 1} end. get_metadata_tcp_options() -> @@ -430,6 +458,9 @@ send_event(Event, Callbacks) -> send_event(Event, Callback) =:= ok end, Callbacks). +metadata_handler_name(ServerName) -> + list_to_binary([atom_to_list(ServerName), "_metadata_handler"]). + %%============================================================================== %% Request building %%============================================================================== diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl new file mode 100644 index 0000000..bfb9c7f --- /dev/null +++ b/src/kafkerl_metadata_handler.erl @@ -0,0 +1,58 @@ +-module(kafkerl_metadata_handler). +-author('hernanrivasacosta@gmail.com'). + +-behaviour(gen_fsm). + +%% API +-export([request_metadata/2]). +-export([idle/2, requesting/2, on_cooldown/2]). +% gen_fsm +-export([start_link/1, init/1]). + +-record(state, {max_metadata_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + metadata_request_cd = 0 :: integer()}). + +%%============================================================================== +%% API +%%============================================================================== +-spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). +start_link(Name, Config) -> + gen_fsm:start_link({local, Name}, ?MODULE, [Config], []). + +-spec request_metadata(atom(), [topic()]) -> ok. +request_metadata(ServerRef, Topics) -> + ok. + +%%============================================================================== +%% States +%%============================================================================== +idle(_, State) -> + {next_state, open, {[], Code}, 30000};. + +requesting(_, State) -> + ok. + +on_cooldown(_, State) -> + ok. 
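The state functions above are still a placeholder: as committed, the module does not compile (the idle/2 clause ends in ";." and references an unbound Code), and the handler is reworked in the follow-up patch. For reference, an asynchronous gen_fsm state callback is expected to return a tuple such as {next_state, NextStateName, NewStateData}; a minimal hypothetical shape:

    idle({request_metadata, Topics}, StateData) ->
      %% hypothetical event; the real handling arrives in a later patch
      {next_state, requesting, {Topics, StateData}};
    idle(_Event, StateData) ->
      {next_state, idle, StateData}.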
+ +%%============================================================================== +%% Handlers +%%============================================================================== +init([Config]) -> + Schema = [{metadata_tcp_timeout, positive_integer, {default, 1500}}, + {metadata_request_cooldown, positive_integer, {default, 333}}, + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [RetryInterval, MetadataRequestCD, MaxMetadataRetries]} -> + State = #state{config = Config, + retry_interval = RetryInterval, + metadata_request_cd = MetadataRequestCD, + max_metadata_retries = MaxMetadataRetries}, + {ok, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("Metadata config error ~p", [E]) + end, Errors), + {stop, bad_config} + end. \ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 8eac465..79a718b 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -4,7 +4,7 @@ -export([send_event/2, send_error/2]). -export([get_tcp_options/1]). -export([merge_messages/1, split_messages/1, valid_message/1]). --export([buffer_name/2]). +-export([buffer_name/2, default_buffer_name/0]). -export([gather_consume_responses/0, gather_consume_responses/1]). -export([proplists_set/2]). @@ -74,6 +74,10 @@ buffer_name(Topic, Partition) -> Bin = <>, binary_to_atom(Bin, utf8). +-spec default_buffer_name() -> atom(). +default_buffer_name() -> + default_message_buffer. + -type proplist_value() :: {atom(), any()}. -type proplist() :: [proplist_value]. -spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> From 92a3b12b8d382529a7a70c756178de21de61983d Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 15 Apr 2016 11:52:38 -0300 Subject: [PATCH 48/72] improved the metadata handling --- rel/kafkerl.app.config | 7 +- src/kafkerl.erl | 56 ++--- src/kafkerl_broker_connection.erl | 63 ++--- src/kafkerl_connector.erl | 380 ++++++------------------------ src/kafkerl_metadata_handler.erl | 314 ++++++++++++++++++++++-- src/kafkerl_sup.erl | 5 +- src/kafkerl_utils.erl | 4 +- 7 files changed, 422 insertions(+), 407 deletions(-) diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 241a2b1..0c88a29 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -1,19 +1,18 @@ [{lager, [{colored, true}, {handlers, [{lager_console_backend, [debug,true]}]}]}, - {kafkerl, [%{gen_server_name, kafkerl_client}, - {disabled, false}, + {kafkerl, [{disabled, false}, {conn_config, [{brokers, [{"localhost", 9090}, {"localhost", 9091}, {"localhost", 9092}]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, {broker_tcp_timeout, 1000}, - {max_metadata_retries, -1}, + {max_metadata_retries, 3}, {assume_autocreate_topics, true}, {metadata_tcp_timeout, 5000}, {max_queue_size, 20}, % In items, per topic/partition {max_time_queued, 5}, % In seconds - {metadata_request_cooldown, 1500}, % In milliseconds + {metadata_request_cooldown, 3500}, % In milliseconds {consumer_min_bytes, 1}, {consumer_max_wait, 1500}, {dump_location, "dumps/"}, diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 27238e2..3b423c6 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -3,9 +3,9 @@ -export([start/0, start/2]). -export([produce/3, - consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, - request_metadata/0, request_metadata/1, request_metadata/2, - partitions/0, partitions/1]). 
+ consume/2, consume/3, stop_consuming/2, + request_metadata/0, request_metadata/1, + partitions/0]). -export([version/0]). %% Types @@ -31,8 +31,8 @@ -type payload() :: binary() | [binary()]. -type basic_message() :: {topic(), partition(), payload()}. --export_type([server_ref/0, error/0, options/0, callback/0, - topic/0, partition/0, payload/0, basic_message/0]). +-export_type([server_ref/0, error/0, options/0, topic/0, partition/0, payload/0, + callback/0, basic_message/0]). %%============================================================================== %% API @@ -55,63 +55,43 @@ produce(Topic, Partition, Message) -> %% Consume API -spec consume(topic(), partition()) -> ok | error(). consume(Topic, Partition) -> - consume(?MODULE, Topic, Partition, []). + consume(Topic, Partition, []). --spec consume(topic(), partition(), options()) -> ok | error(); - (server_ref(), topic(), partition()) -> ok | error(). -consume(Topic, Partition, Options) when is_list(Options) -> - consume(?MODULE, Topic, Partition, Options); -consume(ServerRef, Topic, Partition) -> - consume(ServerRef, Topic, Partition, []). - --spec consume(server_ref(), topic(), partition(), options()) -> - ok | {[payload()], offset()} | error(). -consume(ServerRef, Topic, Partition, Options) -> +-spec consume(topic(), partition(), options()) -> ok | + {[payload()], offset()} | + error(). +consume(Topic, Partition, Options) -> case {proplists:get_value(consumer, Options, undefined), proplists:get_value(fetch_interval, Options, false)} of {undefined, false} -> NewOptions = [{consumer, self()} | Options], - kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), + kafkerl_connector:fetch(Topic, Partition, NewOptions), kafkerl_utils:gather_consume_responses(); {undefined, _} -> {error, fetch_interval_specified_with_no_consumer}; _ -> - kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) + kafkerl_connector:fetch(Topic, Partition, Options) end. -spec stop_consuming(topic(), partition()) -> ok. stop_consuming(Topic, Partition) -> - stop_consuming(?MODULE, Topic, Partition). - --spec stop_consuming(server_ref(), topic(), partition()) -> ok. -stop_consuming(ServerRef, Topic, Partition) -> - kafkerl_connector:stop_fetch(ServerRef, Topic, Partition). + kafkerl_connector:stop_fetch(Topic, Partition). %% Metadata API -spec request_metadata() -> ok. request_metadata() -> - request_metadata(?MODULE). + request_metadata([]). --spec request_metadata(server_ref() | [topic()]) -> ok. +-spec request_metadata([topic()]) -> ok. request_metadata(Topics) when is_list(Topics) -> - request_metadata(?MODULE, Topics); -request_metadata(ServerRef) -> - kafkerl_connector:request_metadata(ServerRef). - --spec request_metadata(server_ref(), [topic()]) -> ok. -request_metadata(ServerRef, Topics) -> - kafkerl_connector:request_metadata(ServerRef, Topics). + kafkerl_connector:request_metadata(Topics). %% Partitions -spec partitions() -> [{topic(), [partition()]}] | error(). partitions() -> - partitions(?MODULE). - --spec partitions(server_ref()) -> [{topic(), [partition()]}] | error(). -partitions(ServerRef) -> - kafkerl_connector:get_partitions(ServerRef). + kafkerl_connector:get_partitions(). %% Utils -spec version() -> {integer(), integer(), integer()}. version() -> - {2, 0, 0}. \ No newline at end of file + {3, 0, 0}. 
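This patch removes the server_ref argument from the public API, so callers no longer address a particular connector instance. A usage sketch of the slimmed-down surface, with topic and partition assumed and the metadata assumed to be available:

    ok = kafkerl:produce(<<"test1">>, 0, <<"hello">>),
    {Messages, NextOffset} = kafkerl:consume(<<"test1">>, 0, [{offset, 0}]),
    Partitions = kafkerl:partitions(),
    ok = kafkerl:request_metadata([<<"test1">>]).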
\ No newline at end of file diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index f9bb48a..9e70e6e 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -8,7 +8,7 @@ % Only for internal use -export([connect/6]). % Supervisors --export([start_link/4]). +-export([start_link/3]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -31,7 +31,6 @@ client_id = undefined :: binary(), socket = undefined :: port(), address = undefined :: kafkerl_connector:address(), - connector = undefined :: pid(), tref = undefined :: any(), tcp_options = [] :: [any()], max_retries = 0 :: integer(), @@ -51,12 +50,12 @@ %%============================================================================== %% API %%============================================================================== --spec start_link(conn_idx(), pid(), kafkerl_connector:address(), any()) -> +-spec start_link(conn_idx(), kafkerl_connector:address(), any()) -> start_link_response(). -start_link(Id, Connector, Address, Config) -> +start_link(Id, Address, Config) -> NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), Name = list_to_atom(NameStr), - Params = [Id, Connector, Address, Config, Name], + Params = [Id, Address, Config, Name], case gen_server:start_link({local, Name}, ?MODULE, Params, []) of {ok, Pid} -> {ok, Name, Pid}; @@ -128,7 +127,7 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%============================================================================== %% Handlers %%============================================================================== -init([Id, Connector, Address, Config, Name]) -> +init([Id, Address, Config, Name]) -> Schema = [{tcp_options, [any], {default, []}}, {retry_interval, positive_integer, {default, 1000}}, {max_retries, positive_integer, {default, 3}}, @@ -137,13 +136,17 @@ init([Id, Connector, Address, Config, Name]) -> case normalizerl:normalize_proplist(Schema, Config) of {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), - EtsName = list_to_atom(atom_to_list(Name) ++ "_ets"), - ets:new(EtsName, [named_table, public, {write_concurrency, true}, - {read_concurrency, true}]), - State = #state{conn_idx = Id, tcp_options = NewTCPOpts, address = Address, - max_retries = MaxRetries, retry_interval = RetryInterval, - connector = Connector, client_id = ClientId, name = Name, - max_time_queued = MaxTimeQueued, ets = EtsName}, + Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), + _ = ets:new(Ets, ets_options()), + State = #state{ets = Ets, + name = Name, + conn_idx = Id, + address = Address, + client_id = ClientId, + max_retries = MaxRetries, + tcp_options = NewTCPOpts, + retry_interval = RetryInterval, + max_time_queued = MaxTimeQueued}, Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], _Pid = spawn_link(?MODULE, connect, Params), {ok, _Tref} = queue_flush(MaxTimeQueued), @@ -159,9 +162,8 @@ handle_flush(State = #state{socket = undefined}) -> {noreply, State}; handle_flush(State = #state{buffers = []}) -> {noreply, State}; -handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, - client_id = ClientId, connector = Connector, - name = Name}) -> +handle_flush(State = #state{ets = EtsName, socket = Socket, buffers = Buffers, + name = Name, client_id = ClientId}) -> {ok, CorrelationId, NewState} = build_correlation_id(State), % TODO: Maybe buffer all 
this messages in case something goes wrong AllMessages = get_all_messages(Buffers), @@ -180,7 +182,7 @@ handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, [Name, Reason]), gen_tcp:close(Socket), ets:delete_all_objects(EtsName, CorrelationId), - ok = resend_messages(MergedMessages, Connector), + ok = resend_messages(MergedMessages), {noreply, handle_tcp_close(NewState)}; ok -> _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), @@ -231,6 +233,7 @@ handle_fetch(ServerRef, Topic, Partition, Options, server_ref = ServerRef, topic = Topic, partition = Partition, + %options = [scheduled | Options]}, options = Options}, NewScheduledFetches = case KeyTakeResult of false -> ScheduledFetches; @@ -342,8 +345,7 @@ handle_fetch_response(Bin, Fetch, {ok, State#state{current_fetch = void, fetches = NewFetches}} end. -handle_produce_response(Bin, State = #state{connector = Connector, name = Name, - ets = EtsName}) -> +handle_produce_response(Bin, State = #state{name = Name, ets = EtsName}) -> case kafkerl_protocol:parse_produce_response(Bin) of {ok, CorrelationId, Topics} -> case ets:lookup(EtsName, CorrelationId) of @@ -352,15 +354,15 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, {Errors, Successes} = split_errors_and_successes(Topics), % First, send the offsets and messages that were delivered _ = spawn(fun() -> - notify_success(Successes, Messages, Connector) + notify_success(Successes, Messages) end), % Then handle the errors case handle_errors(Errors, Messages, Name) of ignore -> {ok, State}; {request_metadata, MessagesToResend} -> - kafkerl_connector:request_metadata(Connector), - ok = resend_messages(MessagesToResend, Connector), + kafkerl_connector:request_metadata(), + ok = resend_messages(MessagesToResend), {ok, State} end; _ -> @@ -376,18 +378,18 @@ handle_produce_response(Bin, State = #state{connector = Connector, name = Name, %%============================================================================== %% Utils %%============================================================================== -resend_messages(Messages, Connector) -> - F = fun(M) -> kafkerl_connector:send(Connector, M, []) end, +resend_messages(Messages) -> + F = fun(M) -> kafkerl_connector:send(M) end, lists:foreach(F, Messages). -notify_success([], _Messages, _Pid) -> +notify_success([], _Messages) -> ok; -notify_success([{Topic, Partition, Offset} | T], Messages, Pid) -> +notify_success([{Topic, Partition, Offset} | T], Messages) -> MergedMessages = kafkerl_utils:merge_messages(Messages), Partitions = partitions_in_topic(Topic, MergedMessages), M = messages_in_partition(Partition, Partitions), - kafkerl_connector:produce_succeeded(Pid, {Topic, Partition, M, Offset}), - notify_success(T, Messages, Pid). + kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), + notify_success(T, Messages). partitions_in_topic(Topic, Messages) -> lists:flatten([P || {T, P} <- Messages, T =:= Topic]). @@ -528,4 +530,7 @@ send_messages(Consumer, [Event | T]) -> Error -> Error end; send_messages(Consumer, Event) -> - kafkerl_utils:send_event(Consumer, Event). \ No newline at end of file + kafkerl_utils:send_event(Consumer, Event). + +ets_options() -> + [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. 
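Since the connector is now reached through its registered name, broker connections no longer receive a connector pid. A hypothetical start-up sketch under that assumption (the address, ConnConfig and topic below are illustrative); it mirrors what the metadata handler's get_broker_mapping/4 does further down in this patch:

{ok, ConnName, _Pid} =
  kafkerl_broker_connection:start_link(0, {"localhost", 9092}, ConnConfig),
Buffer = kafkerl_utils:buffer_name(<<"my_topic">>, 0),
_ = ets_buffer:create(Buffer, fifo),
_ = kafkerl_broker_connection:add_buffer(ConnName, Buffer).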
\ No newline at end of file diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 0aeab59..e1c3e54 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -5,20 +5,19 @@ %% API % Metadata --export([request_metadata/1, request_metadata/2, request_metadata/3, - get_partitions/1]). +-export([request_metadata/0, request_metadata/1, get_partitions/0]). % Produce -export([send/1]). % Consume --export([fetch/4, stop_fetch/3]). +-export([fetch/3, stop_fetch/2]). % Common --export([subscribe/2, subscribe/3, unsubscribe/2]). -% Only for internal use --export([do_request_metadata/6, make_metadata_request/1]). -% Only for broker connections --export([produce_succeeded/2]). +-export([subscribe/1, subscribe/2, unsubscribe/1]). +% Only used by broker connections +-export([produce_succeeded/1]). +% Only used by the metadata handler +-export([topic_mapping_updated/1]). % Supervisors --export([start_link/2]). +-export([start_link/1]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -29,24 +28,20 @@ -type kafler_port() :: 1..65535. -type address() :: {kafler_host(), kafler_port()}. --type filters() :: all | [atom()]. +-type filters() :: all | [atom()]. -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {brokers = [] :: [address()], - broker_mapping = void :: [broker_mapping()] | void, - client_id = <<>> :: kafkerl_protocol:client_id(), - config = [] :: {atom(), any()}, +-record(state, {broker_mapping = void :: [broker_mapping()] | void, + config = [] :: [{atom(), any()}], autocreate_topics = false :: boolean(), callbacks = [] :: [{filters(), kafkerl:callback()}], - known_topics = [] :: [binary()], last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options(), dump_location = "" :: string(), max_buffer_size = 0 :: integer(), - save_bad_messages = false :: boolean(), - metadata_handler = void :: atom()}). + save_bad_messages = false :: boolean()}). -type state() :: #state{}. -export_type([address/0]). @@ -54,9 +49,9 @@ %%============================================================================== %% API %%============================================================================== --spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). -start_link(Name, Config) -> - gen_server:start_link({local, Name}, ?MODULE, [Config, Name], []). +-spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error(). +start_link(Config) -> + gen_server:start_link({local, kafkerl}, ?MODULE, [Config], []). -spec send(kafkerl:basic_message()) -> ok | kafkerl:error(). @@ -77,58 +72,52 @@ send({Topic, Partition, _Payload} = Message) -> end end. --spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), - kafkerl:options()) -> ok | kafkerl:error(). -fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). +-spec fetch(kafkerl:topic(), kafkerl:partition(), kafkerl:options()) -> + ok | kafkerl:error(). +fetch(Topic, Partition, Options) -> + gen_server:call(kafkerl, {fetch, Topic, Partition, Options}). --spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> - ok. -stop_fetch(ServerRef, Topic, Partition) -> - gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). +-spec stop_fetch(kafkerl:topic(), kafkerl:partition()) -> ok. 
+stop_fetch(Topic, Partition) -> + gen_server:call(kafkerl, {stop_fetch, Topic, Partition}). --spec get_partitions(kafkerl:server_ref()) -> - [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). -get_partitions(ServerRef) -> - case gen_server:call(ServerRef, {get_partitions}) of +-spec get_partitions() -> [{kafkerl:topic(), [kafkerl:partition()]}] | + kafkerl:error(). +get_partitions() -> + case gen_server:call(kafkerl, {get_partitions}) of {ok, Mapping} -> get_partitions_from_mapping(Mapping); Error -> Error end. --spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> - ok | kafkerl:error(). -subscribe(ServerRef, Callback) -> - subscribe(ServerRef, Callback, all). --spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> - ok | kafkerl:error(). -subscribe(ServerRef, Callback, Filter) -> - gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). --spec unsubscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok. -unsubscribe(ServerRef, Callback) -> - gen_server:call(ServerRef, {unsubscribe, Callback}). +-spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). +subscribe(Callback) -> + subscribe(Callback, all). +-spec subscribe(kafkerl:callback(), filters()) -> ok | kafkerl:error(). +subscribe(Callback, Filter) -> + gen_server:call(kafkerl, {subscribe, {Filter, Callback}}). --spec request_metadata(kafkerl:server_ref()) -> ok. -request_metadata(ServerRef) -> - gen_server:call(ServerRef, {request_metadata}). +-spec unsubscribe(kafkerl:callback()) -> ok. +unsubscribe(Callback) -> + gen_server:call(kafkerl, {unsubscribe, Callback}). --spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()] | boolean()) -> - ok. -request_metadata(ServerRef, TopicsOrForced) -> - gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). +-spec request_metadata() -> ok. +request_metadata() -> + gen_server:call(kafkerl, {request_metadata, []}). --spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()], boolean()) -> - ok. -request_metadata(ServerRef, Topics, Forced) -> - gen_server:call(ServerRef, {request_metadata, Topics, Forced}). +-spec request_metadata([kafkerl:topic()]) -> ok. +request_metadata(Topics) -> + gen_server:call(kafkerl, {request_metadata, Topics}). + +-spec produce_succeeded([{kafkerl:topic(), kafkerl:partition(), + [binary()], integer()}]) -> ok. +produce_succeeded(Produced) -> + gen_server:cast(kafkerl, {produce_succeeded, Produced}). --spec produce_succeeded(kafkerl:server_ref(), [{kafkerl:topic(), - kafkerl:partition(), - [binary()], - integer()}]) -> ok. -produce_succeeded(ServerRef, Messages) -> - gen_server:cast(ServerRef, {produce_succeeded, Messages}). +-spec topic_mapping_updated(any()) -> ok. +topic_mapping_updated(TopicMapping) -> + gen_server:cast(kafkerl, {topic_mapping_updated, TopicMapping}). 
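The connector now learns about topology changes through the topic_mapping_updated/1 cast and fans them out to subscribers as partition_update events (see the gen_server callbacks below). A subscriber sketch, assuming, as the test suite added later in this series does, that a plain pid callback receives events as ordinary messages:

ok = kafkerl_connector:subscribe(self(), [partition_update]),
receive
  {partition_update, Partitions} ->
    _ = lager:info("topology refreshed: ~p", [Partitions])
after 5000 ->
  timeout
end.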
%%============================================================================== %% gen_server callbacks @@ -139,14 +128,8 @@ handle_call({fetch, Topic, Partition, Options}, _From, State) -> {reply, handle_fetch(Topic, Partition, Options, State), State}; handle_call({stop_fetch, Topic, Partition}, _From, State) -> {reply, handle_stop_fetch(Topic, Partition, State), State}; -handle_call({request_metadata}, _From, State) -> - {reply, ok, handle_request_metadata(State, [])}; -handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> - {reply, ok, handle_request_metadata(State, [], true)}; handle_call({request_metadata, Topics}, _From, State) -> - {reply, ok, handle_request_metadata(State, Topics)}; -handle_call({request_metadata, Topics, Forced}, _From, State) -> - {reply, ok, handle_request_metadata(State, Topics, Forced)}; + {reply, handle_request_metadata(Topics), State}; handle_call({get_partitions}, _From, State) -> {reply, handle_get_partitions(State), State}; handle_call({subscribe, Callback}, _From, State) -> @@ -168,39 +151,25 @@ handle_info(metadata_timeout, State) -> {stop, {error, unable_to_retrieve_metadata}, State}; handle_info({metadata_updated, []}, State) -> % If the metadata arrived empty request it again - {noreply, handle_request_metadata(State#state{broker_mapping = []}, [])}; -handle_info({metadata_updated, Mapping}, State) -> - % Create the topic mapping (this also starts the broker connections) - NewBrokerMapping = get_broker_mapping(Mapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [NewBrokerMapping]), - % Get the partition data to send to the subscribers and send it - PartitionData = get_partitions_from_mapping(NewBrokerMapping), - Callbacks = State#state.callbacks, - NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), - % Add to the list of known topics - NewTopics = lists:sort([T || {T, _P} <- PartitionData]), - NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), - _ = lager:debug("Known topics: ~p", [NewKnownTopics]), - % TODO: Maybe retry from the dumps - {noreply, State#state{broker_mapping = NewBrokerMapping, - callbacks = NewCallbacks, - known_topics = NewKnownTopics}}; -handle_info({'DOWN', Ref, process, _, normal}, State) -> - true = demonitor(Ref), + ok = handle_request_metadata([]), {noreply, State}; -handle_info({'DOWN', Ref, process, _, Reason}, State) -> - _ = lager:error("metadata request failed, reason: ~p", [Reason]), - true = demonitor(Ref), - {noreply, handle_request_metadata(State, [], true)}; +%handle_info({metadata_updated, Mapping}, State) -> + handle_info(Msg, State) -> _ = lager:notice("Unexpected info message received: ~p on ~p", [Msg, State]), {noreply, State}. -spec handle_cast(any(), state()) -> {noreply, state()}. -handle_cast({produce_succeeded, Messages}, State) -> +handle_cast({produce_succeeded, Produced}, State) -> + Callbacks = State#state.callbacks, + NewCallbacks = send_event({produced, Produced}, Callbacks), + {noreply, State#state{callbacks = NewCallbacks}}; +handle_cast({topic_mapping_updated, NewMapping}, State) -> + % Get the partition data to send to the subscribers and send it + PartitionData = get_partitions_from_mapping(NewMapping), Callbacks = State#state.callbacks, - NewCallbacks = send_event({produced, Messages}, Callbacks), - {noreply, State#state{callbacks = NewCallbacks}}. + NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), + {noreply, State#state{callbacks = NewCallbacks, broker_mapping = NewMapping}}. 
% Boilerplate -spec terminate(atom(), state()) -> ok. @@ -211,13 +180,10 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. %%============================================================================== %% Handlers %%============================================================================== -init([Config, Name]) -> +init([Config]) -> % The schema indicates what is expected of the configuration, it validates and % normalizes the configuration - Schema = [{brokers, [{string, {integer, {1, 65535}}}], required}, - {client_id, binary, {default, <<"kafkerl_client">>}}, - {topics, [binary], required}, - {assume_autocreate_topics, boolean, {default, false}}, + Schema = [{assume_autocreate_topics, boolean, {default, false}}, {consumer_min_bytes, positive_integer, {default, 1}}, {consumer_max_wait, positive_integer, {default, 1500}}, {dump_location, string, {default, ""}}, @@ -225,28 +191,23 @@ init([Config, Name]) -> {save_messages_for_bad_topics, boolean, {default, true}}, {flush_to_disk_every, positive_integer, {default, 10000}}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [Brokers, ClientId, Topics, AutocreateTopics, MinBytes, MaxWait, - DumpLocation, MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> + {ok, [AutocreateTopics, MinBytes, MaxWait, DumpLocation, + MaxBufferSize, SaveBadMessages, FlushToDiskInterval]} -> % Start the metadata request handler - MetadataHandlerName = metadata_handler_name(Name), - {ok, _} = kafkerl_metadata_handler:start(MetadataHandlerName, Config), + {ok, _} = kafkerl_metadata_handler:start_link(Config), % Build the default fetch options DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - client_id = ClientId, dump_location = DumpLocation, max_buffer_size = MaxBufferSize, save_bad_messages = SaveBadMessages, autocreate_topics = AutocreateTopics, - default_fetch_options = DefaultFetchOptions, - metadata_handler = MetadataHandlerName}, + default_fetch_options = DefaultFetchOptions}, % Create a buffer to hold unsent messages _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), % Start the interval that manages the buffers holding unsent messages {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), - {_Pid, _Ref} = make_metadata_request(State), + ok = kafkerl_metadata_handler:request_metadata([]), {ok, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> @@ -283,28 +244,8 @@ handle_get_partitions(#state{broker_mapping = void}) -> handle_get_partitions(#state{broker_mapping = Mapping}) -> {ok, Mapping}. -handle_request_metadata(State, Topics) -> - handle_request_metadata(State, Topics, false). - -% Ignore it if the topic mapping is void, we are already requesting the metadata -handle_request_metadata(State = #state{broker_mapping = void}, _, false) -> - State; -handle_request_metadata(State, NewTopics, _) -> - SortedNewTopics = lists:sort(NewTopics), - NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), - Now = get_timestamp(), - LastRequest = State#state.last_metadata_request, - Cooldown = State#state.metadata_request_cd, - LastMetadataUpdate = case Cooldown - (Now - LastRequest) of - Negative when Negative =< 0 -> - _ = make_metadata_request(State), - Now; - Time -> - _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]), - LastRequest - end, - State#state{broker_mapping = void, known_topics = NewKnownTopics, - last_metadata_request = LastMetadataUpdate}. 
+handle_request_metadata(Topics) -> + kafkerl_metadata_handler:request_metadata(Topics). handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, last_dump_name = LastDumpName}) -> @@ -347,11 +288,11 @@ retry_messages(Messages) -> _Pid = spawn(fun() -> [send(M) || M <- Messages] end), ok. -split_message_dump(Messages, #state{known_topics = KnownTopics, - max_buffer_size = MaxBufferSize, +split_message_dump(Messages, #state{max_buffer_size = MaxBufferSize, save_bad_messages = SaveBadMessages}) when is_list(Messages) -> + KnownTopics = kafkerl_metadata_handler:get_known_topics(), % Split messages between for topics kafkerl knows exist and those that do not. {Known, Unknown} = lists:partition(fun({Topic, _Partition, _Payload}) -> lists:member(Topic, KnownTopics) @@ -388,64 +329,6 @@ get_ets_dump_name({OldName, Counter}) -> {PartialNewName ++ "0.dump", 1} end. -get_metadata_tcp_options() -> - kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). - -do_request_metadata(Pid, _Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> - Pid ! metadata_timeout; -do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> - case do_request_metadata(Brokers, TCPOpts, Request) of - {ok, TopicMapping} -> - Pid ! {metadata_updated, TopicMapping}; - _Error -> - timer:sleep(RetryInterval), - NewRetries = case Retries of - -1 -> -1; - N -> N - 1 - end, - do_request_metadata(Pid, Brokers, TCPOpts, NewRetries, RetryInterval, - Request) - end. - -do_request_metadata([], _TCPOpts, _Request) -> - {error, all_down}; -do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> - _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, TCPOpts) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, TCPOpts, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, TCPOpts, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of - {error, Reason} -> - warn_metadata_request(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, TCPOpts, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. - send_event(Event, {all, Callback}) -> kafkerl_utils:send_event(Callback, Event); send_event({EventName, _Data} = Event, {Events, Callback}) -> @@ -458,100 +341,6 @@ send_event(Event, Callbacks) -> send_event(Event, Callback) =:= ok end, Callbacks). -metadata_handler_name(ServerName) -> - list_to_binary([atom_to_list(ServerName), "_metadata_handler"]). 
- -%%============================================================================== -%% Request building -%%============================================================================== -metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> - kafkerl_protocol:build_metadata_request([], 0, ClientId); -metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, - NewTopics) -> - AllTopics = lists:umerge(KnownTopics, NewTopics), - kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). - -%%============================================================================== -%% Topic/broker mapping -%%============================================================================== -get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). - -expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; -expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - _ = lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; -expand_topic({Error, Topic, _Partitions}) -> - _ = lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. - -expand_partitions(Metadata) -> - expand_partitions(Metadata, []). - -expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; -expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); -expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). - -get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, 0, []). 
- -get_broker_mapping([], _State, _N, Acc) -> - [{Key, Address} || {_ConnId, Key, Address} <- Acc]; -get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], - State = #state{config = Config}, N, Acc) -> - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), - {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of - false -> - {start_broker_connection(N, Address, Config), N + 1}; - {ConnId, _, BrokerConnection} -> - {BrokerConnection, N} - end, - - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), - kafkerl_broker_connection:add_buffer(Conn, Buffer), - - NewMapping = {ConnId, {Topic, Partition}, Conn}, - get_broker_mapping(T, State, NewN, [NewMapping | Acc]). - -start_broker_connection(N, Address, Config) -> - case kafkerl_broker_connection:start_link(N, self(), Address, Config) of - {ok, Name, _Pid} -> - Name; - {error, {already_started, Pid}} -> - kafkerl_broker_connection:clear_buffers(Pid), - Pid - end. - % This is used to return the available partitions for each topic get_partitions_from_mapping(Mapping) -> F = fun({{Topic, Partition}, _}, Acc) -> @@ -568,25 +357,4 @@ send_mapping_to(_NewCallback, #state{broker_mapping = void}) -> ok; send_mapping_to(NewCallback, #state{broker_mapping = Mapping}) -> Partitions = get_partitions_from_mapping(Mapping), - send_event({partition_update, Partitions}, NewCallback). - -make_metadata_request(State = #state{brokers = Brokers, - known_topics = Topics, - max_metadata_retries = MaxMetadataRetries, - retry_interval = RetryInterval}) -> - Request = metadata_request(State, Topics), - % Start requesting metadata - Params = [self(), Brokers, get_metadata_tcp_options(), MaxMetadataRetries, - RetryInterval, Request], - spawn_monitor(?MODULE, do_request_metadata, Params). - -get_timestamp() -> - {A, B, C} = erlang:timestamp(), - (A * 1000000 + B) * 1000 + C div 1000. - -%%============================================================================== -%% Error handling -%%============================================================================== -warn_metadata_request(Host, Port, Reason) -> - _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). \ No newline at end of file + send_event({partition_update, Partitions}, NewCallback). \ No newline at end of file diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index bfb9c7f..ef91183 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -4,55 +4,319 @@ -behaviour(gen_fsm). %% API --export([request_metadata/2]). +-export([request_metadata/1, get_known_topics/0]). +%% States -export([idle/2, requesting/2, on_cooldown/2]). -% gen_fsm --export([start_link/1, init/1]). +%% Internal +-export([make_request/3]). +%% gen_fsm +-export([start_link/1, init/1, handle_info/3, terminate/3, code_change/4, + handle_event/3, handle_sync_event/4]). + +-include("kafkerl.hrl"). --record(state, {max_metadata_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - metadata_request_cd = 0 :: integer()}). +-record(state, {config = [] :: [{atom(), any()}], + client_id = <<>> :: kafkerl_protocol:client_id(), + brokers = [] :: [kafkerl_connector:address()], + max_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + cooldown = 0 :: integer(), + known_topics = [] :: [kafkerl:topic()], + next_topics = [] :: [kafkerl:topic()]}). +-type state() :: #state{}. 
 %%==============================================================================
 %% API
 %%==============================================================================
--spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error().
-start_link(Name, Config) ->
-  gen_fsm:start_link({local, Name}, ?MODULE, [Config], []).
+-spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error().
+start_link(Config) ->
+  gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []).
 
--spec request_metadata(atom(), [topic()]) -> ok.
-request_metadata(ServerRef, Topics) ->
-  ok.
+-spec request_metadata([kafkerl:topic()]) -> ok.
+request_metadata(Topics) ->
+  gen_fsm:send_event(?MODULE, {request, Topics}).
+
+-spec get_known_topics() -> [kafkerl:topic()].
+get_known_topics() ->
+  gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics).
 
 %%==============================================================================
 %% States
 %%==============================================================================
-idle(_, State) ->
-  {next_state, open, {[], Code}, 30000};.
+-spec idle(any(), state()) -> {next_state, atom(), state()}.
+idle({request, Topics}, State = #state{known_topics = KnownTopics}) ->
+  % Add the requested topics to the state (umerge expects both lists sorted)
+  SortedTopics = lists:usort(Topics),
+  NewKnownTopics = lists:umerge(SortedTopics, lists:usort(KnownTopics)),
+  NewState = State#state{known_topics = NewKnownTopics},
+  % Make the request
+  ok = schedule_metadata_request(NewState),
+  % And move to the requesting state
+  {next_state, requesting, NewState}.
 
-requesting(_, State) ->
-  ok.
+-spec requesting(any(), state()) -> {next_state, atom(), state()}.
+% Handle a new metadata request while there's one in progress
+requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) ->
+  SortedTopics = lists:usort(NewTopics), % This also removes repeated entries
+  % If the request is for known topics, then we can safely ignore it, otherwise,
+  % queue a metadata request
+  NewState = case SortedTopics -- KnownTopics of
+               [] -> State;
+               _  -> request_metadata([]),
+                     State#state{known_topics = lists:umerge(KnownTopics,
+                                                             SortedTopics)}
+             end,
+  {next_state, requesting, NewState};
+% Handle the updated metadata
+requesting({metadata_updated, RawMapping}, State) ->
+  % Create the topic mapping (this also starts the broker connections)
+  NewMapping = get_broker_mapping(RawMapping, State),
+  _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping]),
+  ok = kafkerl_connector:topic_mapping_updated(NewMapping),
+  {next_state, idle, State};
+% If we have no more retries left, go on cooldown
+requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) ->
+  Params = [?MODULE, on_timer],
+  {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params),
+  {next_state, on_cooldown, State};
+% If we have more retries to do, schedule a new retry
+requesting({metadata_retry, Retries}, State) ->
+  ok = schedule_metadata_request(Retries, State),
+  {next_state, requesting, State}.
 
-on_cooldown(_, State) ->
+-spec on_cooldown(any(), state()) -> {next_state, atom(), state()}.
+on_cooldown({request, NewTopics}, State = #state{known_topics = KnownTopics}) ->
+  % Since we are on cooldown (the time between consecutive requests) we only add
+  % the topics to the scheduled next request
+  SortedTopics = lists:usort(NewTopics),
+  {next_state, on_cooldown,
+   State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}};
+on_cooldown(on_timer, State) ->
+  ok = schedule_metadata_request(State),
+  {next_state, requesting, State}.
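A quick sketch of how the rest of the application drives this FSM; it is illustrative only and the topic name is made up. The handler is locally registered, so callers never hold a pid: metadata requests are fire-and-forget events, while the known-topic list can be read synchronously.

ok = kafkerl_metadata_handler:request_metadata([<<"my_topic">>]), % idle -> requesting
KnownTopics = kafkerl_metadata_handler:get_known_topics(),
true = lists:member(<<"my_topic">>, KnownTopics).

The membership check holds because both messages go through the same mailbox, so the request event is handled before the synchronous read is answered.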
+ +%%============================================================================== +%% Events +%%============================================================================== +handle_sync_event(get_known_topics, _From, StateName, State) -> + Reply = State#state.known_topics, + {reply, Reply, StateName, State}. + +%%============================================================================== +%% gen_fsm boilerplate +%%============================================================================== +-spec handle_info(any(), atom(), state()) -> {next_state, atom(), state()}. +handle_info(Message, StateName, State) -> + lager:info("received unexpected message ~p", [Message]), + {next_state, StateName, State}. + +-spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. +code_change(_OldVsn, StateName, StateData, _Extra) -> + {ok, StateName, StateData}. + +-spec terminate(any(), atom(), state()) -> ok. +terminate(_Reason, _StateName, _StateData) -> ok. +-spec handle_event(any(), atom(), state()) -> {next_state, atom(), state()}. +handle_event(_Event, StateName, StateData) -> + {next_state, StateName, StateData}. + +%-spec handle_sync_event(any(), any(), atom(), state()) -> +% {next_state, atom(), state()}. +%handle_sync_event(_Event, _From, StateName, StateData) -> +% {next_state, StateName, StateData}. + %%============================================================================== %% Handlers %%============================================================================== init([Config]) -> - Schema = [{metadata_tcp_timeout, positive_integer, {default, 1500}}, + Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, + {metadata_tcp_timeout, positive_integer, {default, 1500}}, {metadata_request_cooldown, positive_integer, {default, 333}}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}], + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, + {brokers, [{string, {integer, {1, 65535}}}], required}, + {topics, [binary], required}], case normalizerl:normalize_proplist(Schema, Config) of - {ok, [RetryInterval, MetadataRequestCD, MaxMetadataRetries]} -> - State = #state{config = Config, - retry_interval = RetryInterval, - metadata_request_cd = MetadataRequestCD, - max_metadata_retries = MaxMetadataRetries}, - {ok, State}; + {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + cooldown = Cooldown, + client_id = ClientId, + max_retries = MaxRetries, + retry_interval = RetryInterval}, + {ok, idle, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> _ = lager:critical("Metadata config error ~p", [E]) end, Errors), {stop, bad_config} + end. + +%%============================================================================== +%% Request logic +%%============================================================================== +schedule_metadata_request(State) -> + schedule_metadata_request(undefined, State). + +schedule_metadata_request(Retries, State = #state{brokers = Brokers, + max_retries = MaxRetries, + known_topics = Topics, + retry_interval = Interval}) -> + Request = metadata_request(State, Topics), + case Retries of + undefined -> + Params = [Brokers, Request, MaxRetries], + _ = spawn(?MODULE, make_request, Params); + _ -> + Params = [Brokers, Request, Retries], + {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) + end, + ok. 
+ +make_request(Brokers, Request, Retries) -> + case do_request_metadata(Brokers, Request) of + {ok, TopicMapping} -> + gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); + Error -> + _ = lager:debug("Metadata request error: ~p", [Error]), + NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, + gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) + end. + +do_request_metadata([], _Request) -> + {error, all_down}; +do_request_metadata([{Host, Port} = _Broker | T], Request) -> + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. + +%%============================================================================== +%% Utils +%%============================================================================== +get_metadata_tcp_options() -> + kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). + +log_metadata_request_error(Host, Port, Reason) -> + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). + +metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> + kafkerl_protocol:build_metadata_request([], 0, ClientId); +metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, + NewTopics) -> + AllTopics = lists:umerge(KnownTopics, NewTopics), + kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). + +%%============================================================================== +%% Topic/broker mapping +%%============================================================================== +get_topic_mapping({BrokerMetadata, TopicMetadata}) -> + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). 
+ +expand_topic({?NO_ERROR, Topic, Partitions}) -> + {true, {Topic, Partitions}}; +expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> + % Replica not available can be ignored, still, show a warning + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; +expand_topic({Error, Topic, _Partitions}) -> + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. + +expand_partitions(Metadata) -> + expand_partitions(Metadata, []). + +expand_partitions({_Topic, []}, Acc) -> + {true, Acc}; +expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, + _, _} | T]}, Acc) -> + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); +expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). + +get_broker_mapping(TopicMapping, State) -> + get_broker_mapping(TopicMapping, State, 0, []). + +get_broker_mapping([], _State, _N, Acc) -> + [{Key, Address} || {_ConnId, Key, Address} <- Acc]; +get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], + State = #state{config = Config}, N, Acc) -> + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = ets_buffer:create(Buffer, fifo), + {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of + false -> + {start_broker_connection(N, Address, Config), N + 1}; + {ConnId, _, BrokerConnection} -> + {BrokerConnection, N} + end, + + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = ets_buffer:create(Buffer, fifo), + kafkerl_broker_connection:add_buffer(Conn, Buffer), + + NewMapping = {ConnId, {Topic, Partition}, Conn}, + get_broker_mapping(T, State, NewN, [NewMapping | Acc]). + +start_broker_connection(N, Address, Config) -> + case kafkerl_broker_connection:start_link(N, Address, Config) of + {ok, Name, _Pid} -> + Name; + {error, {already_started, Pid}} -> + kafkerl_broker_connection:clear_buffers(Pid), + Pid end. \ No newline at end of file diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index fd989b4..e333131 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -33,9 +33,8 @@ init([]) -> {ok, {{one_for_one, 5, 10}, ChildSpecs}}. get_connector_child_spec() -> - Name = application:get_env(kafkerl, gen_server_name, kafkerl), {ok, ConnConfig} = application:get_env(kafkerl, conn_config), Topics = application:get_env(kafkerl, topics, []), - Params = [Name, [{topics, Topics} | ConnConfig]], + Params = [[{topics, Topics} | ConnConfig]], MFA = {kafkerl_connector, start_link, Params}, - {Name, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file + {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 79a718b..03efb71 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -78,12 +78,12 @@ buffer_name(Topic, Partition) -> default_buffer_name() -> default_message_buffer. 
--type proplist_value() :: {atom(), any()}. +-type proplist_value() :: {atom(), any()} | atom(). -type proplist() :: [proplist_value]. -spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> proplist(). proplists_set(Proplist, {K, _V} = NewValue) -> - lists:keyreplace(K, 1, Proplist, NewValue); + lists:keystore(K, 1, proplists:unfold(Proplist), NewValue); proplists_set(Proplist, []) -> Proplist; proplists_set(Proplist, [H | T]) -> From 93b9c68163d92c0e2956527e93c0384bfacb0652 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 15 Apr 2016 11:54:25 -0300 Subject: [PATCH 49/72] version bump --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e437ff..984e574 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -kafkerl v2.0.0 +kafkerl v3.0.0 ============== [![Gitter](https://badges.gitter.im/Join Chat.svg)](https://gitter.im/HernanRivasAcosta/kafkerl?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) From 5c4f32370d40722fa67f8e4e02b7d769a3370693 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Fri, 13 May 2016 16:19:39 -0300 Subject: [PATCH 50/72] improved error messages for the default buffer --- run.sh => run | 0 src/kafkerl_connector.erl | 7 ++++--- 2 files changed, 4 insertions(+), 3 deletions(-) rename run.sh => run (100%) diff --git a/run.sh b/run similarity index 100% rename from run.sh rename to run diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index e1c3e54..5f7f2bd 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -61,13 +61,14 @@ send({Topic, Partition, _Payload} = Message) -> NewSize when is_integer(NewSize) -> ok; Error -> - _ = lager:debug("unable to write on ~p, reason: ~p", [Buffer, Error]), + _ = lager:debug("error writing on ~p, reason: ~p", [Buffer, Error]), case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> ok; _ -> - _ = lager:critical("unable to write to default buffer, reason: ~p", - [Error]), + _ = lager:critical("unable to write to default buffer, the message ~p" + " was lost lost, reason: ~p", + [Message, Error]), ok end end. From 37cd3c248f68c597925b877734d99ffed534bdb9 Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Mon, 16 May 2016 15:41:43 -0300 Subject: [PATCH 51/72] improvements to the return values of the produce funcion --- src/kafkerl.erl | 3 ++- src/kafkerl_connector.erl | 15 +++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 3b423c6..a0c15b8 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -24,6 +24,7 @@ -type options() :: [option()]. -type server_ref() :: atom() | pid(). +-type ok() :: {ok, atom()}. -type error() :: {error, atom() | {atom(), any()}}. -type topic() :: binary(). @@ -48,7 +49,7 @@ start(_StartType, _StartArgs) -> %% Access API %%============================================================================== %% Produce API --spec produce(topic(), partition(), payload()) -> ok. +-spec produce(topic(), partition(), payload()) -> ok() | error(). produce(Topic, Partition, Message) -> kafkerl_connector:send({Topic, Partition, Message}). diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 5f7f2bd..f3cde39 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -53,23 +53,26 @@ start_link(Config) -> gen_server:start_link({local, kafkerl}, ?MODULE, [Config], []). 
--spec send(kafkerl:basic_message()) -> - ok | kafkerl:error(). +-spec send(kafkerl:basic_message()) -> kafkerl:ok() | kafkerl:error(). send({Topic, Partition, _Payload} = Message) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), case ets_buffer:write(Buffer, Message) of NewSize when is_integer(NewSize) -> - ok; + % Return 'saved' when the message went to the right ETS + {ok, saved}; Error -> _ = lager:debug("error writing on ~p, reason: ~p", [Buffer, Error]), case ets_buffer:write(kafkerl_utils:default_buffer_name(), Message) of - NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> - ok; + NewDefaultBufferSize when is_integer(NewDefaultBufferSize) -> + % We return 'cached' when we needed to use the default ets table + {ok, cached}; _ -> _ = lager:critical("unable to write to default buffer, the message ~p" " was lost lost, reason: ~p", [Message, Error]), - ok + % We can safely assume that the ets existance indicates if kafkerl was + % started + {error, not_started} end end. From 6b6beeda034568ab238402a7a62dbd13f680e4ec Mon Sep 17 00:00:00 2001 From: HernanRivasAcosta Date: Wed, 15 Jun 2016 17:13:38 +0300 Subject: [PATCH 52/72] multiple improvements --- .gitignore | 4 +- Makefile | 22 ++-------- bin/clear_kafkerl_test_topics.sh | 1 - bin/create_test_topics.sh | 6 +-- bin/delete_test_topics.sh | 1 + rel/kafkerl.app.config | 4 +- src/kafkerl.erl | 21 ++++++++-- src/kafkerl_connector.erl | 11 +++-- src/kafkerl_test_utils.erl | 55 +++++++++++++++++++++++++ src/kafkerl_utils.erl | 14 +------ test/kafkerl_SUITE.erl | 68 +++++++++++++++++++++++++++++++ test/message_validation_SUITE.erl | 67 ------------------------------ 12 files changed, 159 insertions(+), 115 deletions(-) delete mode 100755 bin/clear_kafkerl_test_topics.sh create mode 100755 bin/delete_test_topics.sh create mode 100644 src/kafkerl_test_utils.erl create mode 100644 test/kafkerl_SUITE.erl delete mode 100644 test/message_validation_SUITE.erl diff --git a/.gitignore b/.gitignore index cd4d9f8..960a23e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,6 @@ log rebar .rebar *.dump -bin/tmp \ No newline at end of file +bin/tmp +logs/ +rel/ \ No newline at end of file diff --git a/Makefile b/Makefile index 22ee8ad..bf16ef0 100644 --- a/Makefile +++ b/Makefile @@ -4,22 +4,10 @@ ERL ?= erl RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} NODE ?= kafkerl CT_ARGS ?= "-vvv" -CT_LOG ?= /logs/ct -ERLARGS=-pa ${DEPS} -pa ${APPS} -smp enable -boot start_sasl -args_file ${VM_ARGS} -s lager -s redis_config -TEST_ERL_ARGS ?= ${ERLARGS} -args_file ${TEST_VM_ARGS} -config ${TEST_CONFIG} +ERLARGS=-config ${CONFIG} +TEST_ERL_ARGS ?= ${ERLARGS} REBAR ?= "rebar" -ifdef CT_SUITES - CT_SUITES_="suites=${CT_SUITES}" -else - CT_SUITES_="" -endif -ifdef CT_CASE - CT_CASE_="case=${CT_CASE}" -else - CT_CASE_="" -endif - all: ${REBAR} get-deps compile @@ -74,8 +62,4 @@ test: tests tests: @${REBAR} compile skip_deps=true - @rm -rf ${CT_LOG} - @mkdir -p ${CT_LOG} - @ERL_FLAGS="${TEST_ERL_ARGS}" \ - ERL_AFLAGS="${TEST_ERL_ARGS}" \ - ${REBAR} -v 3 skip_deps=true ${CT_SUITES_} ${CT_CASE_} ct + ${REBAR} -v 3 skip_deps=true ct diff --git a/bin/clear_kafkerl_test_topics.sh b/bin/clear_kafkerl_test_topics.sh deleted file mode 100755 index 90f1920..0000000 --- a/bin/clear_kafkerl_test_topics.sh +++ /dev/null @@ -1 +0,0 @@ -rm -rf tmp/ \ No newline at end of file diff --git a/bin/create_test_topics.sh b/bin/create_test_topics.sh index 2bad117..d474341 100755 --- 
a/bin/create_test_topics.sh +++ b/bin/create_test_topics.sh @@ -23,6 +23,6 @@ done # make sure the path is defined if [ ! -d "${d}" ]; then echo "invalid kafka path ${d}" ; exit 1 ; fi -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test1 --partitions 3 --replication-factor 3 -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test2 --partitions 3 --replication-factor 3 -"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test3 --partitions 4 --replication-factor 3 \ No newline at end of file +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test1 --partitions 1 --replication-factor 3 +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test2 --partitions 2 --replication-factor 3 +"${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic kafkerl_test3 --partitions 3 --replication-factor 3 \ No newline at end of file diff --git a/bin/delete_test_topics.sh b/bin/delete_test_topics.sh new file mode 100755 index 0000000..ad84185 --- /dev/null +++ b/bin/delete_test_topics.sh @@ -0,0 +1 @@ +rm -rf tmp \ No newline at end of file diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 0c88a29..ee73239 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -19,5 +19,5 @@ {flush_to_disk_every, 20000}, % In milliseconds {max_buffer_size, 5000}, {save_messages_for_bad_topics, true}]}, - {topics, [test1, test2, test3]}, - {tests, [{kafka_installation, "~/kafka"}]}]}]. \ No newline at end of file + {topics, []}, + {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. \ No newline at end of file diff --git a/src/kafkerl.erl b/src/kafkerl.erl index a0c15b8..95f36d1 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -5,7 +5,8 @@ -export([produce/3, consume/2, consume/3, stop_consuming/2, request_metadata/0, request_metadata/1, - partitions/0]). + partitions/0, + subscribe/1, subscribe/2, unsubscribe/1]). -export([version/0]). %% Types @@ -15,6 +16,7 @@ fun() | {atom(), atom()} | {atom(), atom(), [any()]}. +-type filters() :: all | [atom()]. -type option() :: {buffer_size, integer() | infinity} | {consumer, callback()} | {min_bytes, integer()} | @@ -33,7 +35,7 @@ -type basic_message() :: {topic(), partition(), payload()}. -export_type([server_ref/0, error/0, options/0, topic/0, partition/0, payload/0, - callback/0, basic_message/0]). + callback/0, basic_message/0, filters/0]). %%============================================================================== %% API @@ -54,7 +56,7 @@ produce(Topic, Partition, Message) -> kafkerl_connector:send({Topic, Partition, Message}). %% Consume API --spec consume(topic(), partition()) -> ok | error(). +-spec consume(topic(), partition()) -> {[payload()], offset()} | error(). consume(Topic, Partition) -> consume(Topic, Partition, []). @@ -92,6 +94,19 @@ request_metadata(Topics) when is_list(Topics) -> partitions() -> kafkerl_connector:get_partitions(). +%% Events +-spec subscribe(callback()) -> ok | error(). +subscribe(Callback) -> + kafkerl_connector:subscribe(Callback). + +-spec subscribe(callback(), filters()) -> ok | error(). +subscribe(Callback, Filters) -> + kafkerl_connector:subscribe(Callback, Filters). + +-spec unsubscribe(callback()) -> ok. +unsubscribe(Callback) -> + kafkerl_connector:unsubscribe(Callback). + %% Utils -spec version() -> {integer(), integer(), integer()}. 
version() -> diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index f3cde39..447c64c 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -28,15 +28,14 @@ -type kafler_port() :: 1..65535. -type address() :: {kafler_host(), kafler_port()}. --type filters() :: all | [atom()]. - -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. -record(state, {broker_mapping = void :: [broker_mapping()] | void, config = [] :: [{atom(), any()}], autocreate_topics = false :: boolean(), - callbacks = [] :: [{filters(), kafkerl:callback()}], + callbacks = [] :: [{kafkerl:filters(), + kafkerl:callback()}], last_dump_name = {"", 0} :: {string(), integer()}, default_fetch_options = [] :: kafkerl:options(), dump_location = "" :: string(), @@ -98,9 +97,9 @@ get_partitions() -> -spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). subscribe(Callback) -> subscribe(Callback, all). --spec subscribe(kafkerl:callback(), filters()) -> ok | kafkerl:error(). -subscribe(Callback, Filter) -> - gen_server:call(kafkerl, {subscribe, {Filter, Callback}}). +-spec subscribe(kafkerl:callback(), kafkerl:filters()) -> ok | kafkerl:error(). +subscribe(Callback, Filters) -> + gen_server:call(kafkerl, {subscribe, {Filters, Callback}}). -spec unsubscribe(kafkerl:callback()) -> ok. unsubscribe(Callback) -> diff --git a/src/kafkerl_test_utils.erl b/src/kafkerl_test_utils.erl new file mode 100644 index 0000000..1a79c7e --- /dev/null +++ b/src/kafkerl_test_utils.erl @@ -0,0 +1,55 @@ +-module(kafkerl_test_utils). +-author('hernanrivasacosta@gmail.com'). + +-export([start_kafka/0, start_kafka/1, create_test_topics/0, stop_kafka/0]). + +%%============================================================================== +%% API +%%============================================================================== +-spec start_kafka() -> ok. +start_kafka() -> + start_kafka(false). + +-spec start_kafka(boolean()) -> ok. +start_kafka(CreateTestTopics) -> + % Clean all the logs + lager:critical("1"), + [] = os:cmd("rm -rf bin/tmp"), + % Start zookeeper and kafka + lager:critical("2"), + Path = get_path(), + lager:critical("3"), + [] = os:cmd("./bin/start_zk.sh -d " ++ Path ++ " -c bin/zookeeper.properties"), + lager:critical("4"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server0.properties"), + lager:critical("5"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server1.properties"), + lager:critical("6"), + [] = os:cmd("./bin/start_broker.sh -d " ++ Path ++ " -c bin/server2.properties"), + lager:critical("7"), + % Create the test topics and partitions + case CreateTestTopics of + true -> create_test_topics(); + false -> ok + end. + +-spec create_test_topics() -> ok. +create_test_topics() -> + Path = get_path(), + % TODO: If kafka doesn't start properly, this will never return + [] = os:cmd("./bin/create_test_topics.sh -d " ++ Path), + ok. + +-spec stop_kafka() -> ok. +stop_kafka() -> + % Stop both zookeeper and kafka + [] = os:cmd("./bin/stop_zk.sh"), + [] = os:cmd("./bin/stop_all_brokers.sh"), + ok. + +%%============================================================================== +%% Utils +%%============================================================================== +get_path() -> + {ok, TestProps} = application:get_env(kafkerl, tests), + proplists:get_value(kafkerl_path, TestProps). 
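These helpers are intended to be driven from common_test. A sketch of a suite init using them, assuming the kafkerl 'tests' app env points at a local kafka installation as in rel/kafkerl.app.config:

init_per_suite(Config) ->
  % Boots zookeeper plus the three brokers and creates the test topics
  ok = kafkerl_test_utils:start_kafka(true),
  Config.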
\ No newline at end of file diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index 03efb71..e7e3b34 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -3,7 +3,7 @@ -export([send_event/2, send_error/2]). -export([get_tcp_options/1]). --export([merge_messages/1, split_messages/1, valid_message/1]). +-export([merge_messages/1, split_messages/1]). -export([buffer_name/2, default_buffer_name/0]). -export([gather_consume_responses/0, gather_consume_responses/1]). -export([proplists_set/2]). @@ -56,18 +56,6 @@ split_messages({Topic, Partitions}) -> split_messages(Topics) -> lists:flatten([split_messages(Topic) || Topic <- Topics]). --spec valid_message(any()) -> boolean(). -valid_message({Topic, Partition, Messages}) -> - is_binary(Topic) andalso is_integer(Partition) andalso Partition >= 0 andalso - (is_binary(Messages) orelse is_list_of_binaries(Messages)); -valid_message({Topic, Partition}) -> - is_binary(Topic) andalso (is_partition(Partition) orelse - is_partition_list(Partition)); -valid_message(L) when is_list(L) -> - lists:all(fun valid_message/1, L); -valid_message(_Any) -> - false. - -spec buffer_name(kafkerl_protocol:topic(), kafkerl_protocol:partition()) -> atom(). buffer_name(Topic, Partition) -> diff --git a/test/kafkerl_SUITE.erl b/test/kafkerl_SUITE.erl new file mode 100644 index 0000000..a81ca26 --- /dev/null +++ b/test/kafkerl_SUITE.erl @@ -0,0 +1,68 @@ +-module(kafkerl_SUITE). +-author('hernanrivasacosta@gmail.com'). + +-export([produce_and_consume/1]). + +-export([init_per_suite/1, end_per_suite/1, init_per_testcase/2, + end_per_testcase/2, all/0]). + +-type config() :: [{atom(), term()}]. + +-spec all() -> [atom()]. +all() -> + [produce_and_consume]. + +-spec init_per_suite(config()) -> [config()]. +init_per_suite(Config) -> + Config. + +-spec end_per_suite(config()) -> [config()]. +end_per_suite(Config) -> + Config. + +-spec init_per_testcase(atom(), config()) -> [config()]. +init_per_testcase(TestCase, Config) -> + Config. + +-spec end_per_testcase(atom(), config()) -> [config()]. +end_per_testcase(TestCase, Config) -> + kafkerl_test_utils:stop_kafka(), + Config. + +%%============================================================================== +%% Tests +%%============================================================================== +-spec produce_and_consume(config()) -> ok. +produce_and_consume(_Config) -> + % Start by producing a message while kafkerl has not been started + ct:pal("sending initial message"), + {error, not_started} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"ignore">>), + % Start kafkerl + ct:pal("starting kafkerl"), + ok = kafkerl:start(), + % Produce on some non existing topic, it will be cached + ct:pal("producing a message that will be cached"), + {ok, cached} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"msg1">>), + % Start kafka + ct:pal("starting kafkerl"), + ok = kafkerl_test_utils:start_kafka(), + % Create the topics and get the metadata + %ct:pal("create the topics"), + %ok = kafkerl_test_utils:create_test_topics(), + ct:pal("request the metadata"), + ok = kafkerl:request_metadata(), + % Wait for the metadata to be updated + ok = receive + {partition_update, PU = [_ | _]} -> + ct:pal("got an update (~p)!", [PU]), + ok + after 7500 -> + ct:pal("no update :("), + error + end, + % Send another message + ct:pal("send a message"), + {ok, saved} = kafkerl:produce(<<"kafkerl_test3">>, 0, <<"msg2">>), + % Wait a bit for the messages to be sent + + ok. 
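The receive above expects a {partition_update, ...} message in the test process mailbox. One way to arrange that — assuming the connector forwards partition_update events to subscribed callbacks — is to register the test process itself as a plain pid() callback; a sketch only, not part of the suite as committed:

    %% Sketch: subscribe the test process, then wait for the metadata-driven
    %% partition_update event as an ordinary message.
    ok = kafkerl:subscribe(self()),
    ok = kafkerl:request_metadata(),
    Partitions = receive
                   {partition_update, PU} -> PU
                 after 7500 ->
                   ct:fail(no_partition_update)
                 end.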
\ No newline at end of file diff --git a/test/message_validation_SUITE.erl b/test/message_validation_SUITE.erl deleted file mode 100644 index 682e832..0000000 --- a/test/message_validation_SUITE.erl +++ /dev/null @@ -1,67 +0,0 @@ --module(message_validation_SUITE). --author('hernanrivasacosta@gmail.com'). - --export([valid_messages/1, invalid_messages/1]). - --export([init_per_suite/1, end_per_suite/1, init_per_testcase/2, - end_per_testcase/2, all/0]). - --type config() :: [{atom(), term()}]. - --spec all() -> [atom()]. -all() -> - [valid_messages, invalid_messages]. - --spec init_per_suite(config()) -> [config()]. -init_per_suite(Config) -> - Config. - --spec end_per_suite(config()) -> [config()]. -end_per_suite(Config) -> - Config. - --spec init_per_testcase(atom(), config()) -> [config()]. -init_per_testcase(TestCase, Config) -> - Config. - --spec end_per_testcase(atom(), config()) -> [config()]. -end_per_testcase(TestCase, Config) -> - Config. - -%%============================================================================== -%% Tests -%%============================================================================== --spec valid_messages(config()) -> ok. -valid_messages(_Config) -> - true = kafkerl_producer:valid_topics({<<"topic">>, 1, <<"msg">>}), - true = kafkerl_producer:valid_topics({<<"topic">>, {1, <<"msg">>}}), - true = kafkerl_producer:valid_topics({<<"topic">>, [{1, <<"msg">>}]}), - true = kafkerl_producer:valid_topics({<<"topic">>, [{1, <<"msg1">>}, - {2, <<"msg2">>}]}), - true = kafkerl_producer:valid_topics([{<<"topic">>, 1, <<"msg">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, 1, <<"msg1">>}, - {<<"topic2">>, 1, <<"msg2">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, {1, <<"msg1">>}}, - {<<"topic2">>, 1, <<"msg2">>}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}]}, - {<<"topic2">>, {1, <<"msg2">>}}]), - true = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}, - {2, <<"msg2">>}]}, - {<<"topic2">>, {1, <<"msg3">>}}]), - ok. - --spec invalid_messages(config()) -> ok. -invalid_messages(_Config) -> - false = kafkerl_producer:valid_topics(<<"test">>), - false = kafkerl_producer:valid_topics({<<"test">>, 1}), - false = kafkerl_producer:valid_topics({<<"test">>, <<"msg">>}), - false = kafkerl_producer:valid_topics({<<"test">>, [<<"msg">>]}), - false = kafkerl_producer:valid_topics({<<"test">>, [1, <<"msg">>]}), - false = kafkerl_producer:valid_topics([]), - false = kafkerl_producer:valid_topics([<<"test">>]), - false = kafkerl_producer:valid_topics({undefined, 1, <<"msg">>}), - false = kafkerl_producer:valid_topics({<<"topic">>, 1, undefined}), - false = kafkerl_producer:valid_topics([{<<"topic1">>, [{1, <<"msg1">>}, - {2, undefined}]}, - {<<"topic2">>, {1, <<"msg3">>}}]), - ok. 
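With kafkerl_utils:valid_message/1 removed and this suite deleted, nothing in the library validates message shape before produce/3 anymore. A caller that still wants the old sanity check can inline something equivalent to the removed {Topic, Partition, Messages} clause — a sketch, not part of the patch:

    %% Mirrors the deleted valid_message({Topic, Partition, Messages}) clause.
    is_valid_message(Topic, Partition, Payload) ->
      is_binary(Topic) andalso
      is_integer(Partition) andalso Partition >= 0 andalso
      (is_binary(Payload) orelse
       (is_list(Payload) andalso lists:all(fun erlang:is_binary/1, Payload))).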
\ No newline at end of file From 6a2fd32c5a2b43d33856cac66510351c4cf09c33 Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Thu, 7 Jul 2016 22:36:55 +0800 Subject: [PATCH 53/72] kafkerl refact --- rebar.config | 3 ++- rel/kafkerl.app.config | 2 +- src/kafkerl_buffer.erl | 34 ++++++++++++++++++++++++++ src/kafkerl_connector.erl | 2 +- src/kafkerl_metadata_handler.erl | 41 ++++++++++++++++++++++---------- 5 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 src/kafkerl_buffer.erl diff --git a/rebar.config b/rebar.config index fb71da9..bc8c780 100644 --- a/rebar.config +++ b/rebar.config @@ -19,4 +19,5 @@ [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, {epocxy, "1.0.1", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.1"}}}, - {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. \ No newline at end of file + {eper, "0.69", {git, "git@github.com:tigertext/eper.git", "HEAD" }}, + {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index ee73239..9d7eb53 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -2,7 +2,7 @@ {handlers, [{lager_console_backend, [debug,true]}]}]}, {kafkerl, [{disabled, false}, {conn_config, [{brokers, [{"localhost", 9090}, - {"localhost", 9091}, + {"localhost", 9094}, {"localhost", 9092}]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, diff --git a/src/kafkerl_buffer.erl b/src/kafkerl_buffer.erl new file mode 100644 index 0000000..4483120 --- /dev/null +++ b/src/kafkerl_buffer.erl @@ -0,0 +1,34 @@ + +-module(kafkerl_buffer). +-author("anders"). +-behavior(gen_server). + +%% API +-export([start_link/0, init/1, create_buffer/2, handle_call/3, start_link/1, create_buffer/1]). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +start_link(_) -> + start_link(). + + +init([]) -> + {ok, []}. + +create_buffer(Name, Type) -> + gen_server:call(?MODULE, {create_buffer, Name, Type}). +create_buffer(Name) -> + gen_server:call(?MODULE, {create_buffer, Name}). + +handle_call({create_buffer, Name, Type}, _from, State) -> + Alredy_Exists = ets_buffer:list(Name) =/= [], + Res = ets_buffer:create(Name, Type), + lager:debug("buffer ~p type ~p created ~p, already exists ~p", [Name, Type, Res, Alredy_Exists]), + {reply, ok, State}; +handle_call({create_buffer, Name}, _From, State) -> + Res = ets_buffer:create(Name), + lager:debug("buffer ~p created ~p", [Name, Res]), + {reply, ok, State}. 
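kafkerl_buffer funnels every ets_buffer creation through a single registered gen_server, so concurrent callers no longer race on ets_buffer:create/2; the rest of this patch switches callers over accordingly:

    %% Callers now go through the singleton process instead of ets_buffer directly:
    ok = kafkerl_buffer:create_buffer(kafkerl_utils:default_buffer_name(), fifo),
    ok = kafkerl_buffer:create_buffer(kafkerl_utils:buffer_name(Topic, Partition), fifo).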
+ + diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 447c64c..72b3a95 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -207,7 +207,7 @@ init([Config]) -> autocreate_topics = AutocreateTopics, default_fetch_options = DefaultFetchOptions}, % Create a buffer to hold unsent messages - _ = ets_buffer:create(kafkerl_utils:default_buffer_name(), fifo), + _ = kafkerl_buffer:create_buffer(kafkerl_utils:default_buffer_name(), fifo), % Start the interval that manages the buffers holding unsent messages {ok, _TRef} = timer:send_interval(FlushToDiskInterval, dump_buffer_tick), ok = kafkerl_metadata_handler:request_metadata([]), diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index ef91183..58c9f90 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -22,7 +22,9 @@ retry_interval = 1 :: non_neg_integer(), cooldown = 0 :: integer(), known_topics = [] :: [kafkerl:topic()], - next_topics = [] :: [kafkerl:topic()]}). + next_topics = [] :: [kafkerl:topic()], + broker_connections = [], + connection_index = 0}). -type state() :: #state{}. %%============================================================================== @@ -70,10 +72,14 @@ requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> % Handle the updated metadata requesting({metadata_updated, RawMapping}, State) -> % Create the topic mapping (this also starts the broker connections) - NewMapping = get_broker_mapping(RawMapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping]), - ok = kafkerl_connector:topic_mapping_updated(NewMapping), - {next_state, idle, State}; + {N, TopicMapping} = get_broker_mapping(RawMapping, State), + _ = lager:debug("Refreshed topic mapping: ~p", [TopicMapping]), + + NewMapping2 = [{{Topic, Partition}, Conn} || + {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], + _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), + ok = kafkerl_connector:topic_mapping_updated(NewMapping2), + {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; % If we have no more retries left, go on cooldown requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> Params = [?MODULE, on_timer], @@ -105,9 +111,18 @@ handle_sync_event(get_known_topics, _From, StateName, State) -> %% gen_fsm boilerplate %%============================================================================== -spec handle_info(any(), atom(), state()) -> {next_state, atom(), state()}. +handle_info({'EXIT', Pid, Reason}, StateName, State) -> + lager:info("process ~p crashed with reason ~p ", [Pid, Reason]), + BrokerConnections = [{Name, {Topic, Partition}, Conn} || {Name, {Topic, Partition}, Conn} <- State#state.broker_connections, + whereis(Conn) /= Pid, whereis(Conn) /= undefined], + lager:info("current connections ~p, updated connections ~p ~n", [State#state.broker_connections, BrokerConnections]), + timer:apply_after(1000, ?MODULE, request_metadata, [[]]), + {next_state, StateName, State#state{broker_connections = BrokerConnections}}; + handle_info(Message, StateName, State) -> - lager:info("received unexpected message ~p", [Message]), - {next_state, StateName, State}. + lager:info("received unexpected message ~p", [Message]), + {next_state, StateName, State}. + -spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. 
code_change(_OldVsn, StateName, StateData, _Extra) -> @@ -145,6 +160,7 @@ init([Config]) -> client_id = ClientId, max_retries = MaxRetries, retry_interval = RetryInterval}, + process_flag(trap_exit, true), {ok, idle, State}; {errors, Errors} -> ok = lists:foreach(fun(E) -> @@ -290,14 +306,15 @@ expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, 0, []). + get_broker_mapping(TopicMapping, State, State#state.connection_index, + State#state.broker_connections). -get_broker_mapping([], _State, _N, Acc) -> - [{Key, Address} || {_ConnId, Key, Address} <- Acc]; +get_broker_mapping([], _State, N, Acc) -> + {N, lists:usort(Acc)}; get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], State = #state{config = Config}, N, Acc) -> Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of false -> {start_broker_connection(N, Address, Config), N + 1}; @@ -306,7 +323,7 @@ get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], end, Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = ets_buffer:create(Buffer, fifo), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), kafkerl_broker_connection:add_buffer(Conn, Buffer), NewMapping = {ConnId, {Topic, Partition}, Conn}, From 8367f254eeba602d77c9b5b124351673295721ea Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Fri, 8 Jul 2016 08:06:25 +0800 Subject: [PATCH 54/72] add child --- src/kafkerl_sup.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/kafkerl_sup.erl b/src/kafkerl_sup.erl index e333131..8437c17 100644 --- a/src/kafkerl_sup.erl +++ b/src/kafkerl_sup.erl @@ -10,6 +10,7 @@ -type restart_strategy() :: {supervisor:strategy(), non_neg_integer(), non_neg_integer()}. +-define(CHILD(__Name, __Mod, __Args), {__Name, {__Mod, start_link, __Args}, permanent, 2000, worker, [__Mod]}). %%============================================================================== %% API @@ -28,13 +29,15 @@ init([]) -> lager:notice("Kafkerl is disabled, ignoring"), []; false -> - [get_connector_child_spec()] + [?CHILD(kafkerl_buffer, kafkerl_buffer, []), + get_connector_child_spec()] end, {ok, {{one_for_one, 5, 10}, ChildSpecs}}. + get_connector_child_spec() -> {ok, ConnConfig} = application:get_env(kafkerl, conn_config), Topics = application:get_env(kafkerl, topics, []), Params = [[{topics, Topics} | ConnConfig]], MFA = {kafkerl_connector, start_link, Params}, - {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. \ No newline at end of file + {kafkerl, MFA, permanent, 2000, worker, [kafkerl_connector]}. 
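For reference, the ?CHILD macro above expands the new kafkerl_buffer entry into the conventional child spec tuple; because it precedes the connector spec in the list, the buffer server is already running when kafkerl_connector's init/1 creates its default buffer:

    {kafkerl_buffer,
     {kafkerl_buffer, start_link, []},
     permanent, 2000, worker, [kafkerl_buffer]}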
\ No newline at end of file From 70bf3ba1ebdf4132b2ec28479b5f8aab5637c180 Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Fri, 8 Jul 2016 23:03:13 +0800 Subject: [PATCH 55/72] handle broker leader change --- Makefile | 2 +- rel/kafkerl.app.config | 7 +- src/kafkerl_broker_connection.erl | 790 +++++++++++++++--------------- src/kafkerl_metadata_handler.erl | 418 ++++++++-------- src/kafkerl_protocol.erl | 2 +- 5 files changed, 622 insertions(+), 597 deletions(-) diff --git a/Makefile b/Makefile index bf16ef0..ed229fe 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ COOKIE ?= KAFKERL-EXAMPLE CONFIG ?= rel/kafkerl.app.config ERL ?= erl -RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} +RUN := ${ERL} -pa ebin -pa deps/*/ebin -smp enable -s lager -s kafkerl -setcookie ${COOKIE} -config ${CONFIG} -boot start_sasl ${ERL_ARGS} NODE ?= kafkerl CT_ARGS ?= "-vvv" ERLARGS=-config ${CONFIG} diff --git a/rel/kafkerl.app.config b/rel/kafkerl.app.config index 9d7eb53..cf6b2f3 100644 --- a/rel/kafkerl.app.config +++ b/rel/kafkerl.app.config @@ -1,9 +1,8 @@ [{lager, [{colored, true}, {handlers, [{lager_console_backend, [debug,true]}]}]}, {kafkerl, [{disabled, false}, - {conn_config, [{brokers, [{"localhost", 9090}, - {"localhost", 9094}, - {"localhost", 9092}]}, + {conn_config, [{brokers, [{"localhost", 9092}, {"localhost", 9094} + ]}, {client_id, kafkerl_client}, % Sent to kafka {max_broker_retries, 2}, {broker_tcp_timeout, 1000}, @@ -20,4 +19,4 @@ {max_buffer_size, 5000}, {save_messages_for_bad_topics, true}]}, {topics, []}, - {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. \ No newline at end of file + {tests, [{kafkerl_path, "/usr/local/Cellar/kafka/0.8.2.2/"}]}]}]. diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 9e70e6e..f0610d2 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -4,117 +4,123 @@ -behaviour(gen_server). %% API --export([add_buffer/2, clear_buffers/1, fetch/4, stop_fetch/3]). +-export([add_buffer/2, clear_buffers/1, delete_buffer/2, fetch/4, stop_fetch/3]). % Only for internal use -export([connect/6]). % Supervisors -export([start_link/3]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, - handle_call/3, handle_cast/2, handle_info/2]). + handle_call/3, handle_cast/2, handle_info/2]). -include("kafkerl.hrl"). --type conn_idx() :: 0..1023. +-type conn_idx() :: 0..1023. -type start_link_response() :: {ok, atom(), pid()} | ignore | {error, any()}. --record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), - server_ref = undefined :: kafkerl:server_ref(), - topic = undefined :: kafkerl:topic(), - partition = undefined :: kafkerl:partition(), - options = undefined :: kafkerl:options(), - state = void :: kafkerl_protocol:fetch_state()}). 
- --record(state, {name = undefined :: atom(), - buffers = [] :: [atom()], - conn_idx = undefined :: conn_idx(), - client_id = undefined :: binary(), - socket = undefined :: port(), - address = undefined :: kafkerl_connector:address(), - tref = undefined :: any(), - tcp_options = [] :: [any()], - max_retries = 0 :: integer(), - retry_interval = 0 :: integer(), - request_number = 0 :: integer(), - pending_requests = [] :: [integer()], - max_time_queued = 0 :: integer(), - ets = undefined :: atom(), - fetches = [] :: [#fetch{}], - current_fetch = void :: kafkerl_protocol:correlation_id() | - void, - scheduled_fetches = [] :: [{{kafkerl:topic(), - kafkerl:partition()}, - timer:tref()}]}). +-record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), + server_ref = undefined :: kafkerl:server_ref(), + topic = undefined :: kafkerl:topic(), + partition = undefined :: kafkerl:partition(), + options = undefined :: kafkerl:options(), + state = void :: kafkerl_protocol:fetch_state()}). + +-record(state, {name = undefined :: atom(), + buffers = [] :: [atom()], + conn_idx = undefined :: conn_idx(), + client_id = undefined :: binary(), + socket = undefined :: port(), + address = undefined :: kafkerl_connector:address(), + tref = undefined :: any(), + tcp_options = [] :: [any()], + max_retries = 0 :: integer(), + retry_interval = 0 :: integer(), + request_number = 0 :: integer(), + pending_requests = [] :: [integer()], + max_time_queued = 0 :: integer(), + ets = undefined :: atom(), + fetches = [] :: [#fetch{}], + current_fetch = void :: kafkerl_protocol:correlation_id() | + void, + scheduled_fetches = [] :: [{{kafkerl:topic(), + kafkerl:partition()}, + timer:tref()}]}). -type state() :: #state{}. %%============================================================================== %% API %%============================================================================== -spec start_link(conn_idx(), kafkerl_connector:address(), any()) -> - start_link_response(). + start_link_response(). start_link(Id, Address, Config) -> - NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), - Name = list_to_atom(NameStr), - Params = [Id, Address, Config, Name], - case gen_server:start_link({local, Name}, ?MODULE, Params, []) of - {ok, Pid} -> - {ok, Name, Pid}; - Other -> - Other - end. + NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), + Name = list_to_atom(NameStr), + Params = [Id, Address, Config, Name], + case gen_server:start_link({local, Name}, ?MODULE, Params, []) of + {ok, Pid} -> + {ok, Name, Pid}; + Other -> + Other + end. -spec add_buffer(kafkerl:server_ref(), atom()) -> ok. add_buffer(ServerRef, Buffer) -> - gen_server:call(ServerRef, {add_buffer, Buffer}). + gen_server:call(ServerRef, {add_buffer, Buffer}). -spec clear_buffers(kafkerl:server_ref()) -> ok. clear_buffers(ServerRef) -> - gen_server:call(ServerRef, {clear_buffers}). + gen_server:call(ServerRef, {clear_buffers}). + +delete_buffer(ServerRef, Buffer) -> + gen_server:call(ServerRef, {delete_buffer, Buffer}). -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), - kafkerl:options()) -> ok | kafkerl:error(). + kafkerl:options()) -> ok | kafkerl:error(). fetch(ServerRef, Topic, Partition, Options) -> - gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). + gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). -spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> - ok. + ok. 
stop_fetch(ServerRef, Topic, Partition) -> - gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). + gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). %%============================================================================== %% gen_server callbacks %%============================================================================== -spec handle_call(any(), any(), state()) -> {reply, ok, state()}. handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> - {reply, ok, State#state{buffers = [Buffer| Buffers]}}; + {reply, ok, State#state{buffers = [Buffer | Buffers]}}; handle_call({clear_buffers}, _From, State) -> - {reply, ok, State#state{buffers = []}}; + {reply, ok, State#state{buffers = []}}; +handle_call({delete_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> + {reply, ok, State#state{buffers = Buffers -- [Buffer]}}; + handle_call({fetch, ServerRef, Topic, Partition, Options}, _From, State) -> - handle_fetch(ServerRef, Topic, Partition, Options, State); + handle_fetch(ServerRef, Topic, Partition, Options, State); handle_call({stop_fetch, Topic, Partition}, _From, State) -> - handle_stop_fetch(Topic, Partition, State). + handle_stop_fetch(Topic, Partition, State). -spec handle_info(any(), state()) -> {noreply, state()}. handle_info({connected, Socket}, State) -> - handle_flush(State#state{socket = Socket}); + handle_flush(State#state{socket = Socket}); handle_info(connection_timeout, State) -> - {stop, {error, unable_to_connect}, State}; + {stop, {error, unable_to_connect}, State}; handle_info({tcp_closed, _Socket}, State = #state{name = Name, - address = {Host, Port}}) -> - _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), - NewState = handle_tcp_close(State), - {noreply, NewState}; + address = {Host, Port}}) -> + _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), + NewState = handle_tcp_close(State), + {noreply, NewState}; handle_info({tcp, _Socket, Bin}, State) -> - case handle_tcp_data(Bin, State) of - {ok, NewState} -> {noreply, NewState}; - {error, Reason} -> {stop, {error, Reason}, State} - end; + case handle_tcp_data(Bin, State) of + {ok, NewState} -> {noreply, NewState}; + {error, Reason} -> {stop, {error, Reason}, State} + end; handle_info({flush, Time}, State) -> - {ok, _Tref} = queue_flush(Time), - handle_flush(State); + {ok, _Tref} = queue_flush(Time), + handle_flush(State); handle_info(Msg, State = #state{name = Name}) -> - _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), - {noreply, State}. + _ = lager:notice("~p got unexpected info message: ~p on ~p", [Name, Msg]), + {noreply, State}. % Boilerplate -spec handle_cast(any(), state()) -> {noreply, state()}. @@ -128,409 +134,413 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
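The new delete_buffer/2 call complements add_buffer/2. A plausible use, in line with this patch's goal of handling broker leader changes, is to move a partition's buffer from the old leader's connection to the new one when the mapping is refreshed — the sketch below is illustrative, with OldConn/NewConn standing in for whatever connections the metadata handler resolves:

    %% Illustrative only: detach the buffer from the stale connection and
    %% attach it to the connection that now leads the partition.
    Buffer = kafkerl_utils:buffer_name(Topic, Partition),
    ok = kafkerl_broker_connection:delete_buffer(OldConn, Buffer),
    ok = kafkerl_broker_connection:add_buffer(NewConn, Buffer).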
%% Handlers %%============================================================================== init([Id, Address, Config, Name]) -> - Schema = [{tcp_options, [any], {default, []}}, - {retry_interval, positive_integer, {default, 1000}}, - {max_retries, positive_integer, {default, 3}}, - {client_id, binary, {default, <<"kafkerl_client">>}}, - {max_time_queued, positive_integer, required}], - case normalizerl:normalize_proplist(Schema, Config) of - {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> - NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), - Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), - _ = ets:new(Ets, ets_options()), - State = #state{ets = Ets, - name = Name, - conn_idx = Id, - address = Address, - client_id = ClientId, - max_retries = MaxRetries, - tcp_options = NewTCPOpts, - retry_interval = RetryInterval, - max_time_queued = MaxTimeQueued}, - Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], - _Pid = spawn_link(?MODULE, connect, Params), - {ok, _Tref} = queue_flush(MaxTimeQueued), - {ok, State}; - {errors, Errors} -> - ok = lists:foreach(fun(E) -> - _ = lager:critical("configuration error: ~p", [E]) - end, Errors), - {stop, bad_config} - end. + Schema = [{tcp_options, [any], {default, []}}, + {retry_interval, positive_integer, {default, 1000}}, + {max_retries, positive_integer, {default, 3}}, + {client_id, binary, {default, <<"kafkerl_client">>}}, + {max_time_queued, positive_integer, required}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> + NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), + Ets = list_to_atom(atom_to_list(Name) ++ "_ets"), + _ = ets:new(Ets, ets_options()), + State = #state{ets = Ets, + name = Name, + conn_idx = Id, + address = Address, + client_id = ClientId, + max_retries = MaxRetries, + tcp_options = NewTCPOpts, + retry_interval = RetryInterval, + max_time_queued = MaxTimeQueued}, + Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], + _Pid = spawn_link(?MODULE, connect, Params), + {ok, _Tref} = queue_flush(MaxTimeQueued), + {ok, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("configuration error: ~p", [E]) + end, Errors), + {stop, bad_config} + end. handle_flush(State = #state{socket = undefined}) -> - {noreply, State}; -handle_flush(State = #state{buffers = []}) -> - {noreply, State}; + {noreply, State}; +handle_flush(State = #state{buffers = [], name = Name}) -> + lager:error("process have connection to broker but no buffer binding!! 
kill me ~p", [Name]), + {noreply, State}; handle_flush(State = #state{ets = EtsName, socket = Socket, buffers = Buffers, - name = Name, client_id = ClientId}) -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - % TODO: Maybe buffer all this messages in case something goes wrong - AllMessages = get_all_messages(Buffers), - case kafkerl_utils:merge_messages(AllMessages) of - [] -> - {noreply, NewState}; - MergedMessages -> - Request = kafkerl_protocol:build_produce_request(MergedMessages, - ClientId, - CorrelationId), - true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), - _ = lager:debug("~p sending ~p", [Name, Request]), - case gen_tcp:send(Socket, Request) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - gen_tcp:close(Socket), - ets:delete_all_objects(EtsName, CorrelationId), - ok = resend_messages(MergedMessages), - {noreply, handle_tcp_close(NewState)}; - ok -> - _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), - {noreply, NewState} - end - end. + name = Name, client_id = ClientId}) -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + % TODO: Maybe buffer all this messages in case something goes wrong + AllMessages = get_all_messages(Buffers), + case kafkerl_utils:merge_messages(AllMessages) of + [] -> + {noreply, NewState}; + MergedMessages -> + Request = kafkerl_protocol:build_produce_request(MergedMessages, + ClientId, + CorrelationId), + true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), + _ = lager:debug("~p sending ~p", [Name, Request]), + case gen_tcp:send(Socket, Request) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + gen_tcp:close(Socket), + ets:delete_all_objects(EtsName, CorrelationId), + ok = resend_messages(MergedMessages), + {noreply, handle_tcp_close(NewState)}; + ok -> + _ = lager:debug("~p sent message ~p", [Name, CorrelationId]), + {noreply, NewState} + end + end. handle_fetch(ServerRef, Topic, Partition, Options, - State = #state{fetches = Fetches, client_id = ClientId, - socket = Socket, name = Name, - scheduled_fetches = ScheduledFetches}) -> - Scheduled = proplists:get_bool(scheduled, Options), - case {get_fetch(Topic, Partition, Fetches), + State = #state{fetches = Fetches, client_id = ClientId, + socket = Socket, name = Name, + scheduled_fetches = ScheduledFetches}) -> + Scheduled = proplists:get_bool(scheduled, Options), + case {get_fetch(Topic, Partition, Fetches), lists:keytake({Topic, Partition}, 1, ScheduledFetches), Scheduled} of - % An scheduled fetch we can't identify? We ignore it - {_, false, true} -> - lager:warning("ignoring unknown scheduled fetch"), - {reply, ok, State}; - % We are already fetching that topic/partition pair - {#fetch{}, _, false} -> - {reply, {error, fetch_in_progress}, State}; - % We have a scheduled fetch for that topic/partition pair and this is not an - % scheduled fetch - {not_found, Tuple, false} when is_tuple(Tuple) -> - {reply, {error, fetch_in_progress}, State}; - % We have a valid fetch request! 
- {not_found, KeyTakeResult, Scheduled} -> - {ok, CorrelationId, NewState} = build_correlation_id(State), - Offset = proplists:get_value(offset, Options, 0), - Request = {Topic, {Partition, Offset, 2147483647}}, - MaxWait = proplists:get_value(max_wait, Options), - MinBytes = proplists:get_value(min_bytes, Options), - Payload = kafkerl_protocol:build_fetch_request(Request, - ClientId, - CorrelationId, - MaxWait, - MinBytes), - case gen_tcp:send(Socket, Payload) of - {error, Reason} -> - _ = lager:critical("~p was unable to write to socket, reason: ~p", - [Name, Reason]), - ok = gen_tcp:close(Socket), - {reply, {error, no_connection}, handle_tcp_close(State)}; - ok -> - _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), - NewFetch = #fetch{correlation_id = CorrelationId, - server_ref = ServerRef, - topic = Topic, - partition = Partition, - %options = [scheduled | Options]}, - options = Options}, - NewScheduledFetches = case KeyTakeResult of - false -> ScheduledFetches; - {_, _, List} -> List - end, - {reply, ok, NewState#state{fetches = [NewFetch | Fetches], - scheduled_fetches = NewScheduledFetches}} - end - end. + % An scheduled fetch we can't identify? We ignore it + {_, false, true} -> + lager:warning("ignoring unknown scheduled fetch"), + {reply, ok, State}; + % We are already fetching that topic/partition pair + {#fetch{}, _, false} -> + {reply, {error, fetch_in_progress}, State}; + % We have a scheduled fetch for that topic/partition pair and this is not an + % scheduled fetch + {not_found, Tuple, false} when is_tuple(Tuple) -> + {reply, {error, fetch_in_progress}, State}; + % We have a valid fetch request! + {not_found, KeyTakeResult, Scheduled} -> + {ok, CorrelationId, NewState} = build_correlation_id(State), + Offset = proplists:get_value(offset, Options, 0), + Request = {Topic, {Partition, Offset, 2147483647}}, + MaxWait = proplists:get_value(max_wait, Options), + MinBytes = proplists:get_value(min_bytes, Options), + Payload = kafkerl_protocol:build_fetch_request(Request, + ClientId, + CorrelationId, + MaxWait, + MinBytes), + case gen_tcp:send(Socket, Payload) of + {error, Reason} -> + _ = lager:critical("~p was unable to write to socket, reason: ~p", + [Name, Reason]), + ok = gen_tcp:close(Socket), + {reply, {error, no_connection}, handle_tcp_close(State)}; + ok -> + _ = lager:debug("~p sent request ~p", [Name, CorrelationId]), + NewFetch = #fetch{correlation_id = CorrelationId, + server_ref = ServerRef, + topic = Topic, + partition = Partition, + %options = [scheduled | Options]}, + options = Options}, + NewScheduledFetches = case KeyTakeResult of + false -> ScheduledFetches; + {_, _, List} -> List + end, + {reply, ok, NewState#state{fetches = [NewFetch | Fetches], + scheduled_fetches = NewScheduledFetches}} + end + end. handle_stop_fetch(Topic, Partition, State) -> - % Cancel any timers we have for scheduled fetches - case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of - false -> - NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches}}; - {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> - _ = timer:cancel(TRef), - NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), - {reply, ok, State#state{fetches = NewFetches, - scheduled_fetches = NewScheduledFetches}} - end. 
+ % Cancel any timers we have for scheduled fetches + case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of + false -> + NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches}}; + {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> + _ = timer:cancel(TRef), + NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), + {reply, ok, State#state{fetches = NewFetches, + scheduled_fetches = NewScheduledFetches}} + end. remove_fetch(Topic, Partition, Force, CurrentFetches) -> - remove_fetch(Topic, Partition, Force, CurrentFetches, []). + remove_fetch(Topic, Partition, Force, CurrentFetches, []). remove_fetch(_Topic, _Partition, _Force, [], Acc) -> - Acc; + Acc; remove_fetch(Topic, Partition, force, - [#fetch{topic = Topic, partition = Partition} | T], Acc) -> - % If we are forcing the removal, just remove the fetch - Acc ++ T; + [#fetch{topic = Topic, partition = Partition} | T], Acc) -> + % If we are forcing the removal, just remove the fetch + Acc ++ T; remove_fetch(Topic, Partition, _, - [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> - % Clearing the fetch options ensures this fetch will stop sending any messages - % since there is no consumer. This also removes the fetch_interval so it won't - % be requested again. - % Simply removing the fetch here doesn't work since we will still get a server - % response, but we won't be able to handle it. - [Fetch#fetch{options = []} | Acc] ++ T; + [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> + % Clearing the fetch options ensures this fetch will stop sending any messages + % since there is no consumer. This also removes the fetch_interval so it won't + % be requested again. + % Simply removing the fetch here doesn't work since we will still get a server + % response, but we won't be able to handle it. + [Fetch#fetch{options = []} | Acc] ++ T; remove_fetch(Topic, Partition, Force, [H | T], Acc) -> - remove_fetch(Topic, Partition, Force, T, [H | Acc]). + remove_fetch(Topic, Partition, Force, T, [H | Acc]). % TCP Handlers handle_tcp_close(State = #state{retry_interval = RetryInterval, - tcp_options = TCPOpts, - max_retries = MaxRetries, - address = Address, - name = Name}) -> - Params = [self(), Name, TCPOpts, Address, RetryInterval, MaxRetries], - _Pid = spawn_link(?MODULE, connect, Params), - State#state{socket = undefined}. + tcp_options = TCPOpts, + max_retries = MaxRetries, + address = Address, + name = Name}) -> + Params = [self(), Name, TCPOpts, Address, RetryInterval, MaxRetries], + _Pid = spawn_link(?MODULE, connect, Params), + State#state{socket = undefined}. handle_tcp_data(Bin, State = #state{fetches = Fetches, - current_fetch = CurrentFetch}) -> - {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), - case get_fetch(CorrelationId, Fetches) of - Fetch = #fetch{} -> - handle_fetch_response(Bin, Fetch, State); - _ -> - handle_produce_response(Bin, State) - end. + current_fetch = CurrentFetch}) -> + {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), + case get_fetch(CorrelationId, Fetches) of + Fetch = #fetch{} -> + handle_fetch_response(Bin, Fetch, State); + _ -> + handle_produce_response(Bin, State) + end. 
handle_fetch_response(Bin, Fetch, - State = #state{fetches = Fetches, - scheduled_fetches = ScheduledFetches}) -> - Options = Fetch#fetch.options, - Consumer = proplists:get_value(consumer, Options), - case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of - {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> - % The messages can be empty, for example when there are no new messages in - % this partition, if that happens, don't send anything and end the fetch. - ok = send_messages(Consumer, - case Messages of - [] -> []; - _ -> [{consumed, Messages}, {offset, Offset}] - end), - NewFetches = lists:delete(Fetch, Fetches), - NewState = State#state{current_fetch = void, fetches = NewFetches}, - case proplists:get_value(fetch_interval, Options, false) of - false -> - {ok, NewState}; - Interval -> - NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, - {offset, Offset}]), - Topic = Fetch#fetch.topic, - Partition = Fetch#fetch.partition, - ServerRef = Fetch#fetch.server_ref, - Arguments = [ServerRef, Topic, Partition, NewOptions], - {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), - NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], - {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} - end; - {incomplete, CorrelationId, Data, NewFetchState} -> - ok = case Data of - [{_, [{_, Messages = [_ | _]}]}] -> - send_messages(Consumer, {consumed, Messages}); - _ -> - % On some cases, kafka will return an incomplete response with no - % messages, but we shouldn't send the empty message list. - ok - end, - {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | - lists:delete(Fetch, Fetches)], - current_fetch = CorrelationId}}; - Error -> - ok = send_messages(Consumer, Error), - NewFetches = lists:delete(Fetch, Fetches), - {ok, State#state{current_fetch = void, fetches = NewFetches}} - end. + State = #state{fetches = Fetches, + scheduled_fetches = ScheduledFetches}) -> + Options = Fetch#fetch.options, + Consumer = proplists:get_value(consumer, Options), + case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of + {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> + % The messages can be empty, for example when there are no new messages in + % this partition, if that happens, don't send anything and end the fetch. + ok = send_messages(Consumer, + case Messages of + [] -> []; + _ -> [{consumed, Messages}, {offset, Offset}] + end), + NewFetches = lists:delete(Fetch, Fetches), + NewState = State#state{current_fetch = void, fetches = NewFetches}, + case proplists:get_value(fetch_interval, Options, false) of + false -> + {ok, NewState}; + Interval -> + NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, + {offset, Offset}]), + Topic = Fetch#fetch.topic, + Partition = Fetch#fetch.partition, + ServerRef = Fetch#fetch.server_ref, + Arguments = [ServerRef, Topic, Partition, NewOptions], + {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), + NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], + {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} + end; + {incomplete, CorrelationId, Data, NewFetchState} -> + ok = case Data of + [{_, [{_, Messages = [_ | _]}]}] -> + send_messages(Consumer, {consumed, Messages}); + _ -> + % On some cases, kafka will return an incomplete response with no + % messages, but we shouldn't send the empty message list. 
+ ok + end, + {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | + lists:delete(Fetch, Fetches)], + current_fetch = CorrelationId}}; + Error -> + ok = send_messages(Consumer, Error), + NewFetches = lists:delete(Fetch, Fetches), + {ok, State#state{current_fetch = void, fetches = NewFetches}} + end. handle_produce_response(Bin, State = #state{name = Name, ets = EtsName}) -> - case kafkerl_protocol:parse_produce_response(Bin) of - {ok, CorrelationId, Topics} -> - case ets:lookup(EtsName, CorrelationId) of - [{CorrelationId, Messages}] -> - ets:delete(EtsName, CorrelationId), - {Errors, Successes} = split_errors_and_successes(Topics), - % First, send the offsets and messages that were delivered - _ = spawn(fun() -> - notify_success(Successes, Messages) - end), - % Then handle the errors - case handle_errors(Errors, Messages, Name) of - ignore -> - {ok, State}; - {request_metadata, MessagesToResend} -> - kafkerl_connector:request_metadata(), - ok = resend_messages(MessagesToResend), - {ok, State} - end; - _ -> - _ = lager:warning("~p was unable to get produce response", [Name]), - {error, invalid_produce_response} - end; - Other -> - _ = lager:critical("~p got unexpected response when parsing message: ~p", - [Name, Other]), - {ok, State} - end. + case kafkerl_protocol:parse_produce_response(Bin) of + {ok, CorrelationId, Topics} -> + case ets:lookup(EtsName, CorrelationId) of + [{CorrelationId, Messages}] -> + ets:delete(EtsName, CorrelationId), + {Errors, Successes} = split_errors_and_successes(Topics), + % First, send the offsets and messages that were delivered + _ = spawn(fun() -> + notify_success(Successes, Messages) + end), + % Then handle the errors + case handle_errors(Errors, Messages, Name) of + ignore -> + {ok, State}; + {request_metadata, MessagesToResend} -> + kafkerl_connector:request_metadata(), + ok = resend_messages(MessagesToResend), + {ok, State} + end; + _ -> + _ = lager:warning("~p was unable to get produce response", [Name]), + {error, invalid_produce_response} + end; + Other -> + _ = lager:critical("~p got unexpected response when parsing message: ~p", + [Name, Other]), + {ok, State} + end. %%============================================================================== %% Utils %%============================================================================== resend_messages(Messages) -> - F = fun(M) -> kafkerl_connector:send(M) end, - lists:foreach(F, Messages). + F = fun(M) -> kafkerl_connector:send(M) end, + lists:foreach(F, Messages). notify_success([], _Messages) -> - ok; + ok; notify_success([{Topic, Partition, Offset} | T], Messages) -> - MergedMessages = kafkerl_utils:merge_messages(Messages), - Partitions = partitions_in_topic(Topic, MergedMessages), - M = messages_in_partition(Partition, Partitions), - kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), - notify_success(T, Messages). - + MergedMessages = kafkerl_utils:merge_messages(Messages), + Partitions = partitions_in_topic(Topic, MergedMessages), + M = messages_in_partition(Partition, Partitions), + kafkerl_connector:produce_succeeded({Topic, Partition, M, Offset}), + notify_success(T, Messages). + partitions_in_topic(Topic, Messages) -> - lists:flatten([P || {T, P} <- Messages, T =:= Topic]). + lists:flatten([P || {T, P} <- Messages, T =:= Topic]). messages_in_partition(Partition, Messages) -> - lists:flatten([M || {P, M} <- Messages, P =:= Partition]). + lists:flatten([M || {P, M} <- Messages, P =:= Partition]). 
build_correlation_id(State = #state{request_number = RequestNumber, - conn_idx = ConnIdx}) -> - % CorrelationIds are 32 bit integers, of those, the first 10 bits are used for - % the connectionId (hence the 1023 limit on it) and the other 22 bits are used - % for the sequential numbering, this magic number down here is actually 2^10-1 - NextRequest = case RequestNumber > 4194303 of - true -> 0; - false -> RequestNumber + 1 - end, - CorrelationId = (ConnIdx bsl 22) bor NextRequest, - {ok, CorrelationId, State#state{request_number = NextRequest}}. + conn_idx = ConnIdx}) -> + % CorrelationIds are 32 bit integers, of those, the first 10 bits are used for + % the connectionId (hence the 1023 limit on it) and the other 22 bits are used + % for the sequential numbering, this magic number down here is actually 2^10-1 + NextRequest = case RequestNumber > 4194303 of + true -> 0; + false -> RequestNumber + 1 + end, + CorrelationId = (ConnIdx bsl 22) bor NextRequest, + {ok, CorrelationId, State#state{request_number = NextRequest}}. split_errors_and_successes(Topics) -> - split_errors_and_successes(Topics, {[], []}). + split_errors_and_successes(Topics, {[], []}). split_errors_and_successes([], Acc) -> - Acc; + Acc; split_errors_and_successes([{Topic, Partitions} | T], Acc) -> - F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> - {E, [{Topic, Partition, Offset} | S]}; - ({Partition, Error, _}, {E, S}) -> - {[{Topic, Partition, Error} | E], S} - end, - split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). + F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> + {E, [{Topic, Partition, Offset} | S]}; + ({Partition, Error, _}, {E, S}) -> + {[{Topic, Partition, Error} | E], S} + end, + split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). handle_errors([], _Messages, _Name) -> - ignore; + ignore; handle_errors(Errors, Messages, Name) -> - F = fun(E) -> handle_error(E, Messages, Name) end, - case lists:filtermap(F, Errors) of - [] -> ignore; - L -> {request_metadata, L} - end. + lager:warning("error ~p in handling errors", [Errors]), + F = fun(E) -> handle_error(E, Messages, Name) end, + case lists:filtermap(F, Errors) of + [] -> ignore; + L -> + lager:warning("found errors when sending ~p, requesting metadata", [L]), + {request_metadata, L} + end. handle_error({Topic, Partition, Error}, Messages, Name) - when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse - Error =:= ?NOT_LEADER_FOR_PARTITION orelse - Error =:= ?LEADER_NOT_AVAILABLE -> - case get_message_for_error(Topic, Partition, Messages, Name) of - undefined -> false; - Message -> {true, Message} - end; + when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse + Error =:= ?NOT_LEADER_FOR_PARTITION orelse + Error =:= ?LEADER_NOT_AVAILABLE -> + case get_message_for_error(Topic, Partition, Messages, Name) of + undefined -> false; + Message -> {true, Message} + end; handle_error({Topic, Partition, Error}, _Messages, Name) -> - ErrorName = kafkerl_error:get_error_name(Error), - _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", - [Name, ErrorName, Topic, Partition]), - false. + ErrorName = kafkerl_error:get_error_name(Error), + _ = lager:error("~p was unable to handle ~p error on topic ~p, partition ~p", + [Name, ErrorName, Topic, Partition]), + false. 
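A worked example of build_correlation_id/1: the sequence counter wraps at 4194303, which is 2^22 - 1 (the 2^10 - 1 figure in the inline comment is the 1023 cap on conn_idx, not the wrap value), and the connection index occupies the top 10 bits:

    %% With conn_idx = 3 and request_number = 41:
    NextRequest = 41 + 1,                        % 42
    CorrelationId = (3 bsl 22) bor NextRequest.  % 12582912 bor 42 = 12582954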
get_message_for_error(Topic, Partition, SavedMessages, Name) -> - case lists:keyfind(Topic, 1, SavedMessages) of - false -> - _ = lager:error("~p found no messages for topic ~p, partition ~p", - [Name, Topic, Partition]), - undefined; - {Topic, Partitions} -> - case lists:keyfind(Partition, 1, Partitions) of - false -> - _ = lager:error("~p found no messages for topic ~p, partition ~p", - [Name, Topic, Partition]), - undefined; - {Partition, Messages} -> - {Topic, Partition, Messages} - end - end. + case lists:keyfind(Topic, 1, SavedMessages) of + false -> + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), + undefined; + {Topic, Partitions} -> + case lists:keyfind(Partition, 1, Partitions) of + false -> + _ = lager:error("~p found no messages for topic ~p, partition ~p", + [Name, Topic, Partition]), + undefined; + {Partition, Messages} -> + {Topic, Partition, Messages} + end + end. connect(Pid, Name, _TCPOpts, {Host, Port} = _Address, _Timeout, 0) -> - _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), - Pid ! connection_timeout; + _ = lager:error("~p was unable to connect to ~p:~p", [Name, Host, Port]), + Pid ! connection_timeout; connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> - _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), - case gen_tcp:connect(Host, Port, TCPOpts, 5000) of - {ok, Socket} -> - _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), - gen_tcp:controlling_process(Socket, Pid), - Pid ! {connected, Socket}; - {error, Reason} -> - NewRetries = Retries - 1, - _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p + _ = lager:debug("~p attempting connection to ~p:~p", [Name, Host, Port]), + case gen_tcp:connect(Host, Port, TCPOpts, 5000) of + {ok, Socket} -> + _ = lager:debug("~p connnected to ~p:~p", [Name, Host, Port]), + gen_tcp:controlling_process(Socket, Pid), + Pid ! {connected, Socket}; + {error, Reason} -> + NewRetries = Retries - 1, + _ = lager:warning("~p unable to connect to ~p:~p. Reason: ~p (~p retries left)", - [Name, Host, Port, Reason, NewRetries]), - timer:sleep(Timeout), - connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) - end. + [Name, Host, Port, Reason, NewRetries]), + timer:sleep(Timeout), + connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) + end. queue_flush(Time) -> - timer:send_after(Time * 1000, {flush, Time}). + timer:send_after(Time * 1000, {flush, Time}). get_all_messages(Buffers) -> - get_all_messages(Buffers, []). + get_all_messages(Buffers, []). get_all_messages([], Acc) -> - Acc; + Acc; get_all_messages([H | T], Acc) -> - get_all_messages(T, Acc ++ get_messages_from(H, 20)). + get_all_messages(T, Acc ++ get_messages_from(H, 20)). get_messages_from(Ets, Retries) -> - case ets_buffer:read_all(Ets) of - L when is_list(L) -> - L; - _Error when Retries > 0 -> - get_messages_from(Ets, Retries - 1); - _Error -> - _ = lager:warning("giving up on reading from the ETS buffer"), - [] - end. + case ets_buffer:read_all(Ets) of + L when is_list(L) -> + L; + _Error when Retries > 0 -> + get_messages_from(Ets, Retries - 1); + _Error -> + _ = lager:warning("giving up on reading from the ETS buffer"), + [] + end. parse_correlation_id(Bin, void) -> - kafkerl_protocol:parse_correlation_id(Bin); + kafkerl_protocol:parse_correlation_id(Bin); parse_correlation_id(Bin, CorrelationId) -> - {ok, CorrelationId, Bin}. + {ok, CorrelationId, Bin}. 
get_fetch(_CorrelationId, []) -> - not_found; + not_found; get_fetch(CorrelationId, [H = #fetch{correlation_id = CorrelationId} | _T]) -> - H; + H; get_fetch(CorrelationId, [_H | T]) -> - get_fetch(CorrelationId, T). + get_fetch(CorrelationId, T). get_fetch(_Topic, _Partition, []) -> - not_found; + not_found; get_fetch(Topic, Partition, [H = #fetch{topic = Topic, - partition = Partition} | _T]) -> - H; + partition = Partition} | _T]) -> + H; get_fetch(Topic, Partition, [_H | T]) -> - get_fetch(Topic, Partition, T). + get_fetch(Topic, Partition, T). send_messages(_Consumer, []) -> - ok; + ok; send_messages(Consumer, [Event | T]) -> - case send_messages(Consumer, Event) of - ok -> send_messages(Consumer, T); - Error -> Error - end; + case send_messages(Consumer, Event) of + ok -> send_messages(Consumer, T); + Error -> Error + end; send_messages(Consumer, Event) -> - kafkerl_utils:send_event(Consumer, Event). + kafkerl_utils:send_event(Consumer, Event). ets_options() -> - [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. \ No newline at end of file + [named_table, public, {write_concurrency, true}, {read_concurrency, true}]. \ No newline at end of file diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index 58c9f90..4ee5be4 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -11,20 +11,21 @@ -export([make_request/3]). %% gen_fsm -export([start_link/1, init/1, handle_info/3, terminate/3, code_change/4, - handle_event/3, handle_sync_event/4]). + handle_event/3, handle_sync_event/4]). -include("kafkerl.hrl"). - --record(state, {config = [] :: [{atom(), any()}], - client_id = <<>> :: kafkerl_protocol:client_id(), - brokers = [] :: [kafkerl_connector:address()], - max_retries = -1 :: integer(), - retry_interval = 1 :: non_neg_integer(), - cooldown = 0 :: integer(), - known_topics = [] :: [kafkerl:topic()], - next_topics = [] :: [kafkerl:topic()], - broker_connections = [], - connection_index = 0}). +-type broker_id() :: non_neg_integer(). +-type broker_connection() :: {broker_id(), {kafkerl:topic(), kafkerl:partition()}, atom()}. +-record(state, {config = [] :: [{atom(), any()}], + client_id = <<>> :: kafkerl_protocol:client_id(), + brokers = [] :: [kafkerl_connector:address()], + max_retries = -1 :: integer(), + retry_interval = 1 :: non_neg_integer(), + cooldown = 0 :: integer(), + known_topics = [] :: [kafkerl:topic()], + next_topics = [] :: [kafkerl:topic()], + broker_connections = [] :: [broker_connection()], + connection_index = 0 :: non_neg_integer()}). -type state() :: #state{}. %%============================================================================== @@ -32,80 +33,82 @@ %%============================================================================== -spec start_link(any()) -> {ok, pid()} | ignore | kafkerl:error(). start_link(Config) -> - gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []). + gen_fsm:start_link({local, ?MODULE}, ?MODULE, [Config], []). -spec request_metadata([kafkerl:topic()]) -> ok. request_metadata(Topics) -> - gen_fsm:send_event(?MODULE, {request, Topics}). + gen_fsm:send_event(?MODULE, {request, Topics}). -spec get_known_topics() -> ok. get_known_topics() -> - gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics). + gen_fsm:sync_send_all_state_event(?MODULE, get_known_topics). 
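The new broker_connections field holds one broker_connection() entry per topic/partition, keyed by the connection id it was started against; connection names follow kafkerl_broker_connection:start_link/3, which registers kafkerl_broker_connection_<ConnIdx>. An illustrative value (topics and partition counts are only examples):

    [{0, {<<"kafkerl_test1">>, 0}, kafkerl_broker_connection_0},
     {0, {<<"kafkerl_test2">>, 0}, kafkerl_broker_connection_0},
     {1, {<<"kafkerl_test2">>, 1}, kafkerl_broker_connection_1}]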
%%============================================================================== %% States %%============================================================================== -spec idle(any(), state()) -> {next_state, atom(), state()}. idle({request, Topics}, State = #state{known_topics = KnownTopics}) -> - % Add the requested topics to the state - SortedTopics = lists:usort(KnownTopics), - NewKnownTopics = lists:umerge(Topics, SortedTopics), - NewState = State#state{known_topics = NewKnownTopics}, - % Make the request - ok = schedule_metadata_request(NewState), - % And move the the requesting state - {next_state, requesting, NewState}. + % Add the requested topics to the state + SortedTopics = lists:usort(KnownTopics), + NewKnownTopics = lists:umerge(Topics, SortedTopics), + NewState = State#state{known_topics = NewKnownTopics}, + % Make the request + ok = schedule_metadata_request(NewState), + % And move the the requesting state + {next_state, requesting, NewState}. -spec requesting(any(), state()) -> {next_state, atom(), state()}. % Handle a new metadata request while there's one in progress requesting({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> - SortedTopics = lists:usort(NewTopics), % This also removes repeated entries - % If the request is for known topics, then we can safely ignore it, otherwise, - % queue a metadata request - NewState = case SortedTopics -- KnownTopics of - [] -> State; - _ -> request_metadata([]), - State#state{known_topics = lists:umerge(KnownTopics, - SortedTopics)} - end, - {next_state, requesting, NewState}; + SortedTopics = lists:usort(NewTopics), % This also removes repeated entries + % If the request is for known topics, then we can safely ignore it, otherwise, + % queue a metadata request + NewState = case SortedTopics -- KnownTopics of + [] -> State; + _ -> request_metadata([]), + State#state{known_topics = lists:umerge(KnownTopics, + SortedTopics)} + end, + {next_state, requesting, NewState}; % Handle the updated metadata requesting({metadata_updated, RawMapping}, State) -> - % Create the topic mapping (this also starts the broker connections) + % Create the topic mapping (this also starts the broker connections) {N, TopicMapping} = get_broker_mapping(RawMapping, State), - _ = lager:debug("Refreshed topic mapping: ~p", [TopicMapping]), - + OldMapping = State#state.broker_connections, + %%OldMapping -- TopicMapping =/= [] andalso + lager:warning("sync broker mappings old ~p new ~p", + [OldMapping, TopicMapping]), NewMapping2 = [{{Topic, Partition}, Conn} || - {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], - _ = lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), - ok = kafkerl_connector:topic_mapping_updated(NewMapping2), - {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; + {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], + lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), + ok = kafkerl_connector:topic_mapping_updated(NewMapping2), + {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; % If we have no more retries left, go on cooldown requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> - Params = [?MODULE, on_timer], - {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params), - {next_state, on_cooldown, State}; + Params = [?MODULE, on_timer], + {ok, _} = timer:apply_after(Cooldown, gen_fsm, send_event, Params), + {next_state, on_cooldown, State}; % If we have more retries to do, schedule a new 
retry requesting({metadata_retry, Retries}, State) -> - ok = schedule_metadata_request(Retries, State), - {next_state, requesting, State}. + ok = schedule_metadata_request(Retries, State), + {next_state, requesting, State}. -spec on_cooldown(any(), state()) -> {next_state, atom(), state()}. on_cooldown({request, NewTopics}, State = #state{known_topics = KnownTopics}) -> - % Since we are on cooldown (the time between consecutive requests) we only add - % the topics to the scheduled next request - SortedTopics = lists:usort(NewTopics), - State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}; + % Since we are on cooldown (the time between consecutive requests) we only add + % the topics to the scheduled next request + SortedTopics = lists:usort(NewTopics), + State#state{known_topics = lists:umerge(KnownTopics, SortedTopics)}; on_cooldown(on_timer, State) -> - ok = schedule_metadata_request(State), - {next_state, requesting, State}. + ok = schedule_metadata_request(State), + {next_state, requesting, State}. %%============================================================================== %% Events %%============================================================================== handle_sync_event(get_known_topics, _From, StateName, State) -> - Reply = State#state.known_topics, - {reply, Reply, StateName, State}. + Reply = State#state.known_topics, + {reply, Reply, StateName, State}. %%============================================================================== %% gen_fsm boilerplate @@ -126,15 +129,15 @@ handle_info(Message, StateName, State) -> -spec code_change(any(), atom(), state(), any()) -> {ok, atom(), state()}. code_change(_OldVsn, StateName, StateData, _Extra) -> - {ok, StateName, StateData}. + {ok, StateName, StateData}. -spec terminate(any(), atom(), state()) -> ok. terminate(_Reason, _StateName, _StateData) -> - ok. + ok. -spec handle_event(any(), atom(), state()) -> {next_state, atom(), state()}. handle_event(_Event, StateName, StateData) -> - {next_state, StateName, StateData}. + {next_state, StateName, StateData}. %-spec handle_sync_event(any(), any(), atom(), state()) -> % {next_state, atom(), state()}. @@ -145,195 +148,208 @@ handle_event(_Event, StateName, StateData) -> %% Handlers %%============================================================================== init([Config]) -> - Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, - {metadata_tcp_timeout, positive_integer, {default, 1500}}, - {metadata_request_cooldown, positive_integer, {default, 333}}, - {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, - {brokers, [{string, {integer, {1, 65535}}}], required}, - {topics, [binary], required}], - case normalizerl:normalize_proplist(Schema, Config) of - {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> - State = #state{config = Config, - known_topics = Topics, - brokers = Brokers, - cooldown = Cooldown, - client_id = ClientId, - max_retries = MaxRetries, - retry_interval = RetryInterval}, - process_flag(trap_exit, true), - {ok, idle, State}; - {errors, Errors} -> - ok = lists:foreach(fun(E) -> - _ = lager:critical("Metadata config error ~p", [E]) - end, Errors), - {stop, bad_config} - end. 
+ Schema = [{client_id, binary, {default, <<"kafkerl_client">>}}, + {metadata_tcp_timeout, positive_integer, {default, 1500}}, + {metadata_request_cooldown, positive_integer, {default, 333}}, + {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, + {brokers, [{string, {integer, {1, 65535}}}], required}, + {topics, [binary], required}], + case normalizerl:normalize_proplist(Schema, Config) of + {ok, [ClientId, RetryInterval, Cooldown, MaxRetries, Brokers, Topics]} -> + State = #state{config = Config, + known_topics = Topics, + brokers = Brokers, + cooldown = Cooldown, + client_id = ClientId, + max_retries = MaxRetries, + retry_interval = RetryInterval}, + process_flag(trap_exit, true), + {ok, idle, State}; + {errors, Errors} -> + ok = lists:foreach(fun(E) -> + _ = lager:critical("Metadata config error ~p", [E]) + end, Errors), + {stop, bad_config} + end. %%============================================================================== %% Request logic %%============================================================================== schedule_metadata_request(State) -> - schedule_metadata_request(undefined, State). + schedule_metadata_request(undefined, State). schedule_metadata_request(Retries, State = #state{brokers = Brokers, - max_retries = MaxRetries, - known_topics = Topics, - retry_interval = Interval}) -> - Request = metadata_request(State, Topics), - case Retries of - undefined -> - Params = [Brokers, Request, MaxRetries], - _ = spawn(?MODULE, make_request, Params); - _ -> - Params = [Brokers, Request, Retries], - {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) - end, - ok. + max_retries = MaxRetries, + known_topics = Topics, + retry_interval = Interval}) -> + Request = metadata_request(State, Topics), + case Retries of + undefined -> + Params = [Brokers, Request, MaxRetries], + _ = spawn(?MODULE, make_request, Params); + _ -> + Params = [Brokers, Request, Retries], + {ok, _} = timer:apply_after(Interval, ?MODULE, make_request, Params) + end, + ok. make_request(Brokers, Request, Retries) -> - case do_request_metadata(Brokers, Request) of - {ok, TopicMapping} -> - gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); - Error -> - _ = lager:debug("Metadata request error: ~p", [Error]), - NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, - gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) - end. + case do_request_metadata(Brokers, Request) of + {ok, TopicMapping} -> + gen_fsm:send_event(?MODULE, {metadata_updated, TopicMapping}); + Error -> + _ = lager:debug("Metadata request error: ~p", [Error]), + NewRetries = case Retries of -1 -> -1; _ -> Retries - 1 end, + gen_fsm:send_event(?MODULE, {metadata_retry, NewRetries}) + end. 
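The normalizerl schema in init/1 above fixes the shape of the proplist this handler expects. A minimal sketch follows, spelling out the schema defaults; the broker and topic values are placeholders, and how the proplist is assembled in a real deployment is not shown here:

  %% minimal config accepted by the schema above (placeholder broker/topic values)
  Config = [{client_id,                 <<"kafkerl_client">>},
            {metadata_tcp_timeout,      1500},
            {metadata_request_cooldown, 333},
            {max_metadata_retries,      -1},   % -1 never stops retrying
            {brokers,                   [{"localhost", 9092}]},
            {topics,                    [<<"my_topic">>]}],
  {ok, _Pid} = kafkerl_metadata_handler:start_link(Config).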
do_request_metadata([], _Request) -> - {error, all_down}; + {error, all_down}; do_request_metadata([{Host, Port} = _Broker | T], Request) -> - _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), - % Connect to the Broker - case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of - {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % Failed, try with the next one in the list - do_request_metadata(T, Request); - {ok, Socket} -> - % On success, send the metadata request - case gen_tcp:send(Socket, Request) of + _ = lager:debug("Attempting to connect to broker at ~s:~p", [Host, Port]), + % Connect to the Broker + case gen_tcp:connect(Host, Port, get_metadata_tcp_options()) of {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % Unable to send request, try the next broker - do_request_metadata(T, Request); - ok -> - case gen_tcp:recv(Socket, 0, 6000) of - {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - gen_tcp:close(Socket), - % Nothing received (probably a timeout), try the next broker - do_request_metadata(T, Request); - {ok, Data} -> - gen_tcp:close(Socket), - case kafkerl_protocol:parse_metadata_response(Data) of + log_metadata_request_error(Host, Port, Reason), + % Failed, try with the next one in the list + do_request_metadata(T, Request); + {ok, Socket} -> + % On success, send the metadata request + case gen_tcp:send(Socket, Request) of {error, Reason} -> - log_metadata_request_error(Host, Port, Reason), - % The parsing failed, try the next broker - do_request_metadata(T, Request); - {ok, _CorrelationId, Metadata} -> - % We received a metadata response, make sure it has brokers - {ok, get_topic_mapping(Metadata)} - end - end - end - end. + log_metadata_request_error(Host, Port, Reason), + % Unable to send request, try the next broker + do_request_metadata(T, Request); + ok -> + case gen_tcp:recv(Socket, 0, 6000) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + gen_tcp:close(Socket), + % Nothing received (probably a timeout), try the next broker + do_request_metadata(T, Request); + {ok, Data} -> + gen_tcp:close(Socket), + case kafkerl_protocol:parse_metadata_response(Data) of + {error, Reason} -> + log_metadata_request_error(Host, Port, Reason), + % The parsing failed, try the next broker + do_request_metadata(T, Request); + {ok, _CorrelationId, Metadata} -> + % We received a metadata response, make sure it has brokers + {ok, get_topic_mapping(Metadata)} + end + end + end + end. %%============================================================================== %% Utils %%============================================================================== get_metadata_tcp_options() -> - kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). + kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). log_metadata_request_error(Host, Port, Reason) -> - _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", - [Host, Port, Reason]). + _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", + [Host, Port, Reason]). metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> - kafkerl_protocol:build_metadata_request([], 0, ClientId); + kafkerl_protocol:build_metadata_request([], 0, ClientId); metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, - NewTopics) -> - AllTopics = lists:umerge(KnownTopics, NewTopics), - kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). 
+ NewTopics) -> + AllTopics = lists:umerge(KnownTopics, NewTopics), + kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). %%============================================================================== %% Topic/broker mapping %%============================================================================== get_topic_mapping({BrokerMetadata, TopicMetadata}) -> - % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] - Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), - % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] - Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), - % Converts the BrokerIds from the previous array into socket addresses - lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> - case lists:keyfind(BrokerId, 1, BrokerMetadata) of - {BrokerId, HostData} -> - {true, {{Topic, Partition, BrokerId}, HostData}}; - _Any -> - false - end - end, Partitions). + % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] + Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), + % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] + Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), + % Converts the BrokerIds from the previous array into socket addresses + lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> + case lists:keyfind(BrokerId, 1, BrokerMetadata) of + {BrokerId, HostData} -> + {true, {{Topic, Partition, BrokerId}, HostData}}; + _Any -> + false + end + end, Partitions). expand_topic({?NO_ERROR, Topic, Partitions}) -> - {true, {Topic, Partitions}}; + {true, {Topic, Partitions}}; expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> - % Replica not available can be ignored, still, show a warning - _ = lager:warning("Ignoring ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, Partitions}}; + % Replica not available can be ignored, still, show a warning + _ = lager:warning("Ignoring ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, Partitions}}; expand_topic({Error, Topic, _Partitions}) -> - _ = lager:error("Error ~p on metadata for topic ~p", - [kafkerl_error:get_error_name(Error), Topic]), - {true, {Topic, []}}. + _ = lager:error("Error ~p on metadata for topic ~p", + [kafkerl_error:get_error_name(Error), Topic]), + {true, {Topic, []}}. expand_partitions(Metadata) -> - expand_partitions(Metadata, []). + expand_partitions(Metadata, []). 
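To make the shapes concrete: get_topic_mapping/1 above takes the parsed {BrokerMetadata, TopicMetadata} pair and resolves each partition's leader to that broker's address. An illustrative sketch with made-up values; broker addresses are shown as {Host, Port} pairs for the example:

  BrokerMetadata = [{0, {"kafka1", 9092}}, {1, {"kafka2", 9092}}],
  TopicMetadata  = [{?NO_ERROR, <<"events">>,
                     [{?NO_ERROR, 0, 1, [], []},     % partition 0, leader broker 1
                      {?NO_ERROR, 1, 0, [], []}]}],  % partition 1, leader broker 0
  %% get_topic_mapping({BrokerMetadata, TopicMetadata}) then yields, ordering aside:
  %% [{{<<"events">>, 0, 1}, {"kafka2", 9092}},
  %%  {{<<"events">>, 1, 0}, {"kafka1", 9092}}]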
expand_partitions({_Topic, []}, Acc) -> - {true, Acc}; + {true, Acc}; expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, - _, _} | T]}, Acc) -> - _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - ExpandedPartition = {{Topic, Partition}, Leader}, - expand_partitions({Topic, T}, [ExpandedPartition | Acc]); + _, _} | T]}, Acc) -> + _ = lager:warning("Ignoring ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + ExpandedPartition = {{Topic, Partition}, Leader}, + expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> - _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", - [kafkerl_error:get_error_name(Error), Topic, Partition]), - expand_partitions({Topic, T}, Acc). + _ = lager:error("Error ~p on metadata for topic ~p, partition ~p", + [kafkerl_error:get_error_name(Error), Topic, Partition]), + expand_partitions({Topic, T}, Acc). get_broker_mapping(TopicMapping, State) -> - get_broker_mapping(TopicMapping, State, State#state.connection_index, - State#state.broker_connections). + get_broker_mapping(TopicMapping, State, State#state.connection_index, + State#state.broker_connections). get_broker_mapping([], _State, N, Acc) -> + %% ACC is [{ConnId, {Topic, Partition}, ProcessName}] {N, lists:usort(Acc)}; -get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], - State = #state{config = Config}, N, Acc) -> - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = kafkerl_buffer:create_buffer(Buffer, fifo), - {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of - false -> - {start_broker_connection(N, Address, Config), N + 1}; - {ConnId, _, BrokerConnection} -> - {BrokerConnection, N} - end, - - Buffer = kafkerl_utils:buffer_name(Topic, Partition), - _ = kafkerl_buffer:create_buffer(Buffer, fifo), - kafkerl_broker_connection:add_buffer(Conn, Buffer), - - NewMapping = {ConnId, {Topic, Partition}, Conn}, - get_broker_mapping(T, State, NewN, [NewMapping | Acc]). 
+get_broker_mapping([{{Topic, Partition, BrokerId}, Address} | T], + State = #state{config = Config}, N, Acc) -> + %% generate the topic/partition buffer name + Buffer = kafkerl_utils:buffer_name(Topic, Partition), + _ = kafkerl_buffer:create_buffer(Buffer, fifo), + {Conn, NewN} = + case lists:keyfind(BrokerId, 1, Acc) of + false -> + %% no connection for the current broker id yet, + %% start a broker connection + {start_broker_connection(N, Address, Config), N + 1}; + {BrokerId, _, BrokerConnectionProcess} -> + %% there is already a connection for this broker id, just continue + {BrokerConnectionProcess, N} + end, + NewMapping = {BrokerId, {Topic, Partition}, Conn}, + kafkerl_broker_connection:add_buffer(Conn, Buffer), + NewAcc = + case lists:keyfind({Topic, Partition}, 2, Acc) of + false -> + %% the topic is not bound to any other process, just continue + Acc; + {_BrokerId, {Topic, Partition}, ConnectionProcess} -> + % the topic is bound to another process, ask that process to remove the binding + kafkerl_broker_connection:delete_buffer(ConnectionProcess, Buffer), + % delete the current binding from Acc + lists:keydelete({Topic, Partition}, 2, Acc) + end, + get_broker_mapping(T, State, NewN, [NewMapping | NewAcc]). start_broker_connection(N, Address, Config) -> - case kafkerl_broker_connection:start_link(N, Address, Config) of - {ok, Name, _Pid} -> - Name; - {error, {already_started, Pid}} -> - kafkerl_broker_connection:clear_buffers(Pid), - Pid - end. \ No newline at end of file + case kafkerl_broker_connection:start_link(N, Address, Config) of + {ok, Name, _Pid} -> + Name; + {error, {already_started, Pid}} -> + kafkerl_broker_connection:clear_buffers(Pid), + Pid + end. diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index f05b396..57224e1 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -196,7 +196,7 @@ build_produce_request({Topic, Partition, Messages}, Compression) -> {Size, MessageSet} = build_message_set(Messages, Compression), {Size + TopicSize + 24, [<<-1:?SHORT, - -1:?INT, % Timeout + -1:?INT, % Timeout %% TODO: we get a timeout error from kafka when this value is set to -1; after changing it to 1000 the error disappears. Need to double-check whether this value is handled differently in the latest kafka protocol 1:?UINT, % TopicCount TopicSize:?USHORT>>, Topic, From e068d69538e703e280ace4fc72a29577835c67ec Mon Sep 17 00:00:00 2001 From: georgeye Date: Mon, 11 Jul 2016 18:31:24 -0700 Subject: [PATCH 56/72] update version --- src/kafkerl.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index 8859cfb..817a006 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,6 +1,6 @@ {application, kafkerl, [{description, []}, - {vsn, "1.1"}, + {vsn, "1.2"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, From 9315c64d5306f13e4f53c5535b80ac277bceb967 Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Wed, 13 Jul 2016 20:53:23 +0800 Subject: [PATCH 57/72] add get_dump_files/0 --- src/kafkerl.erl | 5 ++++- src/kafkerl_connector.erl | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 95f36d1..e7c365e 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -5,7 +5,7 @@ -export([produce/3, consume/2, consume/3, stop_consuming/2, request_metadata/0, request_metadata/1, - partitions/0, + partitions/0, get_dump_files/0, subscribe/1, subscribe/2, unsubscribe/1]). -export([version/0]).
@@ -76,6 +76,9 @@ consume(Topic, Partition, Options) -> kafkerl_connector:fetch(Topic, Partition, Options) end. +get_dump_files() -> + kafkerl_connector:get_dump_files(). + -spec stop_consuming(topic(), partition()) -> ok. stop_consuming(Topic, Partition) -> kafkerl_connector:stop_fetch(Topic, Partition). diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 72b3a95..0e64e09 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -18,6 +18,8 @@ -export([topic_mapping_updated/1]). % Supervisors -export([start_link/1]). + +-export([get_dump_files/0]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -93,6 +95,9 @@ get_partitions() -> Error -> Error end. +-spec get_dump_files() -> {ok, any()} | {error, any()}. +get_dump_files() -> + gen_server:call(kafkerl, get_dump_files). -spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). subscribe(Callback) -> @@ -144,7 +149,21 @@ handle_call({subscribe, Callback}, _From, State) -> end; handle_call({unsubscribe, Callback}, _From, State) -> NewCallbacks = lists:keydelete(Callback, 2, State#state.callbacks), - {reply, ok, State#state{callbacks = NewCallbacks}}. + {reply, ok, State#state{callbacks = NewCallbacks}}; + +handle_call(get_dump_files, _From, State) -> + DumpLocation = State#state.dump_location, + WorkingDirectory = case file:get_cwd() of + {ok, Path} -> Path; + {error, _} -> "" + end, + FilePath = filename:join([WorkingDirectory, DumpLocation]), + case file:list_dir(FilePath) of + {ok, Filenames} -> + {reply, {ok, [FilePath ++ F || F <- Filenames, lists:suffix(".dump", F)]}, State}; + Error -> + {reply, Error, State} + end. -spec handle_info(any(), state()) -> {noreply, state()} | {stop, {error, any()}, state()}. From 86931c9f6163e4a4d355d5fc1fe7d90851a57b1d Mon Sep 17 00:00:00 2001 From: georgeye Date: Mon, 25 Jul 2016 11:45:39 -0700 Subject: [PATCH 58/72] update version --- src/kafkerl.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index 817a006..ed574af 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,6 +1,6 @@ {application, kafkerl, [{description, []}, - {vsn, "1.2"}, + {vsn, "1.3"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, From 89540c335b895bf41ad19340d6f3fe005653fef1 Mon Sep 17 00:00:00 2001 From: anders wei Date: Wed, 3 Aug 2016 13:55:54 +0800 Subject: [PATCH 59/72] add support for resending messages from files --- src/kafkerl.erl | 5 +++ src/kafkerl_connector.erl | 52 +++++++++++++++++++++++++++----- src/kafkerl_metadata_handler.erl | 25 +++++++++------ 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/kafkerl.erl b/src/kafkerl.erl index 572a98b..392a9a1 100644 --- a/src/kafkerl.erl +++ b/src/kafkerl.erl @@ -6,6 +6,7 @@ consume/2, consume/3, stop_consuming/2, request_metadata/0, request_metadata/1, partitions/0, get_dump_files/0, + produce_messages_from_file/1, subscribe/1, subscribe/2, unsubscribe/1]). -export([version/0]). @@ -84,6 +85,10 @@ stop_consuming(Topic, Partition) -> get_dump_files() -> kafkerl_connector:get_dump_files(). +-spec produce_messages_from_file(list()) -> {ok, any()} | {error, any()}. +produce_messages_from_file(File) -> + kafkerl_connector:produce_messages_from_file(File). + %% Metadata API -spec request_metadata() -> ok. 
request_metadata() -> diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 0e64e09..9ffee54 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -20,6 +20,7 @@ -export([start_link/1]). -export([get_dump_files/0]). +-export([produce_messages_from_file/1]). % gen_server callbacks -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2]). @@ -33,7 +34,7 @@ -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. --record(state, {broker_mapping = void :: [broker_mapping()] | void, +-record(state, {broker_mapping = [] :: [broker_mapping()] | void, config = [] :: [{atom(), any()}], autocreate_topics = false :: boolean(), callbacks = [] :: [{kafkerl:filters(), @@ -99,6 +100,9 @@ get_partitions() -> get_dump_files() -> gen_server:call(kafkerl, get_dump_files). +produce_messages_from_file(File) -> + gen_server:call(kafkerl, {produce_messages_from_file, File}). + -spec subscribe(kafkerl:callback()) -> ok | kafkerl:error(). subscribe(Callback) -> subscribe(Callback, all). @@ -160,10 +164,13 @@ handle_call(get_dump_files, _From, State) -> FilePath = filename:join([WorkingDirectory, DumpLocation]), case file:list_dir(FilePath) of {ok, Filenames} -> - {reply, {ok, [FilePath ++ F || F <- Filenames, lists:suffix(".dump", F)]}, State}; + {reply, {ok, [filename:join([FilePath, F]) || F <- Filenames, lists:suffix(".dump", F)]}, State}; Error -> {reply, Error, State} - end. + end; +handle_call({produce_messages_from_file, File}, _From, State) -> + Result = handle_resend_file(File), + {reply, Result, State}. -spec handle_info(any(), state()) -> {noreply, state()} | {stop, {error, any()}, state()}. @@ -270,12 +277,26 @@ handle_request_metadata(Topics) -> kafkerl_metadata_handler:request_metadata(Topics). handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, + broker_mapping = Mapping, last_dump_name = LastDumpName}) -> + + %% get all buffers that hold specific known topics + partitions. + MappingBuffers = [ + kafkerl_utils:buffer_name(Topic, Partition) + ||{{Topic, Partition},_Connection} <- Mapping + ], + % Get the buffer name and all the messages from it - Buffer = kafkerl_utils:default_buffer_name(), - MessagesInBuffer = ets_buffer:read_all(Buffer), - % Split them between the ones that should be retried and those that don't - {ToDump, ToRetry} = split_message_dump(MessagesInBuffer, State), + DefaultBuffer = kafkerl_utils:default_buffer_name(), + AllBuffers = [DefaultBuffer | MappingBuffers], + + {ToDump, ToRetry} = + lists:foldl(fun(Buffer, {ToDumps, ToRetries}) -> + MessagesInBuffer = ets_buffer:read_all(Buffer), + {ToDump, ToRetry} = split_message_dump(MessagesInBuffer, State), + {ToDump ++ ToDumps, ToRetry ++ ToRetries} + end, {[], []}, AllBuffers), + % Retry the messages on an async function (to avoid locking this gen_server) ok = retry_messages(ToRetry), % And dump the messages that need to be dumped into a file @@ -301,6 +322,23 @@ handle_dump_buffer_to_disk(State = #state{dump_location = DumpLocation, State end. 
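With the two handle_call clauses above in place, replaying previously dumped messages is a matter of listing the dump files and feeding each one back. A usage sketch, assuming the kafkerl application is running and a dump_location is configured:

  {ok, DumpFiles} = kafkerl:get_dump_files(),
  lists:foreach(fun(File) ->
                  case kafkerl:produce_messages_from_file(File) of
                    {ok, _}         -> ok;
                    {error, Reason} -> lager:warning("could not resend ~p: ~p", [File, Reason])
                  end
                end, DumpFiles).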
+handle_resend_file(File) -> + case file:read_file(File) of + {ok, Binary} -> + try + Messages = erlang:binary_to_term(Binary), + retry_messages(Messages), + lager:notice("Kafkerl AUDIT: resending file ~p into queue", [File]), + {ok, resending} + catch Error:_ -> + lager:notice("Kafkerl AUDIT: resending file ~p failed with reason ~p", [File, Error]), + {error, Error} + end; + {error, Error} -> + lager:notice("Kafkerl AUDIT: resending file ~p failed, file error ~p", [File, Error]), + {error, Error} + end. + %%============================================================================== %% Utils %%============================================================================== diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index 4ee5be4..ae4c66c 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -28,6 +28,7 @@ connection_index = 0 :: non_neg_integer()}). -type state() :: #state{}. +-define(METADATA_REFRESH_INTERVAL, 60000). %%============================================================================== %% API %%============================================================================== @@ -76,13 +77,21 @@ requesting({metadata_updated, RawMapping}, State) -> {N, TopicMapping} = get_broker_mapping(RawMapping, State), OldMapping = State#state.broker_connections, %%OldMapping -- TopicMapping =/= [] andalso - lager:warning("sync broker mappings old ~p new ~p", + lager:info("sync broker mappings old ~p new ~p", [OldMapping, TopicMapping]), NewMapping2 = [{{Topic, Partition}, Conn} || {_ConnId, {Topic, Partition}, Conn} <- TopicMapping], lager:debug("Refreshed topic mapping: ~p", [NewMapping2]), - ok = kafkerl_connector:topic_mapping_updated(NewMapping2), - {next_state, idle, State#state{connection_index = N, broker_connections = TopicMapping}}; + (TopicMapping /= OldMapping) orelse + kafkerl_connector:topic_mapping_updated(NewMapping2), + Topics = lists:usort([Topic || + {_ConnId, {Topic, _Partition}, _Conn} <- TopicMapping]), + KnownTopics = State#state.known_topics, + NewState = State#state{connection_index = N, + known_topics = lists:umerge(KnownTopics, Topics), + broker_connections = TopicMapping}, + timer:apply_after(?METADATA_REFRESH_INTERVAL, ?MODULE, request_metadata, [[]]), + {next_state, idle, NewState}; % If we have no more retries left, go on cooldown requesting({metadata_retry, 0}, State = #state{cooldown = Cooldown}) -> Params = [?MODULE, on_timer], @@ -118,7 +127,7 @@ handle_info({'EXIT', Pid, Reason}, StateName, State) -> lager:info("process ~p crashed with reason ~p ", [Pid, Reason]), BrokerConnections = [{Name, {Topic, Partition}, Conn} || {Name, {Topic, Partition}, Conn} <- State#state.broker_connections, whereis(Conn) /= Pid, whereis(Conn) /= undefined], - lager:info("current connections ~p, updated connections ~p ~n", [State#state.broker_connections, BrokerConnections]), + lager:debug("current connections ~p, updated connections ~p ~n", [State#state.broker_connections, BrokerConnections]), timer:apply_after(1000, ?MODULE, request_metadata, [[]]), {next_state, StateName, State#state{broker_connections = BrokerConnections}}; @@ -252,12 +261,8 @@ log_metadata_request_error(Host, Port, Reason) -> _ = lager:warning("Unable to retrieve metadata from ~s:~p, reason: ~p", [Host, Port, Reason]). 
-metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> - kafkerl_protocol:build_metadata_request([], 0, ClientId); -metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, - NewTopics) -> - AllTopics = lists:umerge(KnownTopics, NewTopics), - kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). +metadata_request(#state{client_id = ClientId}, _NewTopics) -> + kafkerl_protocol:build_metadata_request([], 0, ClientId). %%============================================================================== %% Topic/broker mapping From 8b1fb60cabdb8b818279a88eb3b4ac89e9f52279 Mon Sep 17 00:00:00 2001 From: georgeye Date: Wed, 3 Aug 2016 10:32:52 -0700 Subject: [PATCH 60/72] update to fix dump to disk resend --- src/kafkerl.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index ed574af..ea2f1b8 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,6 +1,6 @@ {application, kafkerl, [{description, []}, - {vsn, "1.3"}, + {vsn, "1.4"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, From bb0a43fde1eb381eaec445014bdeb85c29436dc5 Mon Sep 17 00:00:00 2001 From: anders wei Date: Tue, 3 Jan 2017 12:19:31 +0800 Subject: [PATCH 61/72] TS-3001 fix kafkerl crashes when try to write to disk --- src/kafkerl_connector.erl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/kafkerl_connector.erl b/src/kafkerl_connector.erl index 9ffee54..107b324 100644 --- a/src/kafkerl_connector.erl +++ b/src/kafkerl_connector.erl @@ -348,12 +348,23 @@ retry_messages(Messages) -> _Pid = spawn(fun() -> [send(M) || M <- Messages] end), ok. +%% in most cases, no message in buffer, pass +split_message_dump([], _) -> + {[], []}; + split_message_dump(Messages, #state{max_buffer_size = MaxBufferSize, + broker_mapping = Mapping, save_bad_messages = SaveBadMessages}) when is_list(Messages) -> - KnownTopics = kafkerl_metadata_handler:get_known_topics(), + KnownTopics = [ + Topic + || {{Topic, _Partition},_Connection} <- Mapping + ], + % Split messages between for topics kafkerl knows exist and those that do not. + % NOTE: before metadata is fetched from kafka, messages are saved in default buffer. + % so we need such process to partition them. {Known, Unknown} = lists:partition(fun({Topic, _Partition, _Payload}) -> lists:member(Topic, KnownTopics) end, Messages), From 0e2e004bb204a443ba56b5a949133f188cfa737f Mon Sep 17 00:00:00 2001 From: georgeye Date: Tue, 3 Jan 2017 11:47:12 -0800 Subject: [PATCH 62/72] update version --- src/kafkerl.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index ea2f1b8..7d01c5a 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -1,6 +1,6 @@ {application, kafkerl, [{description, []}, - {vsn, "1.4"}, + {vsn, "1.5"}, {registered, []}, {applications, [kernel, stdlib]}, {mod, {kafkerl, []}}, From e07f23e5fb542f6e7cb72d1948295eec7d6351f5 Mon Sep 17 00:00:00 2001 From: dvliman Date: Fri, 23 Mar 2018 12:56:40 -0700 Subject: [PATCH 63/72] point to epocxy master --- rebar.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rebar.config b/rebar.config index bc8c780..a286f54 100644 --- a/rebar.config +++ b/rebar.config @@ -16,8 +16,8 @@ warn_untyped_record, {i, "include"}]}. 
{deps, - [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, + [{parse_trans, ".*", {git, "git@github.com:tigertext/parse_trans.git", "master"}}, {lager, ".*", {git, "git@github.com:tigertext/lager.git", {tag, "2.1.1"}}}, - {epocxy, "1.0.1", {git, "git@github.com:tigertext/epocxy.git", {tag, "1.0.1"}}}, - {eper, "0.69", {git, "git@github.com:tigertext/eper.git", "HEAD" }}, + {epocxy, ".*", {git, "git@github.com:tigertext/epocxy.git", "master"}}, + {eper, "0.69", {git, "git@github.com:tigertext/eper.git", "HEAD"}}, {validerl, ".*", {git, "git@github.com:tigertext/validerl.git", "master"}}]}. From b60e43dc2fceae4bbcc92063a5ffdb2634e9e4ef Mon Sep 17 00:00:00 2001 From: Evan Vigil-McClanahan Date: Fri, 25 May 2018 15:04:27 -0700 Subject: [PATCH 64/72] add validerl to applications --- src/kafkerl.app.src | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/kafkerl.app.src b/src/kafkerl.app.src index 7d01c5a..57780f5 100644 --- a/src/kafkerl.app.src +++ b/src/kafkerl.app.src @@ -2,7 +2,12 @@ [{description, []}, {vsn, "1.5"}, {registered, []}, - {applications, [kernel, stdlib]}, + {applications, + [ + kernel, + stdlib, + validerl + ]}, {mod, {kafkerl, []}}, {env, []}, {lager, [{handlers, [{lager_console_backend, info}]}]}]}. From 91200d6bd7fd9d95fdb998b787fbbb406294944d Mon Sep 17 00:00:00 2001 From: Anders Wei Date: Thu, 13 Dec 2018 14:32:42 +0800 Subject: [PATCH 65/72] TS-5150 set timeout --- src/kafkerl_protocol.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/kafkerl_protocol.erl b/src/kafkerl_protocol.erl index 57224e1..acf88ef 100644 --- a/src/kafkerl_protocol.erl +++ b/src/kafkerl_protocol.erl @@ -193,10 +193,11 @@ build_produce_request({Topic, [{Partition, Messages}]}, Compression) -> build_produce_request({Topic, Partition, Messages}, Compression) -> % This is a fast version used when producing for a single topic and partition TopicSize = byte_size(Topic), + Timeout = application:get_env(kafkerl, kafka_cluster_sync_timeout, 2000), {Size, MessageSet} = build_message_set(Messages, Compression), {Size + TopicSize + 24, - [<<-1:?SHORT, - -1:?INT, % Timeout %% TODO: get timeout error from kafka when this values is set to -1, after changing it to 1000, error disappers. need to double check if this value is updated in latest kafka protocol + [<<-1:?SHORT, %% ACK + Timeout:?INT, % Timeout to write over the whole kafka cluster 1:?UINT, % TopicCount TopicSize:?USHORT>>, Topic, @@ -208,11 +209,12 @@ build_produce_request(Data, Compression) -> % Build the body of the request with multiple topics/partitions % (Docs at: http://goo.gl/J3C50c) TopicCount = length(Data), + Timeout = application:get_env(kafkerl, kafka_cluster_sync_timeout, 2000), {TopicsSize, Topics} = build_topics(Data, Compression), % 10 is the size of the header {TopicsSize + 10, [<<-1:?SHORT, % RequiredAcks - -1:?INT, % Timeout + Timeout:?INT, % Timeout TopicCount:?UINT>>, Topics]}. 
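The produce timeout introduced above is read from the application environment under kafka_cluster_sync_timeout, falling back to 2000 ms. A sketch of overriding it at runtime; where this value is set in a real release (for example sys.config) is deployment-specific:

  %% allow the cluster up to 5 seconds to acknowledge a produce request
  ok = application:set_env(kafkerl, kafka_cluster_sync_timeout, 5000).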
From d24b8df5fdf29b185cbfe201f8032caaf093f458 Mon Sep 17 00:00:00 2001 From: anders wei Date: Thu, 14 Mar 2019 02:53:33 +0800 Subject: [PATCH 66/72] TS-5500 --- src/kafkerl_broker_connection.erl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index f0610d2..45e4f94 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -105,9 +105,7 @@ handle_info({connected, Socket}, State) -> handle_flush(State#state{socket = Socket}); handle_info(connection_timeout, State) -> {stop, {error, unable_to_connect}, State}; -handle_info({tcp_closed, _Socket}, State = #state{name = Name, - address = {Host, Port}}) -> - _ = lager:warning("~p lost connection to ~p:~p", [Name, Host, Port]), +handle_info({tcp_closed, _Socket}, State = #state{}) -> NewState = handle_tcp_close(State), {noreply, NewState}; handle_info({tcp, _Socket, Bin}, State) -> From c840bf5ef65c2429f6e7f9c1b89f806ff5cb1007 Mon Sep 17 00:00:00 2001 From: Ang Li Date: Thu, 19 Sep 2019 18:29:11 +0800 Subject: [PATCH 67/72] Remove compile warnings. --- .gitignore | 4 +++- kafkerl_buffer.erl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 kafkerl_buffer.erl diff --git a/.gitignore b/.gitignore index 960a23e..efc5bbb 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,6 @@ rebar *.dump bin/tmp logs/ -rel/ \ No newline at end of file +rel/ +.idea/ +*.iml diff --git a/kafkerl_buffer.erl b/kafkerl_buffer.erl new file mode 100644 index 0000000..0c308fc --- /dev/null +++ b/kafkerl_buffer.erl @@ -0,0 +1,45 @@ + +-module(kafkerl_buffer). +-author("anders"). +-behavior(gen_server). + +%% API +-export([start_link/0, init/1, create_buffer/2, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, + start_link/1, create_buffer/1]). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +start_link(_) -> + start_link(). + + +init([]) -> + {ok, []}. + +create_buffer(Name, Type) -> + gen_server:call(?MODULE, {create_buffer, Name, Type}). +create_buffer(Name) -> + gen_server:call(?MODULE, {create_buffer, Name}). + +handle_call({create_buffer, Name, Type}, _from, State) -> + Alredy_Exists = ets_buffer:list(Name) =/= [], + Res = ets_buffer:create(Name, Type), + lager:debug("buffer ~p type ~p created ~p, already exists ~p", [Name, Type, Res, Alredy_Exists]), + {reply, ok, State}; +handle_call({create_buffer, Name}, _From, State) -> + Res = ets_buffer:create(Name), + lager:debug("buffer ~p created ~p", [Name, Res]), + {reply, ok, State}. + +handle_cast(_Request, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. From a0918e278ba0156fa4798f3f93fa8c64917e0e9f Mon Sep 17 00:00:00 2001 From: Ang Li Date: Thu, 19 Sep 2019 18:49:41 +0800 Subject: [PATCH 68/72] fix --- kafkerl_buffer.erl | 45 ------------------------------------------ src/kafkerl_buffer.erl | 13 +++++++++++- src/kafkerl_utils.erl | 18 +---------------- 3 files changed, 13 insertions(+), 63 deletions(-) delete mode 100644 kafkerl_buffer.erl diff --git a/kafkerl_buffer.erl b/kafkerl_buffer.erl deleted file mode 100644 index 0c308fc..0000000 --- a/kafkerl_buffer.erl +++ /dev/null @@ -1,45 +0,0 @@ - --module(kafkerl_buffer). --author("anders"). --behavior(gen_server). 
- -%% API --export([start_link/0, init/1, create_buffer/2, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, - start_link/1, create_buffer/1]). - -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - -start_link(_) -> - start_link(). - - -init([]) -> - {ok, []}. - -create_buffer(Name, Type) -> - gen_server:call(?MODULE, {create_buffer, Name, Type}). -create_buffer(Name) -> - gen_server:call(?MODULE, {create_buffer, Name}). - -handle_call({create_buffer, Name, Type}, _from, State) -> - Alredy_Exists = ets_buffer:list(Name) =/= [], - Res = ets_buffer:create(Name, Type), - lager:debug("buffer ~p type ~p created ~p, already exists ~p", [Name, Type, Res, Alredy_Exists]), - {reply, ok, State}; -handle_call({create_buffer, Name}, _From, State) -> - Res = ets_buffer:create(Name), - lager:debug("buffer ~p created ~p", [Name, Res]), - {reply, ok, State}. - -handle_cast(_Request, State) -> - {noreply, State}. - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, _State) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. diff --git a/src/kafkerl_buffer.erl b/src/kafkerl_buffer.erl index 4483120..0c308fc 100644 --- a/src/kafkerl_buffer.erl +++ b/src/kafkerl_buffer.erl @@ -4,7 +4,8 @@ -behavior(gen_server). %% API --export([start_link/0, init/1, create_buffer/2, handle_call/3, start_link/1, create_buffer/1]). +-export([start_link/0, init/1, create_buffer/2, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, + start_link/1, create_buffer/1]). start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). @@ -31,4 +32,14 @@ handle_call({create_buffer, Name}, _From, State) -> lager:debug("buffer ~p created ~p", [Name, Res]), {reply, ok, State}. +handle_cast(_Request, State) -> + {noreply, State}. +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. diff --git a/src/kafkerl_utils.erl b/src/kafkerl_utils.erl index e7e3b34..cf0de9b 100644 --- a/src/kafkerl_utils.erl +++ b/src/kafkerl_utils.erl @@ -123,22 +123,6 @@ merge_messages(A, B) -> {false, false} -> [B, A] end. -is_list_of_binaries(L) when is_list(L) -> - length(L) > 0 andalso lists:all(fun is_binary/1, L); -is_list_of_binaries(_Any) -> - false. - -is_partition_list(L) when is_list(L) -> - length(L) > 0 andalso lists:all(fun is_partition/1, L); -is_partition_list(_Any) -> - false. - -is_partition({Partition, Messages}) -> - is_integer(Partition) andalso Partition >= 0 andalso - (is_binary(Messages) orelse is_list_of_binaries(Messages)); -is_partition(_Any) -> - false. - gather_consume_responses() -> gather_consume_responses(2500). gather_consume_responses(Timeout) -> @@ -153,4 +137,4 @@ gather_consume_responses(Timeout, Acc) -> Error after Timeout -> [] - end. \ No newline at end of file + end. From 6cd42331e8c48f28cf6ef58ce21b79a47b6f7f69 Mon Sep 17 00:00:00 2001 From: Ang Li Date: Mon, 23 Sep 2019 13:24:58 +0800 Subject: [PATCH 69/72] Address comments. --- src/kafkerl_buffer.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) mode change 100644 => 100755 src/kafkerl_buffer.erl diff --git a/src/kafkerl_buffer.erl b/src/kafkerl_buffer.erl old mode 100644 new mode 100755 index 0c308fc..f3030fa --- a/src/kafkerl_buffer.erl +++ b/src/kafkerl_buffer.erl @@ -3,9 +3,11 @@ -author("anders"). -behavior(gen_server). +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). 
+ %% API --export([start_link/0, init/1, create_buffer/2, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, - start_link/1, create_buffer/1]). +-export([start_link/0, start_link/1, create_buffer/1, create_buffer/2]). start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). From f9da6f83365060b0973d24a63f3c1138a03ec913 Mon Sep 17 00:00:00 2001 From: HaoJiang Date: Mon, 29 Mar 2021 10:59:57 +0800 Subject: [PATCH 70/72] Hao/TS-9303 update kafka error code mappings --- include/kafkerl.hrl | 100 ++++- src/kafkerl_broker_connection.erl | 4 +- src/kafkerl_error.erl | 639 ++++++++++++++++++++++++++---- src/kafkerl_metadata_handler.erl | 4 +- 4 files changed, 654 insertions(+), 93 deletions(-) diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index bf4955c..020f726 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -1,18 +1,98 @@ %% Error codes --define(NO_ERROR, 0). +-define(UNKNOWN_SERVER_ERROR, -1). +-define(NONE, 0). -define(OFFSET_OUT_OF_RANGE, 1). --define(INVALID_MESSAGE, 2). +-define(CORRUPT_MESSAGE, 2). -define(UNKNOWN_TOPIC_OR_PARTITION, 3). --define(INVALID_MESSAGE_SIZE, 4). +-define(INVALID_FETCH_SIZE, 4). -define(LEADER_NOT_AVAILABLE, 5). --define(NOT_LEADER_FOR_PARTITION, 6). --define(REQUEST_TIMEDOUT, 7). +-define(NOT_LEADER_OR_FOLLOWER, 6). +-define(REQUEST_TIMED_OUT, 7). -define(BROKER_NOT_AVAILABLE, 8). -define(REPLICA_NOT_AVAILABLE, 9). --define(MESSAGE_SIZE_TOO_LARGE, 10). +-define(MESSAGE_TOO_LARGE, 10). -define(STALE_CONTROLLER_EPOCH, 11). -define(OFFSET_METADATA_TOO_LARGE, 12). --define(OFFSETS_LOAD_IN_PROGRESS_CODE, 14). --define(CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE, 15). --define(NOT_COORDINATOR_FOR_CONSUMER_CODE, 16). --define(UNKNOWN, -1). \ No newline at end of file +-define(COORDINATOR_LOAD_IN_PROGRESS, 14). +-define(COORDINATOR_NOT_AVAILABLE, 15). +-define(NOT_COORDINATOR, 16). +-define(INVALID_TOPIC_EXCEPTION, 17). +-define(RECORD_LIST_TOO_LARGE, 18). +-define(NOT_ENOUGH_REPLICAS, 19). +-define(NOT_ENOUGH_REPLICAS_AFTER_APPEND, 20). +-define(INVALID_REQUIRED_ACKS, 21). +-define(ILLEGAL_GENERATION, 22). +-define(INCONSISTENT_GROUP_PROTOCOL, 23). +-define(INVALID_GROUP_ID, 24). +-define(UNKNOWN_MEMBER_ID, 25). +-define(INVALID_SESSION_TIMEOUT, 26). +-define(REBALANCE_IN_PROGRESS, 27). +-define(INVALID_COMMIT_OFFSET_SIZE, 28). +-define(TOPIC_AUTHORIZATION_FAILED, 29). +-define(GROUP_AUTHORIZATION_FAILED, 30). +-define(CLUSTER_AUTHORIZATION_FAILED, 31). +-define(INVALID_TIMESTAMP, 32). +-define(UNSUPPORTED_SASL_MECHANISM, 33). +-define(ILLEGAL_SASL_STATE, 34). +-define(UNSUPPORTED_VERSION, 35). +-define(TOPIC_ALREADY_EXISTS, 36). +-define(INVALID_PARTITIONS, 37). +-define(INVALID_REPLICATION_FACTOR, 38). +-define(INVALID_REPLICA_ASSIGNMENT, 39). +-define(INVALID_CONFIG, 40). +-define(NOT_CONTROLLER, 41). +-define(INVALID_REQUEST, 42). +-define(UNSUPPORTED_FOR_MESSAGE_FORMAT, 43). +-define(POLICY_VIOLATION, 44). +-define(OUT_OF_ORDER_SEQUENCE_NUMBER, 45). +-define(DUPLICATE_SEQUENCE_NUMBER, 46). +-define(INVALID_PRODUCER_EPOCH, 47). +-define(INVALID_TXN_STATE, 48). +-define(INVALID_PRODUCER_ID_MAPPING, 49). +-define(INVALID_TRANSACTION_TIMEOUT, 50). +-define(CONCURRENT_TRANSACTIONS, 51). +-define(TRANSACTION_COORDINATOR_FENCED, 52). +-define(TRANSACTIONAL_ID_AUTHORIZATION_FAILED, 53). +-define(SECURITY_DISABLED, 54). +-define(OPERATION_NOT_ATTEMPTED, 55). +-define(KAFKA_STORAGE_ERROR, 56). +-define(LOG_DIR_NOT_FOUND, 57). +-define(SASL_AUTHENTICATION_FAILED, 58). +-define(UNKNOWN_PRODUCER_ID, 59). 
+-define(REASSIGNMENT_IN_PROGRESS, 60). +-define(DELEGATION_TOKEN_AUTH_DISABLED, 61). +-define(DELEGATION_TOKEN_NOT_FOUND, 62). +-define(DELEGATION_TOKEN_OWNER_MISMATCH, 63). +-define(DELEGATION_TOKEN_REQUEST_NOT_ALLOWED, 64). +-define(DELEGATION_TOKEN_AUTHORIZATION_FAILED, 65). +-define(DELEGATION_TOKEN_EXPIRED, 66). +-define(INVALID_PRINCIPAL_TYPE, 67). +-define(NON_EMPTY_GROUP, 68). +-define(GROUP_ID_NOT_FOUND, 69). +-define(FETCH_SESSION_ID_NOT_FOUND, 70). +-define(INVALID_FETCH_SESSION_EPOCH, 71). +-define(LISTENER_NOT_FOUND, 72). +-define(TOPIC_DELETION_DISABLED, 73). +-define(FENCED_LEADER_EPOCH, 74). +-define(UNKNOWN_LEADER_EPOCH, 75). +-define(UNSUPPORTED_COMPRESSION_TYPE, 76). +-define(STALE_BROKER_EPOCH, 77). +-define(OFFSET_NOT_AVAILABLE, 78). +-define(MEMBER_ID_REQUIRED, 79). +-define(PREFERRED_LEADER_NOT_AVAILABLE, 80). +-define(GROUP_MAX_SIZE_REACHED, 81). +-define(FENCED_INSTANCE_ID, 82). +-define(ELIGIBLE_LEADERS_NOT_AVAILABLE, 83). +-define(ELECTION_NOT_NEEDED, 84). +-define(NO_REASSIGNMENT_IN_PROGRESS, 85). +-define(GROUP_SUBSCRIBED_TO_TOPIC, 86). +-define(INVALID_RECORD, 87). +-define(UNSTABLE_OFFSET_COMMIT, 88). +-define(THROTTLING_QUOTA_EXCEEDED, 89). +-define(PRODUCER_FENCED, 90). +-define(RESOURCE_NOT_FOUND, 91). +-define(DUPLICATE_RESOURCE, 92). +-define(UNACCEPTABLE_CREDENTIAL, 93). +-define(INCONSISTENT_VOTER_SET, 94). +-define(INVALID_UPDATE_VERSION, 95). +-define(FEATURE_UPDATE_FAILED, 96). diff --git a/src/kafkerl_broker_connection.erl b/src/kafkerl_broker_connection.erl index 45e4f94..0fb3d51 100644 --- a/src/kafkerl_broker_connection.erl +++ b/src/kafkerl_broker_connection.erl @@ -419,7 +419,7 @@ split_errors_and_successes(Topics) -> split_errors_and_successes([], Acc) -> Acc; split_errors_and_successes([{Topic, Partitions} | T], Acc) -> - F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> + F = fun({Partition, ?NONE, Offset}, {E, S}) -> {E, [{Topic, Partition, Offset} | S]}; ({Partition, Error, _}, {E, S}) -> {[{Topic, Partition, Error} | E], S} @@ -440,7 +440,7 @@ handle_errors(Errors, Messages, Name) -> handle_error({Topic, Partition, Error}, Messages, Name) when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse - Error =:= ?NOT_LEADER_FOR_PARTITION orelse + Error =:= ?NOT_LEADER_OR_FOLLOWER orelse Error =:= ?LEADER_NOT_AVAILABLE -> case get_message_for_error(Topic, Partition, Messages, Name) of undefined -> false; diff --git a/src/kafkerl_error.erl b/src/kafkerl_error.erl index 77e8a52..3a35be9 100644 --- a/src/kafkerl_error.erl +++ b/src/kafkerl_error.erl @@ -8,120 +8,601 @@ %%============================================================================== %% API %%============================================================================== -get_error_name(?NO_ERROR) -> - "NoError"; +get_error_name(?UNKNOWN_SERVER_ERROR) -> + "UnknownServerError"; +get_error_name(?NONE) -> + "None"; get_error_name(?OFFSET_OUT_OF_RANGE) -> "OffsetOutOfRange"; -get_error_name(?INVALID_MESSAGE) -> - "InvalidMessage"; +get_error_name(?CORRUPT_MESSAGE) -> + "CorruptMessage"; get_error_name(?UNKNOWN_TOPIC_OR_PARTITION) -> "UnknownTopicOrPartition"; -get_error_name(?INVALID_MESSAGE_SIZE) -> - "InvalidMessageSize"; +get_error_name(?INVALID_FETCH_SIZE) -> + "InvalidFetchSize"; get_error_name(?LEADER_NOT_AVAILABLE) -> "LeaderNotAvailable"; -get_error_name(?NOT_LEADER_FOR_PARTITION) -> - "NotLeaderForPartition"; -get_error_name(?REQUEST_TIMEDOUT) -> +get_error_name(?NOT_LEADER_OR_FOLLOWER) -> + "NotLeaderOrFollower"; +get_error_name(?REQUEST_TIMED_OUT) -> "RequestTimedOut"; 
get_error_name(?BROKER_NOT_AVAILABLE) -> "BrokerNotAvailable"; get_error_name(?REPLICA_NOT_AVAILABLE) -> "ReplicaNotAvailable"; -get_error_name(?MESSAGE_SIZE_TOO_LARGE) -> - "MessageSizeTooLarge"; +get_error_name(?MESSAGE_TOO_LARGE) -> + "MessageTooLarge"; get_error_name(?STALE_CONTROLLER_EPOCH) -> "StaleControllerEpoch"; get_error_name(?OFFSET_METADATA_TOO_LARGE) -> "OffsetMetadataTooLarge"; -get_error_name(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> - "OffsetsLoadInProgressCode"; -get_error_name(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> - "ConsumerCoordinatorNotAvailableCode"; -get_error_name(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> - "NotCoordinatorForConsumerCode"; -get_error_name(?UNKNOWN) -> - "Unknown". +get_error_name(?COORDINATOR_LOAD_IN_PROGRESS) -> + "CoordinatorLoadInProgress"; +get_error_name(?COORDINATOR_NOT_AVAILABLE) -> + "CoordinatorNotAvailable"; +get_error_name(?NOT_COORDINATOR) -> + "NotCoordinator"; +get_error_name(?INVALID_TOPIC_EXCEPTION) -> + "InvalidTopicException"; +get_error_name(?RECORD_LIST_TOO_LARGE) -> + "RecordListTooLarge"; +get_error_name(?NOT_ENOUGH_REPLICAS) -> + "NotEnoughReplicas"; +get_error_name(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) -> + "NotEnoughReplicasAfterAppend"; +get_error_name(?INVALID_REQUIRED_ACKS) -> + "InvalidRequiredAcks"; +get_error_name(?ILLEGAL_GENERATION) -> + "IllegalGeneration"; +get_error_name(?INCONSISTENT_GROUP_PROTOCOL) -> + "InconsistentGroupProtocol"; +get_error_name(?INVALID_GROUP_ID) -> + "InvalidGroupId"; +get_error_name(?UNKNOWN_MEMBER_ID) -> + "UnknownMemberId"; +get_error_name(?INVALID_SESSION_TIMEOUT) -> + "InvalidSessionTimeout"; +get_error_name(?REBALANCE_IN_PROGRESS) -> + "RebalanceInProgress"; +get_error_name(?INVALID_COMMIT_OFFSET_SIZE) -> + "InvalidCommitOffsetSize"; +get_error_name(?TOPIC_AUTHORIZATION_FAILED) -> + "TopicAuthorizationFailed"; +get_error_name(?GROUP_AUTHORIZATION_FAILED) -> + "GroupAuthorizationFailed"; +get_error_name(?CLUSTER_AUTHORIZATION_FAILED) -> + "ClusterAuthorizationFailed"; +get_error_name(?INVALID_TIMESTAMP) -> + "InvalidTimestamp"; +get_error_name(?UNSUPPORTED_SASL_MECHANISM) -> + "UnsupportedSaslMechanism"; +get_error_name(?ILLEGAL_SASL_STATE) -> + "IllegalSaslState"; +get_error_name(?UNSUPPORTED_VERSION) -> + "UnsupportedVersion"; +get_error_name(?TOPIC_ALREADY_EXISTS) -> + "TopicAlreadyExists"; +get_error_name(?INVALID_PARTITIONS) -> + "InvalidPartitions"; +get_error_name(?INVALID_REPLICATION_FACTOR) -> + "InvalidReplicationFactor"; +get_error_name(?INVALID_REPLICA_ASSIGNMENT) -> + "InvalidReplicaAssignment"; +get_error_name(?INVALID_CONFIG) -> + "InvalidConfig"; +get_error_name(?NOT_CONTROLLER) -> + "NotController"; +get_error_name(?INVALID_REQUEST) -> + "InvalidRequest"; +get_error_name(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> + "UnsupportedForMessageFormat"; +get_error_name(?POLICY_VIOLATION) -> + "PolicyViolation"; +get_error_name(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> + "OutOfOrderSequenceNumber"; +get_error_name(?DUPLICATE_SEQUENCE_NUMBER) -> + "DuplicateSequenceNumber"; +get_error_name(?INVALID_PRODUCER_EPOCH) -> + "InvalidProducerEpoch"; +get_error_name(?INVALID_TXN_STATE) -> + "InvalidTxnState"; +get_error_name(?INVALID_PRODUCER_ID_MAPPING) -> + "InvalidProducerIdMapping"; +get_error_name(?INVALID_TRANSACTION_TIMEOUT) -> + "InvalidTransactionTimeout"; +get_error_name(?CONCURRENT_TRANSACTIONS) -> + "ConcurrentTransactions"; +get_error_name(?TRANSACTION_COORDINATOR_FENCED) -> + "TransactionCoordinatorFenced"; +get_error_name(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> + 
"TransactionalIdAuthorizationFailed"; +get_error_name(?SECURITY_DISABLED) -> + "SecurityDisabled"; +get_error_name(?OPERATION_NOT_ATTEMPTED) -> + "OperationNotAttempted"; +get_error_name(?KAFKA_STORAGE_ERROR) -> + "KafkaSotrageError"; +get_error_name(?LOG_DIR_NOT_FOUND) -> + "LogDirNotFound"; +get_error_name(?SASL_AUTHENTICATION_FAILED) -> + "SaslAuthenticationFailed"; +get_error_name(?UNKNOWN_PRODUCER_ID) -> + "UnknownProducerId"; +get_error_name(?REASSIGNMENT_IN_PROGRESS) -> + "ReassignmentInProgress"; +get_error_name(?DELEGATION_TOKEN_AUTH_DISABLED) -> + "DelegationTokenAuthDisabled"; +get_error_name(?DELEGATION_TOKEN_NOT_FOUND) -> + "DelegationTokenNotFound"; +get_error_name(?DELEGATION_TOKEN_OWNER_MISMATCH) -> + "DelegationTokenOwnerMismatch"; +get_error_name(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> + "DelegationTokenRequestNotAllowed"; +get_error_name(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> + "DelegationTokenAuthorizationFailed"; +get_error_name(?DELEGATION_TOKEN_EXPIRED) -> + "DelegationTokenExpired"; +get_error_name(?INVALID_PRINCIPAL_TYPE) -> + "InvalidPrincipalType"; +get_error_name(?NON_EMPTY_GROUP) -> + "NonEmptyGroup"; +get_error_name(?GROUP_ID_NOT_FOUND) -> + "GroupIdNotFound"; +get_error_name(?FETCH_SESSION_ID_NOT_FOUND) -> + "FetchSessionIdNotFound"; +get_error_name(?INVALID_FETCH_SESSION_EPOCH) -> + "InvalidFetchSessionEpoch"; +get_error_name(?LISTENER_NOT_FOUND) -> + "ListenerNotFound"; +get_error_name(?TOPIC_DELETION_DISABLED) -> + "TopicDeletionDisabled"; +get_error_name(?FENCED_LEADER_EPOCH) -> + "FencedLeaderEpoch"; +get_error_name(?UNKNOWN_LEADER_EPOCH) -> + "UnknownLeaderEpoch"; +get_error_name(?UNSUPPORTED_COMPRESSION_TYPE) -> + "UnsupportedCompressionType"; +get_error_name(?STALE_BROKER_EPOCH) -> + "StaleBrokerEpoch"; +get_error_name(?OFFSET_NOT_AVAILABLE) -> + "OffsetNotAvailable"; +get_error_name(?MEMBER_ID_REQUIRED) -> + "MemberIdRequired"; +get_error_name(?PREFERRED_LEADER_NOT_AVAILABLE) -> + "PreferedLeaderNotAvailable"; +get_error_name(?GROUP_MAX_SIZE_REACHED) -> + "GroupMaxSizeReached"; +get_error_name(?FENCED_INSTANCE_ID) -> + "FencedInstanceId"; +get_error_name(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> + "EligibleLeadersNotAvailable"; +get_error_name(?ELECTION_NOT_NEEDED) -> + "ElectionNotNeeded"; +get_error_name(?NO_REASSIGNMENT_IN_PROGRESS) -> + "NoReassignmentInProgress"; +get_error_name(?GROUP_SUBSCRIBED_TO_TOPIC) -> + "GroupSubscribedToTopic"; +get_error_name(?INVALID_RECORD) -> + "InvalidRecord"; +get_error_name(?UNSTABLE_OFFSET_COMMIT) -> + "UnstableOffsetCommit"; +get_error_name(?THROTTLING_QUOTA_EXCEEDED) -> + "ThrottlingQuotaExceeded"; +get_error_name(?PRODUCER_FENCED) -> + "ProducerFenced"; +get_error_name(?RESOURCE_NOT_FOUND) -> + "ResourceNotFound"; +get_error_name(?DUPLICATE_RESOURCE) -> + "DuplicateResource"; +get_error_name(?UNACCEPTABLE_CREDENTIAL) -> + "UnacceptableCredential"; +get_error_name(?INCONSISTENT_VOTER_SET) -> + "InconsistentVoterSet"; +get_error_name(?INVALID_UPDATE_VERSION) -> + "InvalidUpdateVersion"; +get_error_name(?FEATURE_UPDATE_FAILED) -> + "FeatureUpdateFailed". 
-get_error_description(?NO_ERROR) -> - "No error"; +get_error_description(?UNKNOWN_SERVER_ERROR) -> + "The server experienced an unexpected error when processing the request."; +get_error_description(?NONE) -> + "None"; get_error_description(?OFFSET_OUT_OF_RANGE) -> - "The requested offset is outside the range of offsets maintained by the " ++ - "server for the given topic/partition."; -get_error_description(?INVALID_MESSAGE) -> - "If you specify a string larger than configured maximum for offset metadata."; + "The requested offset is not within the range of offsets maintained by the server."; +get_error_description(?CORRUPT_MESSAGE) -> + "This message has failed its CRC checksum, exceeds the valid size, has a null key " ++ + "for a compacted topic, or is otherwise corrupt."; get_error_description(?UNKNOWN_TOPIC_OR_PARTITION) -> - "This request is for a topic or partition that does not exist on this broker"; -get_error_description(?INVALID_MESSAGE_SIZE) -> - "The message has a negative size."; + "This server does not host this topic-partition."; +get_error_description(?INVALID_FETCH_SIZE) -> + "The requested fetch size is invalid."; get_error_description(?LEADER_NOT_AVAILABLE) -> - "This error is thrown if we are in the middle of a leadership election " ++ - "and there is currently no leader for this partition and hence it is " ++ - "unavailable for writes."; -get_error_description(?NOT_LEADER_FOR_PARTITION) -> - "This error is thrown if the client attempts to send messages to a " ++ - "replica that is not the leader for some partition. It indicates that the " ++ - "clients metadata is out of date."; -get_error_description(?REQUEST_TIMEDOUT) -> - "This error is thrown if the request exceeds the user-specified time " ++ - "limit in the request."; + "There is no leader for this topic-partition as we are in the middle of a leadership election."; +get_error_description(?NOT_LEADER_OR_FOLLOWER) -> + "For requests intended only for the leader, this error indicates that the broker is not " ++ + "the current leader. For requests intended for any replica, this error indicates that the " ++ + "broker is not a replica of the topic partition."; +get_error_description(?REQUEST_TIMED_OUT) -> + "The request timed out."; get_error_description(?BROKER_NOT_AVAILABLE) -> - "This is not a client facing error and is used only internally by " ++ - "intra-cluster broker communication."; + "The broker is not available."; get_error_description(?REPLICA_NOT_AVAILABLE) -> - "Unused."; -get_error_description(?MESSAGE_SIZE_TOO_LARGE) -> - "The server has a configurable maximum message size to avoid unbounded " ++ - "memory allocation. This error is thrown if the client attempt to produce " ++ - "a message larger than this maximum."; + "The replica is not available for the requested topic-partition. 
Produce/Fetch requests and " ++ + "other requests intended only for the leader or follower return NOT_LEADER_OR_FOLLOWER if the " ++ + "broker is not a replica of the topic-partition."; +get_error_description(?MESSAGE_TOO_LARGE) -> + "The request included a message larger than the max message size the server will accept."; get_error_description(?STALE_CONTROLLER_EPOCH) -> - "Internal error code for broker-to-broker communication."; + "The controller moved to another broker."; get_error_description(?OFFSET_METADATA_TOO_LARGE) -> - "If you specify a string larger than configured maximum for offset metadata."; -get_error_description(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> - "The broker returns this error code for an offset fetch request if it is " ++ - "still loading offsets (after a leader change for that offsets topic " ++ - "partition)."; -get_error_description(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> - "The broker returns this error code for consumer metadata requests or " ++ - "offset commit requests if the offsets topic has not yet been created."; -get_error_description(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> - "The broker returns this error code if it receives an offset fetch or " ++ - "commit request for a consumer group that it is not a coordinator for."; -get_error_description(?UNKNOWN) -> - "An unexpected server error". + "The metadata field of the offset request was too large."; +get_error_description(?COORDINATOR_LOAD_IN_PROGRESS) -> + "The coordinator is loading and hence can't process requests."; +get_error_description(?COORDINATOR_NOT_AVAILABLE) -> + "The coordinator is not available."; +get_error_description(?NOT_COORDINATOR) -> + "This is not the correct coordinator."; +get_error_description(?INVALID_TOPIC_EXCEPTION) -> + "The request attempted to perform an operation on an invalid topic."; +get_error_description(?RECORD_LIST_TOO_LARGE) -> + "The request included message batch larger than the configured segment size on the server."; +get_error_description(?NOT_ENOUGH_REPLICAS) -> + "Messages are rejected since there are fewer in-sync replicas than required."; +get_error_description(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) -> + "Messages are written to the log, but to fewer in-sync replicas than required."; +get_error_description(?INVALID_REQUIRED_ACKS) -> + "Produce request specified an invalid value for required acks."; +get_error_description(?ILLEGAL_GENERATION) -> + "Specified group generation id is not valid."; +get_error_description(?INCONSISTENT_GROUP_PROTOCOL) -> + "The group member's supported protocols are incompatible with those of existing members or first " ++ + "group member tried to join with empty protocol type or empty protocol list."; +get_error_description(?INVALID_GROUP_ID) -> + "The configured groupId is invalid."; +get_error_description(?UNKNOWN_MEMBER_ID) -> + "The coordinator is not aware of this member."; +get_error_description(?INVALID_SESSION_TIMEOUT) -> + "The session timeout is not within the range allowed by the broker (as configured by group.min.session.timeout.ms " ++ + "and group.max.session.timeout.ms)."; +get_error_description(?REBALANCE_IN_PROGRESS) -> + "The group is rebalancing, so a rejoin is needed."; +get_error_description(?INVALID_COMMIT_OFFSET_SIZE) -> + "The committing offset data size is not valid."; +get_error_description(?TOPIC_AUTHORIZATION_FAILED) -> + "Topic authorization failed."; +get_error_description(?GROUP_AUTHORIZATION_FAILED) -> + "Group authorization failed."; +get_error_description(?CLUSTER_AUTHORIZATION_FAILED) -> + "Cluster 
authorization failed."; +get_error_description(?INVALID_TIMESTAMP) -> + "The timestamp of the message is out of acceptable range."; +get_error_description(?UNSUPPORTED_SASL_MECHANISM) -> + "The broker does not support the requested SASL mechanism."; +get_error_description(?ILLEGAL_SASL_STATE) -> + "Request is not valid given the current SASL state."; +get_error_description(?UNSUPPORTED_VERSION) -> + "The version of API is not supported."; +get_error_description(?TOPIC_ALREADY_EXISTS) -> + "Topic with this name already exists."; +get_error_description(?INVALID_PARTITIONS) -> + "Number of partitions is below 1."; +get_error_description(?INVALID_REPLICATION_FACTOR) -> + "Replication factor is below 1 or larger than the number of available brokers."; +get_error_description(?INVALID_REPLICA_ASSIGNMENT) -> + "Replica assignment is invalid."; +get_error_description(?INVALID_CONFIG) -> + "Configuration is invalid."; +get_error_description(?NOT_CONTROLLER) -> + "This is not the correct controller for this cluster."; +get_error_description(?INVALID_REQUEST) -> + "This most likely occurs because of a request being malformed by the client library or the message was sent " ++ + "to an incompatible broker. See the broker logs for more details."; +get_error_description(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> + "The message format version on the broker does not support the request."; +get_error_description(?POLICY_VIOLATION) -> + "Request parameters do not satisfy the configured policy."; +get_error_description(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> + "The broker received an out of order sequence number."; +get_error_description(?DUPLICATE_SEQUENCE_NUMBER) -> + "The broker received a duplicate sequence number."; +get_error_description(?INVALID_PRODUCER_EPOCH) -> + "Producer attempted to produce with an old epoch."; +get_error_description(?INVALID_TXN_STATE) -> + "The producer attempted a transactional operation in an invalid state."; +get_error_description(?INVALID_PRODUCER_ID_MAPPING) -> + "The producer attempted to use a producer id which is not currently assigned to its transactional id."; +get_error_description(?INVALID_TRANSACTION_TIMEOUT) -> + "The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms)."; +get_error_description(?CONCURRENT_TRANSACTIONS) -> + "The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing."; +get_error_description(?TRANSACTION_COORDINATOR_FENCED) -> + "Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer."; +get_error_description(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> + "Transactional Id authorization failed."; +get_error_description(?SECURITY_DISABLED) -> + "Security features are disabled."; +get_error_description(?OPERATION_NOT_ATTEMPTED) -> + "The broker did not attempt to execute this operation. 
This may happen for batched RPCs where some operations in " ++ + "the batch failed, causing the broker to respond without trying the rest."; +get_error_description(?KAFKA_STORAGE_ERROR) -> + "Disk error when trying to access log file on the disk."; +get_error_description(?LOG_DIR_NOT_FOUND) -> + "The user-specified log directory is not found in the broker config."; +get_error_description(?SASL_AUTHENTICATION_FAILED) -> + "SASL Authentication failed."; +get_error_description(?UNKNOWN_PRODUCER_ID) -> + "This exception is raised by the broker if it could not locate the producer metadata associated with the " ++ + "producerId in question. This could happen if, for instance, the producer's records were deleted because their " ++ + "retention time had elapsed. Once the last records of the producerId are removed, the producer's metadata is " ++ + "removed from the broker, and future appends by the producer will return this exception."; +get_error_description(?REASSIGNMENT_IN_PROGRESS) -> + "A partition reassignment is in progress."; +get_error_description(?DELEGATION_TOKEN_AUTH_DISABLED) -> + "Delegation Token feature is not enabled."; +get_error_description(?DELEGATION_TOKEN_NOT_FOUND) -> + "Delegation Token is not found on server."; +get_error_description(?DELEGATION_TOKEN_OWNER_MISMATCH) -> + "Specified Principal is not valid Owner/Renewer."; +get_error_description(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> + "Delegation Token requests are not allowed on PLAINTEXT/1-way SSL channels and on delegation token authenticated channels."; +get_error_description(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> + "Delegation Token authorization failed."; +get_error_description(?DELEGATION_TOKEN_EXPIRED) -> + "Delegation Token is expired."; +get_error_description(?INVALID_PRINCIPAL_TYPE) -> + "Supplied principalType is not supported."; +get_error_description(?NON_EMPTY_GROUP) -> + "The group is not empty."; +get_error_description(?GROUP_ID_NOT_FOUND) -> + "The group id does not exist."; +get_error_description(?FETCH_SESSION_ID_NOT_FOUND) -> + "The fetch session ID was not found."; +get_error_description(?INVALID_FETCH_SESSION_EPOCH) -> + "The fetch session epoch is invalid."; +get_error_description(?LISTENER_NOT_FOUND) -> + "There is no listener on the leader broker that matches the listener on which metadata request was processed."; +get_error_description(?TOPIC_DELETION_DISABLED) -> + "Topic deletion is disabled."; +get_error_description(?FENCED_LEADER_EPOCH) -> + "The leader epoch in the request is older than the epoch on the broker."; +get_error_description(?UNKNOWN_LEADER_EPOCH) -> + "The leader epoch in the request is newer than the epoch on the broker."; +get_error_description(?UNSUPPORTED_COMPRESSION_TYPE) -> + "The requesting client does not support the compression type of given partition."; +get_error_description(?STALE_BROKER_EPOCH) -> + "Broker epoch has changed."; +get_error_description(?OFFSET_NOT_AVAILABLE) -> + "The leader high watermark has not caught up from a recent leader election so the offsets cannot be guaranteed " ++ + "to be monotonically increasing."; +get_error_description(?MEMBER_ID_REQUIRED) -> + "The group member needs to have a valid member id before actually entering a consumer group."; +get_error_description(?PREFERRED_LEADER_NOT_AVAILABLE) -> + "The preferred leader was not available."; +get_error_description(?GROUP_MAX_SIZE_REACHED) -> + "The consumer group has reached its max size."; +get_error_description(?FENCED_INSTANCE_ID) -> + "The broker rejected this static consumer 
since another consumer with the same group.instance.id has registered " ++ + "with a different member.id."; +get_error_description(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> + "Eligible topic partition leaders are not available."; +get_error_description(?ELECTION_NOT_NEEDED) -> + "Leader election not needed for topic partition."; +get_error_description(?NO_REASSIGNMENT_IN_PROGRESS) -> + "No partition reassignment is in progress."; +get_error_description(?GROUP_SUBSCRIBED_TO_TOPIC) -> + "Deleting offsets of a topic is forbidden while the consumer group is actively subscribed to it."; +get_error_description(?INVALID_RECORD) -> + "This record has failed the validation on broker and hence will be rejected."; +get_error_description(?UNSTABLE_OFFSET_COMMIT) -> + "There are unstable offsets that need to be cleared."; +get_error_description(?THROTTLING_QUOTA_EXCEEDED) -> + "The throttling quota has been exceeded."; +get_error_description(?PRODUCER_FENCED) -> + "There is a newer producer with the same transactionalId which fences the current one."; +get_error_description(?RESOURCE_NOT_FOUND) -> + "A request illegally referred to a resource that does not exist."; +get_error_description(?DUPLICATE_RESOURCE) -> + "A request illegally referred to the same resource twice."; +get_error_description(?UNACCEPTABLE_CREDENTIAL) -> + "Requested credential would not meet criteria for acceptability."; +get_error_description(?INCONSISTENT_VOTER_SET) -> + "Indicates that the either the sender or recipient of a voter-only request is not one of the expected voters"; +get_error_description(?INVALID_UPDATE_VERSION) -> + "The given update version was invalid."; +get_error_description(?FEATURE_UPDATE_FAILED) -> + "Unable to update finalized features due to an unexpected server error.". 
-get_error_tuple(?NO_ERROR) -> - {error, no_error}; +get_error_tuple(?UNKNOWN_SERVER_ERROR) -> + {error, unknown_server_error}; +get_error_tuple(?NONE) -> + {error, none}; get_error_tuple(?OFFSET_OUT_OF_RANGE) -> {error, offset_out_of_range}; -get_error_tuple(?INVALID_MESSAGE) -> - {error, invalid_message}; +get_error_tuple(?CORRUPT_MESSAGE) -> + {error, corrupt_message}; get_error_tuple(?UNKNOWN_TOPIC_OR_PARTITION) -> {error, unknown_topic_or_partition}; -get_error_tuple(?INVALID_MESSAGE_SIZE) -> - {error, invalid_message_size}; +get_error_tuple(?INVALID_FETCH_SIZE) -> + {error, invalid_fetch_size}; get_error_tuple(?LEADER_NOT_AVAILABLE) -> {error, leader_not_available}; -get_error_tuple(?NOT_LEADER_FOR_PARTITION) -> - {error, not_leader_for_partition}; -get_error_tuple(?REQUEST_TIMEDOUT) -> - {error, request_timedout}; +get_error_tuple(?NOT_LEADER_OR_FOLLOWER) -> + {error, not_leader_or_follower}; +get_error_tuple(?REQUEST_TIMED_OUT) -> + {error, request_timed_out}; get_error_tuple(?BROKER_NOT_AVAILABLE) -> {error, broker_not_available}; get_error_tuple(?REPLICA_NOT_AVAILABLE) -> {error, replica_not_available}; -get_error_tuple(?MESSAGE_SIZE_TOO_LARGE) -> - {error, message_size_too_large}; +get_error_tuple(?MESSAGE_TOO_LARGE) -> + {error, message_too_large}; get_error_tuple(?STALE_CONTROLLER_EPOCH) -> {error, stale_controller_epoch}; get_error_tuple(?OFFSET_METADATA_TOO_LARGE) -> {error, offset_metadata_too_large}; -get_error_tuple(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> - {error, offsets_load_in_progress_code}; -get_error_tuple(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> - {error, consumer_coordinator_not_available_code}; -get_error_tuple(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> - {error, not_coordinator_for_consumer_code}; -get_error_tuple(?UNKNOWN) -> - {error, unknown}. 
\ No newline at end of file +get_error_tuple(?COORDINATOR_LOAD_IN_PROGRESS) -> + {error, coordinator_load_in_progress}; +get_error_tuple(?COORDINATOR_NOT_AVAILABLE) -> + {error, coordinator_not_available}; +get_error_tuple(?NOT_COORDINATOR) -> + {error, not_coordinator}; +get_error_tuple(?INVALID_TOPIC_EXCEPTION) -> + {error, invalid_topic_exception}; +get_error_tuple(?RECORD_LIST_TOO_LARGE) -> + {error, record_list_too_large}; +get_error_tuple(?NOT_ENOUGH_REPLICAS) -> + {error, not_enough_replicas}; +get_error_tuple(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) -> + {error, not_enough_replicas_after_append}; +get_error_tuple(?INVALID_REQUIRED_ACKS) -> + {error, invalid_required_acks}; +get_error_tuple(?ILLEGAL_GENERATION) -> + {error, illegal_generation}; +get_error_tuple(?INCONSISTENT_GROUP_PROTOCOL) -> + {error, inconsistent_group_protocol}; +get_error_tuple(?INVALID_GROUP_ID) -> + {error, invalid_group_id}; +get_error_tuple(?UNKNOWN_MEMBER_ID) -> + {error, unknown_member_id}; +get_error_tuple(?INVALID_SESSION_TIMEOUT) -> + {error, invalid_session_timeout}; +get_error_tuple(?REBALANCE_IN_PROGRESS) -> + {error, rebalance_in_progress}; +get_error_tuple(?INVALID_COMMIT_OFFSET_SIZE) -> + {error, invalid_commit_offset_size}; +get_error_tuple(?TOPIC_AUTHORIZATION_FAILED) -> + {error, topic_authorization_failed}; +get_error_tuple(?GROUP_AUTHORIZATION_FAILED) -> + {error, group_authorization_failed}; +get_error_tuple(?CLUSTER_AUTHORIZATION_FAILED) -> + {error, cluster_authorization_failed}; +get_error_tuple(?INVALID_TIMESTAMP) -> + {error, invalid_timestamp}; +get_error_tuple(?UNSUPPORTED_SASL_MECHANISM) -> + {error, unsupported_sasl_mechanism}; +get_error_tuple(?ILLEGAL_SASL_STATE) -> + {error, illegal_sasl_state}; +get_error_tuple(?UNSUPPORTED_VERSION) -> + {error, unsupported_version}; +get_error_tuple(?TOPIC_ALREADY_EXISTS) -> + {error, topic_already_exists}; +get_error_tuple(?INVALID_PARTITIONS) -> + {error, invalid_partitions}; +get_error_tuple(?INVALID_REPLICATION_FACTOR) -> + {error, invalid_replication_factor}; +get_error_tuple(?INVALID_REPLICA_ASSIGNMENT) -> + {error, invalid_replica_assignment}; +get_error_tuple(?INVALID_CONFIG) -> + {error, invalid_config}; +get_error_tuple(?NOT_CONTROLLER) -> + {error, not_controller}; +get_error_tuple(?INVALID_REQUEST) -> + {error, invalid_request}; +get_error_tuple(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> + {error, unsupported_for_message_format}; +get_error_tuple(?POLICY_VIOLATION) -> + {error, policy_violation}; +get_error_tuple(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> + {error, out_of_order_sequence_number}; +get_error_tuple(?DUPLICATE_SEQUENCE_NUMBER) -> + {error, duplicate_sequence_number}; +get_error_tuple(?INVALID_PRODUCER_EPOCH) -> + {error, invalid_producer_epoch}; +get_error_tuple(?INVALID_TXN_STATE) -> + {error, invalid_txn_state}; +get_error_tuple(?INVALID_PRODUCER_ID_MAPPING) -> + {error, invalid_producer_id_mapping}; +get_error_tuple(?INVALID_TRANSACTION_TIMEOUT) -> + {error, invalid_transaction_timeout}; +get_error_tuple(?CONCURRENT_TRANSACTIONS) -> + {error, concurrent_transactions}; +get_error_tuple(?TRANSACTION_COORDINATOR_FENCED) -> + {error, transaction_coordinator_fenced}; +get_error_tuple(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> + {error, transactional_id_authorization_failed}; +get_error_tuple(?SECURITY_DISABLED) -> + {error, security_disabled}; +get_error_tuple(?OPERATION_NOT_ATTEMPTED) -> + {error, operation_not_attempted}; +get_error_tuple(?KAFKA_STORAGE_ERROR) -> + {error, kafka_storage_error}; +get_error_tuple(?LOG_DIR_NOT_FOUND) -> + 
{error, log_dir_not_found}; +get_error_tuple(?SASL_AUTHENTICATION_FAILED) -> + {error, sasl_authentication_failed}; +get_error_tuple(?UNKNOWN_PRODUCER_ID) -> + {error, unknown_producer_id}; +get_error_tuple(?REASSIGNMENT_IN_PROGRESS) -> + {error, reassignment_in_progress}; +get_error_tuple(?DELEGATION_TOKEN_AUTH_DISABLED) -> + {error, delegation_token_auth_disabled}; +get_error_tuple(?DELEGATION_TOKEN_NOT_FOUND) -> + {error, delegation_token_not_found}; +get_error_tuple(?DELEGATION_TOKEN_OWNER_MISMATCH) -> + {error, delegation_token_owner_mismatch}; +get_error_tuple(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> + {error, delegation_token_request_not_allowed}; +get_error_tuple(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> + {error, delegation_token_authorization_failed}; +get_error_tuple(?DELEGATION_TOKEN_EXPIRED) -> + {error, delegation_token_expired}; +get_error_tuple(?INVALID_PRINCIPAL_TYPE) -> + {error, invalid_principal_type}; +get_error_tuple(?NON_EMPTY_GROUP) -> + {error, non_empty_group}; +get_error_tuple(?GROUP_ID_NOT_FOUND) -> + {error, group_id_not_found}; +get_error_tuple(?FETCH_SESSION_ID_NOT_FOUND) -> + {error, fetch_session_id_not_found}; +get_error_tuple(?INVALID_FETCH_SESSION_EPOCH) -> + {error, invalid_fetch_session_epoch}; +get_error_tuple(?LISTENER_NOT_FOUND) -> + {error, listener_not_found}; +get_error_tuple(?TOPIC_DELETION_DISABLED) -> + {error, topic_deletion_disabled}; +get_error_tuple(?FENCED_LEADER_EPOCH) -> + {error, fenced_leader_epoch}; +get_error_tuple(?UNKNOWN_LEADER_EPOCH) -> + {error, unknown_leader_epoch}; +get_error_tuple(?UNSUPPORTED_COMPRESSION_TYPE) -> + {error, unsupported_compression_type}; +get_error_tuple(?STALE_BROKER_EPOCH) -> + {error, stale_broker_epoch}; +get_error_tuple(?OFFSET_NOT_AVAILABLE) -> + {error, offset_not_available}; +get_error_tuple(?MEMBER_ID_REQUIRED) -> + {error, member_id_required}; +get_error_tuple(?PREFERRED_LEADER_NOT_AVAILABLE) -> + {error, preferred_leader_not_available}; +get_error_tuple(?GROUP_MAX_SIZE_REACHED) -> + {error, group_max_size_reached}; +get_error_tuple(?FENCED_INSTANCE_ID) -> + {error, fenced_instance_id}; +get_error_tuple(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> + {error, eligible_leaders_not_available}; +get_error_tuple(?ELECTION_NOT_NEEDED) -> + {error, election_not_needed}; +get_error_tuple(?NO_REASSIGNMENT_IN_PROGRESS) -> + {error, no_reassignment_in_progress}; +get_error_tuple(?GROUP_SUBSCRIBED_TO_TOPIC) -> + {error, group_subscribed_to_topic}; +get_error_tuple(?INVALID_RECORD) -> + {error, invalid_record}; +get_error_tuple(?UNSTABLE_OFFSET_COMMIT) -> + {error, unstable_offset_commit}; +get_error_tuple(?THROTTLING_QUOTA_EXCEEDED) -> + {error, throttling_quota_exceeded}; +get_error_tuple(?PRODUCER_FENCED) -> + {error, producer_fenced}; +get_error_tuple(?RESOURCE_NOT_FOUND) -> + {error, resource_not_found}; +get_error_tuple(?DUPLICATE_RESOURCE) -> + {error, duplicate_resource}; +get_error_tuple(?UNACCEPTABLE_CREDENTIAL) -> + {error, unacceptable_credential}; +get_error_tuple(?INCONSISTENT_VOTER_SET) -> + {error, inconsistent_voter_set}; +get_error_tuple(?INVALID_UPDATE_VERSION) -> + {error, invalid_update_version}; +get_error_tuple(?FEATURE_UPDATE_FAILED) -> + {error, feature_update_failed}. diff --git a/src/kafkerl_metadata_handler.erl b/src/kafkerl_metadata_handler.erl index ae4c66c..a601424 100644 --- a/src/kafkerl_metadata_handler.erl +++ b/src/kafkerl_metadata_handler.erl @@ -282,7 +282,7 @@ get_topic_mapping({BrokerMetadata, TopicMetadata}) -> end end, Partitions). 
-expand_topic({?NO_ERROR, Topic, Partitions}) -> +expand_topic({?NONE, Topic, Partitions}) -> {true, {Topic, Partitions}}; expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> % Replica not available can be ignored, still, show a warning @@ -299,7 +299,7 @@ expand_partitions(Metadata) -> expand_partitions({_Topic, []}, Acc) -> {true, Acc}; -expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> +expand_partitions({Topic, [{?NONE, Partition, Leader, _, _} | T]}, Acc) -> ExpandedPartition = {{Topic, Partition}, Leader}, expand_partitions({Topic, T}, [ExpandedPartition | Acc]); expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, From f5cdb2d5452c89fcf1ab1efe58a54a872a15d78a Mon Sep 17 00:00:00 2001 From: HaoJiang Date: Mon, 29 Mar 2021 11:22:11 +0800 Subject: [PATCH 71/72] Hao/TS-9303 add missing codes --- include/kafkerl.hrl | 1 + src/kafkerl_error.erl | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/kafkerl.hrl b/include/kafkerl.hrl index 020f726..c6d9fc6 100644 --- a/include/kafkerl.hrl +++ b/include/kafkerl.hrl @@ -13,6 +13,7 @@ -define(MESSAGE_TOO_LARGE, 10). -define(STALE_CONTROLLER_EPOCH, 11). -define(OFFSET_METADATA_TOO_LARGE, 12). +-define(NETWORK_EXCEPTION, 13). -define(COORDINATOR_LOAD_IN_PROGRESS, 14). -define(COORDINATOR_NOT_AVAILABLE, 15). -define(NOT_COORDINATOR, 16). diff --git a/src/kafkerl_error.erl b/src/kafkerl_error.erl index 3a35be9..0bc9ef9 100644 --- a/src/kafkerl_error.erl +++ b/src/kafkerl_error.erl @@ -36,6 +36,8 @@ get_error_name(?STALE_CONTROLLER_EPOCH) -> "StaleControllerEpoch"; get_error_name(?OFFSET_METADATA_TOO_LARGE) -> "OffsetMetadataTooLarge"; +get_error_name(?NETWORK_EXCEPTION) -> + "NetworkException"; get_error_name(?COORDINATOR_LOAD_IN_PROGRESS) -> "CoordinatorLoadInProgress"; get_error_name(?COORDINATOR_NOT_AVAILABLE) -> @@ -236,6 +238,8 @@ get_error_description(?STALE_CONTROLLER_EPOCH) -> "The controller moved to another broker."; get_error_description(?OFFSET_METADATA_TOO_LARGE) -> "The metadata field of the offset request was too large."; +get_error_description(?NETWORK_EXCEPTION) -> + "The server disconnected before a response was received."; get_error_description(?COORDINATOR_LOAD_IN_PROGRESS) -> "The coordinator is loading and hence can't process requests."; get_error_description(?COORDINATOR_NOT_AVAILABLE) -> @@ -440,6 +444,8 @@ get_error_tuple(?STALE_CONTROLLER_EPOCH) -> {error, stale_controller_epoch}; get_error_tuple(?OFFSET_METADATA_TOO_LARGE) -> {error, offset_metadata_too_large}; +get_error_tuple(?NETWORK_EXCEPTION) -> + {error, network_exception}; get_error_tuple(?COORDINATOR_LOAD_IN_PROGRESS) -> {error, coordinator_load_in_progress}; get_error_tuple(?COORDINATOR_NOT_AVAILABLE) -> From 145f13780b5cc0a6ec2ec31e6a3e6b58b7d730b1 Mon Sep 17 00:00:00 2001 From: HaoJiang Date: Tue, 30 Mar 2021 11:06:33 +0800 Subject: [PATCH 72/72] Hao/TS-9303 edit tab --- src/kafkerl_error.erl | 616 +++++++++++++++++++++--------------------- 1 file changed, 308 insertions(+), 308 deletions(-) diff --git a/src/kafkerl_error.erl b/src/kafkerl_error.erl index 0bc9ef9..cf3a514 100644 --- a/src/kafkerl_error.erl +++ b/src/kafkerl_error.erl @@ -9,606 +9,606 @@ %% API %%============================================================================== get_error_name(?UNKNOWN_SERVER_ERROR) -> - "UnknownServerError"; + "UnknownServerError"; get_error_name(?NONE) -> - "None"; + "None"; get_error_name(?OFFSET_OUT_OF_RANGE) -> - "OffsetOutOfRange"; + "OffsetOutOfRange"; 
get_error_name(?CORRUPT_MESSAGE) -> - "CorruptMessage"; + "CorruptMessage"; get_error_name(?UNKNOWN_TOPIC_OR_PARTITION) -> - "UnknownTopicOrPartition"; + "UnknownTopicOrPartition"; get_error_name(?INVALID_FETCH_SIZE) -> - "InvalidFetchSize"; + "InvalidFetchSize"; get_error_name(?LEADER_NOT_AVAILABLE) -> - "LeaderNotAvailable"; + "LeaderNotAvailable"; get_error_name(?NOT_LEADER_OR_FOLLOWER) -> - "NotLeaderOrFollower"; + "NotLeaderOrFollower"; get_error_name(?REQUEST_TIMED_OUT) -> - "RequestTimedOut"; + "RequestTimedOut"; get_error_name(?BROKER_NOT_AVAILABLE) -> - "BrokerNotAvailable"; + "BrokerNotAvailable"; get_error_name(?REPLICA_NOT_AVAILABLE) -> - "ReplicaNotAvailable"; + "ReplicaNotAvailable"; get_error_name(?MESSAGE_TOO_LARGE) -> - "MessageTooLarge"; + "MessageTooLarge"; get_error_name(?STALE_CONTROLLER_EPOCH) -> - "StaleControllerEpoch"; + "StaleControllerEpoch"; get_error_name(?OFFSET_METADATA_TOO_LARGE) -> - "OffsetMetadataTooLarge"; + "OffsetMetadataTooLarge"; get_error_name(?NETWORK_EXCEPTION) -> - "NetworkException"; + "NetworkException"; get_error_name(?COORDINATOR_LOAD_IN_PROGRESS) -> - "CoordinatorLoadInProgress"; + "CoordinatorLoadInProgress"; get_error_name(?COORDINATOR_NOT_AVAILABLE) -> - "CoordinatorNotAvailable"; + "CoordinatorNotAvailable"; get_error_name(?NOT_COORDINATOR) -> - "NotCoordinator"; + "NotCoordinator"; get_error_name(?INVALID_TOPIC_EXCEPTION) -> - "InvalidTopicException"; + "InvalidTopicException"; get_error_name(?RECORD_LIST_TOO_LARGE) -> - "RecordListTooLarge"; + "RecordListTooLarge"; get_error_name(?NOT_ENOUGH_REPLICAS) -> - "NotEnoughReplicas"; + "NotEnoughReplicas"; get_error_name(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) -> - "NotEnoughReplicasAfterAppend"; + "NotEnoughReplicasAfterAppend"; get_error_name(?INVALID_REQUIRED_ACKS) -> - "InvalidRequiredAcks"; + "InvalidRequiredAcks"; get_error_name(?ILLEGAL_GENERATION) -> - "IllegalGeneration"; + "IllegalGeneration"; get_error_name(?INCONSISTENT_GROUP_PROTOCOL) -> - "InconsistentGroupProtocol"; + "InconsistentGroupProtocol"; get_error_name(?INVALID_GROUP_ID) -> - "InvalidGroupId"; + "InvalidGroupId"; get_error_name(?UNKNOWN_MEMBER_ID) -> - "UnknownMemberId"; + "UnknownMemberId"; get_error_name(?INVALID_SESSION_TIMEOUT) -> - "InvalidSessionTimeout"; + "InvalidSessionTimeout"; get_error_name(?REBALANCE_IN_PROGRESS) -> - "RebalanceInProgress"; + "RebalanceInProgress"; get_error_name(?INVALID_COMMIT_OFFSET_SIZE) -> - "InvalidCommitOffsetSize"; + "InvalidCommitOffsetSize"; get_error_name(?TOPIC_AUTHORIZATION_FAILED) -> - "TopicAuthorizationFailed"; + "TopicAuthorizationFailed"; get_error_name(?GROUP_AUTHORIZATION_FAILED) -> - "GroupAuthorizationFailed"; + "GroupAuthorizationFailed"; get_error_name(?CLUSTER_AUTHORIZATION_FAILED) -> - "ClusterAuthorizationFailed"; + "ClusterAuthorizationFailed"; get_error_name(?INVALID_TIMESTAMP) -> - "InvalidTimestamp"; + "InvalidTimestamp"; get_error_name(?UNSUPPORTED_SASL_MECHANISM) -> - "UnsupportedSaslMechanism"; + "UnsupportedSaslMechanism"; get_error_name(?ILLEGAL_SASL_STATE) -> - "IllegalSaslState"; + "IllegalSaslState"; get_error_name(?UNSUPPORTED_VERSION) -> - "UnsupportedVersion"; + "UnsupportedVersion"; get_error_name(?TOPIC_ALREADY_EXISTS) -> - "TopicAlreadyExists"; + "TopicAlreadyExists"; get_error_name(?INVALID_PARTITIONS) -> - "InvalidPartitions"; + "InvalidPartitions"; get_error_name(?INVALID_REPLICATION_FACTOR) -> - "InvalidReplicationFactor"; + "InvalidReplicationFactor"; get_error_name(?INVALID_REPLICA_ASSIGNMENT) -> - "InvalidReplicaAssignment"; + 
"InvalidReplicaAssignment"; get_error_name(?INVALID_CONFIG) -> - "InvalidConfig"; + "InvalidConfig"; get_error_name(?NOT_CONTROLLER) -> - "NotController"; + "NotController"; get_error_name(?INVALID_REQUEST) -> - "InvalidRequest"; + "InvalidRequest"; get_error_name(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> - "UnsupportedForMessageFormat"; + "UnsupportedForMessageFormat"; get_error_name(?POLICY_VIOLATION) -> - "PolicyViolation"; + "PolicyViolation"; get_error_name(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> - "OutOfOrderSequenceNumber"; + "OutOfOrderSequenceNumber"; get_error_name(?DUPLICATE_SEQUENCE_NUMBER) -> - "DuplicateSequenceNumber"; + "DuplicateSequenceNumber"; get_error_name(?INVALID_PRODUCER_EPOCH) -> - "InvalidProducerEpoch"; + "InvalidProducerEpoch"; get_error_name(?INVALID_TXN_STATE) -> - "InvalidTxnState"; + "InvalidTxnState"; get_error_name(?INVALID_PRODUCER_ID_MAPPING) -> - "InvalidProducerIdMapping"; + "InvalidProducerIdMapping"; get_error_name(?INVALID_TRANSACTION_TIMEOUT) -> - "InvalidTransactionTimeout"; + "InvalidTransactionTimeout"; get_error_name(?CONCURRENT_TRANSACTIONS) -> - "ConcurrentTransactions"; + "ConcurrentTransactions"; get_error_name(?TRANSACTION_COORDINATOR_FENCED) -> - "TransactionCoordinatorFenced"; + "TransactionCoordinatorFenced"; get_error_name(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> - "TransactionalIdAuthorizationFailed"; + "TransactionalIdAuthorizationFailed"; get_error_name(?SECURITY_DISABLED) -> - "SecurityDisabled"; + "SecurityDisabled"; get_error_name(?OPERATION_NOT_ATTEMPTED) -> - "OperationNotAttempted"; + "OperationNotAttempted"; get_error_name(?KAFKA_STORAGE_ERROR) -> - "KafkaSotrageError"; + "KafkaSotrageError"; get_error_name(?LOG_DIR_NOT_FOUND) -> - "LogDirNotFound"; + "LogDirNotFound"; get_error_name(?SASL_AUTHENTICATION_FAILED) -> - "SaslAuthenticationFailed"; + "SaslAuthenticationFailed"; get_error_name(?UNKNOWN_PRODUCER_ID) -> - "UnknownProducerId"; + "UnknownProducerId"; get_error_name(?REASSIGNMENT_IN_PROGRESS) -> - "ReassignmentInProgress"; + "ReassignmentInProgress"; get_error_name(?DELEGATION_TOKEN_AUTH_DISABLED) -> - "DelegationTokenAuthDisabled"; + "DelegationTokenAuthDisabled"; get_error_name(?DELEGATION_TOKEN_NOT_FOUND) -> - "DelegationTokenNotFound"; + "DelegationTokenNotFound"; get_error_name(?DELEGATION_TOKEN_OWNER_MISMATCH) -> - "DelegationTokenOwnerMismatch"; + "DelegationTokenOwnerMismatch"; get_error_name(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> - "DelegationTokenRequestNotAllowed"; + "DelegationTokenRequestNotAllowed"; get_error_name(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> - "DelegationTokenAuthorizationFailed"; + "DelegationTokenAuthorizationFailed"; get_error_name(?DELEGATION_TOKEN_EXPIRED) -> - "DelegationTokenExpired"; + "DelegationTokenExpired"; get_error_name(?INVALID_PRINCIPAL_TYPE) -> - "InvalidPrincipalType"; + "InvalidPrincipalType"; get_error_name(?NON_EMPTY_GROUP) -> - "NonEmptyGroup"; + "NonEmptyGroup"; get_error_name(?GROUP_ID_NOT_FOUND) -> - "GroupIdNotFound"; + "GroupIdNotFound"; get_error_name(?FETCH_SESSION_ID_NOT_FOUND) -> - "FetchSessionIdNotFound"; + "FetchSessionIdNotFound"; get_error_name(?INVALID_FETCH_SESSION_EPOCH) -> - "InvalidFetchSessionEpoch"; + "InvalidFetchSessionEpoch"; get_error_name(?LISTENER_NOT_FOUND) -> - "ListenerNotFound"; + "ListenerNotFound"; get_error_name(?TOPIC_DELETION_DISABLED) -> - "TopicDeletionDisabled"; + "TopicDeletionDisabled"; get_error_name(?FENCED_LEADER_EPOCH) -> - "FencedLeaderEpoch"; + "FencedLeaderEpoch"; get_error_name(?UNKNOWN_LEADER_EPOCH) -> - "UnknownLeaderEpoch"; 
+ "UnknownLeaderEpoch"; get_error_name(?UNSUPPORTED_COMPRESSION_TYPE) -> - "UnsupportedCompressionType"; + "UnsupportedCompressionType"; get_error_name(?STALE_BROKER_EPOCH) -> - "StaleBrokerEpoch"; + "StaleBrokerEpoch"; get_error_name(?OFFSET_NOT_AVAILABLE) -> - "OffsetNotAvailable"; + "OffsetNotAvailable"; get_error_name(?MEMBER_ID_REQUIRED) -> - "MemberIdRequired"; + "MemberIdRequired"; get_error_name(?PREFERRED_LEADER_NOT_AVAILABLE) -> - "PreferedLeaderNotAvailable"; + "PreferedLeaderNotAvailable"; get_error_name(?GROUP_MAX_SIZE_REACHED) -> - "GroupMaxSizeReached"; + "GroupMaxSizeReached"; get_error_name(?FENCED_INSTANCE_ID) -> - "FencedInstanceId"; + "FencedInstanceId"; get_error_name(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> - "EligibleLeadersNotAvailable"; + "EligibleLeadersNotAvailable"; get_error_name(?ELECTION_NOT_NEEDED) -> - "ElectionNotNeeded"; + "ElectionNotNeeded"; get_error_name(?NO_REASSIGNMENT_IN_PROGRESS) -> - "NoReassignmentInProgress"; + "NoReassignmentInProgress"; get_error_name(?GROUP_SUBSCRIBED_TO_TOPIC) -> - "GroupSubscribedToTopic"; + "GroupSubscribedToTopic"; get_error_name(?INVALID_RECORD) -> - "InvalidRecord"; + "InvalidRecord"; get_error_name(?UNSTABLE_OFFSET_COMMIT) -> - "UnstableOffsetCommit"; + "UnstableOffsetCommit"; get_error_name(?THROTTLING_QUOTA_EXCEEDED) -> - "ThrottlingQuotaExceeded"; + "ThrottlingQuotaExceeded"; get_error_name(?PRODUCER_FENCED) -> - "ProducerFenced"; + "ProducerFenced"; get_error_name(?RESOURCE_NOT_FOUND) -> - "ResourceNotFound"; + "ResourceNotFound"; get_error_name(?DUPLICATE_RESOURCE) -> - "DuplicateResource"; + "DuplicateResource"; get_error_name(?UNACCEPTABLE_CREDENTIAL) -> - "UnacceptableCredential"; + "UnacceptableCredential"; get_error_name(?INCONSISTENT_VOTER_SET) -> - "InconsistentVoterSet"; + "InconsistentVoterSet"; get_error_name(?INVALID_UPDATE_VERSION) -> - "InvalidUpdateVersion"; + "InvalidUpdateVersion"; get_error_name(?FEATURE_UPDATE_FAILED) -> - "FeatureUpdateFailed". + "FeatureUpdateFailed". get_error_description(?UNKNOWN_SERVER_ERROR) -> - "The server experienced an unexpected error when processing the request."; + "The server experienced an unexpected error when processing the request."; get_error_description(?NONE) -> - "None"; + "None"; get_error_description(?OFFSET_OUT_OF_RANGE) -> - "The requested offset is not within the range of offsets maintained by the server."; + "The requested offset is not within the range of offsets maintained by the server."; get_error_description(?CORRUPT_MESSAGE) -> - "This message has failed its CRC checksum, exceeds the valid size, has a null key " ++ - "for a compacted topic, or is otherwise corrupt."; + "This message has failed its CRC checksum, exceeds the valid size, has a null key " ++ + "for a compacted topic, or is otherwise corrupt."; get_error_description(?UNKNOWN_TOPIC_OR_PARTITION) -> - "This server does not host this topic-partition."; + "This server does not host this topic-partition."; get_error_description(?INVALID_FETCH_SIZE) -> - "The requested fetch size is invalid."; + "The requested fetch size is invalid."; get_error_description(?LEADER_NOT_AVAILABLE) -> - "There is no leader for this topic-partition as we are in the middle of a leadership election."; + "There is no leader for this topic-partition as we are in the middle of a leadership election."; get_error_description(?NOT_LEADER_OR_FOLLOWER) -> - "For requests intended only for the leader, this error indicates that the broker is not " ++ - "the current leader. 
For requests intended for any replica, this error indicates that the " ++ - "broker is not a replica of the topic partition."; + "For requests intended only for the leader, this error indicates that the broker is not " ++ + "the current leader. For requests intended for any replica, this error indicates that the " ++ + "broker is not a replica of the topic partition."; get_error_description(?REQUEST_TIMED_OUT) -> - "The request timed out."; + "The request timed out."; get_error_description(?BROKER_NOT_AVAILABLE) -> - "The broker is not available."; + "The broker is not available."; get_error_description(?REPLICA_NOT_AVAILABLE) -> - "The replica is not available for the requested topic-partition. Produce/Fetch requests and " ++ - "other requests intended only for the leader or follower return NOT_LEADER_OR_FOLLOWER if the " ++ - "broker is not a replica of the topic-partition."; + "The replica is not available for the requested topic-partition. Produce/Fetch requests and " ++ + "other requests intended only for the leader or follower return NOT_LEADER_OR_FOLLOWER if the " ++ + "broker is not a replica of the topic-partition."; get_error_description(?MESSAGE_TOO_LARGE) -> - "The request included a message larger than the max message size the server will accept."; + "The request included a message larger than the max message size the server will accept."; get_error_description(?STALE_CONTROLLER_EPOCH) -> - "The controller moved to another broker."; + "The controller moved to another broker."; get_error_description(?OFFSET_METADATA_TOO_LARGE) -> - "The metadata field of the offset request was too large."; + "The metadata field of the offset request was too large."; get_error_description(?NETWORK_EXCEPTION) -> - "The server disconnected before a response was received."; + "The server disconnected before a response was received."; get_error_description(?COORDINATOR_LOAD_IN_PROGRESS) -> - "The coordinator is loading and hence can't process requests."; + "The coordinator is loading and hence can't process requests."; get_error_description(?COORDINATOR_NOT_AVAILABLE) -> - "The coordinator is not available."; + "The coordinator is not available."; get_error_description(?NOT_COORDINATOR) -> - "This is not the correct coordinator."; + "This is not the correct coordinator."; get_error_description(?INVALID_TOPIC_EXCEPTION) -> - "The request attempted to perform an operation on an invalid topic."; + "The request attempted to perform an operation on an invalid topic."; get_error_description(?RECORD_LIST_TOO_LARGE) -> - "The request included message batch larger than the configured segment size on the server."; + "The request included message batch larger than the configured segment size on the server."; get_error_description(?NOT_ENOUGH_REPLICAS) -> - "Messages are rejected since there are fewer in-sync replicas than required."; + "Messages are rejected since there are fewer in-sync replicas than required."; get_error_description(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) -> - "Messages are written to the log, but to fewer in-sync replicas than required."; + "Messages are written to the log, but to fewer in-sync replicas than required."; get_error_description(?INVALID_REQUIRED_ACKS) -> - "Produce request specified an invalid value for required acks."; + "Produce request specified an invalid value for required acks."; get_error_description(?ILLEGAL_GENERATION) -> - "Specified group generation id is not valid."; + "Specified group generation id is not valid."; get_error_description(?INCONSISTENT_GROUP_PROTOCOL) -> 
- "The group member's supported protocols are incompatible with those of existing members or first " ++ - "group member tried to join with empty protocol type or empty protocol list."; + "The group member's supported protocols are incompatible with those of existing members or first " ++ + "group member tried to join with empty protocol type or empty protocol list."; get_error_description(?INVALID_GROUP_ID) -> - "The configured groupId is invalid."; + "The configured groupId is invalid."; get_error_description(?UNKNOWN_MEMBER_ID) -> - "The coordinator is not aware of this member."; + "The coordinator is not aware of this member."; get_error_description(?INVALID_SESSION_TIMEOUT) -> - "The session timeout is not within the range allowed by the broker (as configured by group.min.session.timeout.ms " ++ - "and group.max.session.timeout.ms)."; + "The session timeout is not within the range allowed by the broker (as configured by group.min.session.timeout.ms " ++ + "and group.max.session.timeout.ms)."; get_error_description(?REBALANCE_IN_PROGRESS) -> - "The group is rebalancing, so a rejoin is needed."; + "The group is rebalancing, so a rejoin is needed."; get_error_description(?INVALID_COMMIT_OFFSET_SIZE) -> - "The committing offset data size is not valid."; + "The committing offset data size is not valid."; get_error_description(?TOPIC_AUTHORIZATION_FAILED) -> - "Topic authorization failed."; + "Topic authorization failed."; get_error_description(?GROUP_AUTHORIZATION_FAILED) -> - "Group authorization failed."; + "Group authorization failed."; get_error_description(?CLUSTER_AUTHORIZATION_FAILED) -> - "Cluster authorization failed."; + "Cluster authorization failed."; get_error_description(?INVALID_TIMESTAMP) -> - "The timestamp of the message is out of acceptable range."; + "The timestamp of the message is out of acceptable range."; get_error_description(?UNSUPPORTED_SASL_MECHANISM) -> - "The broker does not support the requested SASL mechanism."; + "The broker does not support the requested SASL mechanism."; get_error_description(?ILLEGAL_SASL_STATE) -> - "Request is not valid given the current SASL state."; + "Request is not valid given the current SASL state."; get_error_description(?UNSUPPORTED_VERSION) -> - "The version of API is not supported."; + "The version of API is not supported."; get_error_description(?TOPIC_ALREADY_EXISTS) -> - "Topic with this name already exists."; + "Topic with this name already exists."; get_error_description(?INVALID_PARTITIONS) -> - "Number of partitions is below 1."; + "Number of partitions is below 1."; get_error_description(?INVALID_REPLICATION_FACTOR) -> - "Replication factor is below 1 or larger than the number of available brokers."; + "Replication factor is below 1 or larger than the number of available brokers."; get_error_description(?INVALID_REPLICA_ASSIGNMENT) -> - "Replica assignment is invalid."; + "Replica assignment is invalid."; get_error_description(?INVALID_CONFIG) -> - "Configuration is invalid."; + "Configuration is invalid."; get_error_description(?NOT_CONTROLLER) -> - "This is not the correct controller for this cluster."; + "This is not the correct controller for this cluster."; get_error_description(?INVALID_REQUEST) -> - "This most likely occurs because of a request being malformed by the client library or the message was sent " ++ - "to an incompatible broker. 
See the broker logs for more details."; + "This most likely occurs because of a request being malformed by the client library or the message was sent " ++ + "to an incompatible broker. See the broker logs for more details."; get_error_description(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> - "The message format version on the broker does not support the request."; + "The message format version on the broker does not support the request."; get_error_description(?POLICY_VIOLATION) -> - "Request parameters do not satisfy the configured policy."; + "Request parameters do not satisfy the configured policy."; get_error_description(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> - "The broker received an out of order sequence number."; + "The broker received an out of order sequence number."; get_error_description(?DUPLICATE_SEQUENCE_NUMBER) -> - "The broker received a duplicate sequence number."; + "The broker received a duplicate sequence number."; get_error_description(?INVALID_PRODUCER_EPOCH) -> - "Producer attempted to produce with an old epoch."; + "Producer attempted to produce with an old epoch."; get_error_description(?INVALID_TXN_STATE) -> - "The producer attempted a transactional operation in an invalid state."; + "The producer attempted a transactional operation in an invalid state."; get_error_description(?INVALID_PRODUCER_ID_MAPPING) -> - "The producer attempted to use a producer id which is not currently assigned to its transactional id."; + "The producer attempted to use a producer id which is not currently assigned to its transactional id."; get_error_description(?INVALID_TRANSACTION_TIMEOUT) -> - "The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms)."; + "The transaction timeout is larger than the maximum value allowed by the broker (as configured by transaction.max.timeout.ms)."; get_error_description(?CONCURRENT_TRANSACTIONS) -> - "The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing."; + "The producer attempted to update a transaction while another concurrent operation on the same transaction was ongoing."; get_error_description(?TRANSACTION_COORDINATOR_FENCED) -> - "Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer."; + "Indicates that the transaction coordinator sending a WriteTxnMarker is no longer the current coordinator for a given producer."; get_error_description(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> - "Transactional Id authorization failed."; + "Transactional Id authorization failed."; get_error_description(?SECURITY_DISABLED) -> - "Security features are disabled."; + "Security features are disabled."; get_error_description(?OPERATION_NOT_ATTEMPTED) -> - "The broker did not attempt to execute this operation. This may happen for batched RPCs where some operations in " ++ - "the batch failed, causing the broker to respond without trying the rest."; + "The broker did not attempt to execute this operation. 
This may happen for batched RPCs where some operations in " ++ + "the batch failed, causing the broker to respond without trying the rest."; get_error_description(?KAFKA_STORAGE_ERROR) -> - "Disk error when trying to access log file on the disk."; + "Disk error when trying to access log file on the disk."; get_error_description(?LOG_DIR_NOT_FOUND) -> - "The user-specified log directory is not found in the broker config."; + "The user-specified log directory is not found in the broker config."; get_error_description(?SASL_AUTHENTICATION_FAILED) -> - "SASL Authentication failed."; + "SASL Authentication failed."; get_error_description(?UNKNOWN_PRODUCER_ID) -> - "This exception is raised by the broker if it could not locate the producer metadata associated with the " ++ - "producerId in question. This could happen if, for instance, the producer's records were deleted because their " ++ - "retention time had elapsed. Once the last records of the producerId are removed, the producer's metadata is " ++ - "removed from the broker, and future appends by the producer will return this exception."; + "This exception is raised by the broker if it could not locate the producer metadata associated with the " ++ + "producerId in question. This could happen if, for instance, the producer's records were deleted because their " ++ + "retention time had elapsed. Once the last records of the producerId are removed, the producer's metadata is " ++ + "removed from the broker, and future appends by the producer will return this exception."; get_error_description(?REASSIGNMENT_IN_PROGRESS) -> - "A partition reassignment is in progress."; + "A partition reassignment is in progress."; get_error_description(?DELEGATION_TOKEN_AUTH_DISABLED) -> - "Delegation Token feature is not enabled."; + "Delegation Token feature is not enabled."; get_error_description(?DELEGATION_TOKEN_NOT_FOUND) -> - "Delegation Token is not found on server."; + "Delegation Token is not found on server."; get_error_description(?DELEGATION_TOKEN_OWNER_MISMATCH) -> - "Specified Principal is not valid Owner/Renewer."; + "Specified Principal is not valid Owner/Renewer."; get_error_description(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> - "Delegation Token requests are not allowed on PLAINTEXT/1-way SSL channels and on delegation token authenticated channels."; + "Delegation Token requests are not allowed on PLAINTEXT/1-way SSL channels and on delegation token authenticated channels."; get_error_description(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> - "Delegation Token authorization failed."; + "Delegation Token authorization failed."; get_error_description(?DELEGATION_TOKEN_EXPIRED) -> - "Delegation Token is expired."; + "Delegation Token is expired."; get_error_description(?INVALID_PRINCIPAL_TYPE) -> - "Supplied principalType is not supported."; + "Supplied principalType is not supported."; get_error_description(?NON_EMPTY_GROUP) -> - "The group is not empty."; + "The group is not empty."; get_error_description(?GROUP_ID_NOT_FOUND) -> - "The group id does not exist."; + "The group id does not exist."; get_error_description(?FETCH_SESSION_ID_NOT_FOUND) -> - "The fetch session ID was not found."; + "The fetch session ID was not found."; get_error_description(?INVALID_FETCH_SESSION_EPOCH) -> - "The fetch session epoch is invalid."; + "The fetch session epoch is invalid."; get_error_description(?LISTENER_NOT_FOUND) -> - "There is no listener on the leader broker that matches the listener on which metadata request was processed."; + "There is no listener 
on the leader broker that matches the listener on which metadata request was processed."; get_error_description(?TOPIC_DELETION_DISABLED) -> - "Topic deletion is disabled."; + "Topic deletion is disabled."; get_error_description(?FENCED_LEADER_EPOCH) -> - "The leader epoch in the request is older than the epoch on the broker."; + "The leader epoch in the request is older than the epoch on the broker."; get_error_description(?UNKNOWN_LEADER_EPOCH) -> - "The leader epoch in the request is newer than the epoch on the broker."; + "The leader epoch in the request is newer than the epoch on the broker."; get_error_description(?UNSUPPORTED_COMPRESSION_TYPE) -> - "The requesting client does not support the compression type of given partition."; + "The requesting client does not support the compression type of given partition."; get_error_description(?STALE_BROKER_EPOCH) -> - "Broker epoch has changed."; + "Broker epoch has changed."; get_error_description(?OFFSET_NOT_AVAILABLE) -> - "The leader high watermark has not caught up from a recent leader election so the offsets cannot be guaranteed " ++ - "to be monotonically increasing."; + "The leader high watermark has not caught up from a recent leader election so the offsets cannot be guaranteed " ++ + "to be monotonically increasing."; get_error_description(?MEMBER_ID_REQUIRED) -> - "The group member needs to have a valid member id before actually entering a consumer group."; + "The group member needs to have a valid member id before actually entering a consumer group."; get_error_description(?PREFERRED_LEADER_NOT_AVAILABLE) -> - "The preferred leader was not available."; + "The preferred leader was not available."; get_error_description(?GROUP_MAX_SIZE_REACHED) -> - "The consumer group has reached its max size."; + "The consumer group has reached its max size."; get_error_description(?FENCED_INSTANCE_ID) -> - "The broker rejected this static consumer since another consumer with the same group.instance.id has registered " ++ - "with a different member.id."; + "The broker rejected this static consumer since another consumer with the same group.instance.id has registered " ++ + "with a different member.id."; get_error_description(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> - "Eligible topic partition leaders are not available."; + "Eligible topic partition leaders are not available."; get_error_description(?ELECTION_NOT_NEEDED) -> - "Leader election not needed for topic partition."; + "Leader election not needed for topic partition."; get_error_description(?NO_REASSIGNMENT_IN_PROGRESS) -> - "No partition reassignment is in progress."; + "No partition reassignment is in progress."; get_error_description(?GROUP_SUBSCRIBED_TO_TOPIC) -> - "Deleting offsets of a topic is forbidden while the consumer group is actively subscribed to it."; + "Deleting offsets of a topic is forbidden while the consumer group is actively subscribed to it."; get_error_description(?INVALID_RECORD) -> - "This record has failed the validation on broker and hence will be rejected."; + "This record has failed the validation on broker and hence will be rejected."; get_error_description(?UNSTABLE_OFFSET_COMMIT) -> - "There are unstable offsets that need to be cleared."; + "There are unstable offsets that need to be cleared."; get_error_description(?THROTTLING_QUOTA_EXCEEDED) -> - "The throttling quota has been exceeded."; + "The throttling quota has been exceeded."; get_error_description(?PRODUCER_FENCED) -> - "There is a newer producer with the same transactionalId which fences the current 
one."; + "There is a newer producer with the same transactionalId which fences the current one."; get_error_description(?RESOURCE_NOT_FOUND) -> - "A request illegally referred to a resource that does not exist."; + "A request illegally referred to a resource that does not exist."; get_error_description(?DUPLICATE_RESOURCE) -> - "A request illegally referred to the same resource twice."; + "A request illegally referred to the same resource twice."; get_error_description(?UNACCEPTABLE_CREDENTIAL) -> - "Requested credential would not meet criteria for acceptability."; + "Requested credential would not meet criteria for acceptability."; get_error_description(?INCONSISTENT_VOTER_SET) -> - "Indicates that the either the sender or recipient of a voter-only request is not one of the expected voters"; + "Indicates that the either the sender or recipient of a voter-only request is not one of the expected voters"; get_error_description(?INVALID_UPDATE_VERSION) -> - "The given update version was invalid."; + "The given update version was invalid."; get_error_description(?FEATURE_UPDATE_FAILED) -> - "Unable to update finalized features due to an unexpected server error.". + "Unable to update finalized features due to an unexpected server error.". get_error_tuple(?UNKNOWN_SERVER_ERROR) -> - {error, unknown_server_error}; + {error, unknown_server_error}; get_error_tuple(?NONE) -> - {error, none}; + {error, none}; get_error_tuple(?OFFSET_OUT_OF_RANGE) -> - {error, offset_out_of_range}; + {error, offset_out_of_range}; get_error_tuple(?CORRUPT_MESSAGE) -> - {error, corrupt_message}; + {error, corrupt_message}; get_error_tuple(?UNKNOWN_TOPIC_OR_PARTITION) -> - {error, unknown_topic_or_partition}; + {error, unknown_topic_or_partition}; get_error_tuple(?INVALID_FETCH_SIZE) -> - {error, invalid_fetch_size}; + {error, invalid_fetch_size}; get_error_tuple(?LEADER_NOT_AVAILABLE) -> - {error, leader_not_available}; + {error, leader_not_available}; get_error_tuple(?NOT_LEADER_OR_FOLLOWER) -> - {error, not_leader_or_follower}; + {error, not_leader_or_follower}; get_error_tuple(?REQUEST_TIMED_OUT) -> - {error, request_timed_out}; + {error, request_timed_out}; get_error_tuple(?BROKER_NOT_AVAILABLE) -> - {error, broker_not_available}; + {error, broker_not_available}; get_error_tuple(?REPLICA_NOT_AVAILABLE) -> - {error, replica_not_available}; + {error, replica_not_available}; get_error_tuple(?MESSAGE_TOO_LARGE) -> - {error, message_too_large}; + {error, message_too_large}; get_error_tuple(?STALE_CONTROLLER_EPOCH) -> - {error, stale_controller_epoch}; + {error, stale_controller_epoch}; get_error_tuple(?OFFSET_METADATA_TOO_LARGE) -> - {error, offset_metadata_too_large}; + {error, offset_metadata_too_large}; get_error_tuple(?NETWORK_EXCEPTION) -> - {error, network_exception}; + {error, network_exception}; get_error_tuple(?COORDINATOR_LOAD_IN_PROGRESS) -> - {error, coordinator_load_in_progress}; + {error, coordinator_load_in_progress}; get_error_tuple(?COORDINATOR_NOT_AVAILABLE) -> - {error, coordinator_not_available}; + {error, coordinator_not_available}; get_error_tuple(?NOT_COORDINATOR) -> - {error, not_coordinator}; + {error, not_coordinator}; get_error_tuple(?INVALID_TOPIC_EXCEPTION) -> - {error, invalid_topic_exception}; + {error, invalid_topic_exception}; get_error_tuple(?RECORD_LIST_TOO_LARGE) -> - {error, record_list_too_large}; + {error, record_list_too_large}; get_error_tuple(?NOT_ENOUGH_REPLICAS) -> - {error, not_enough_replicas}; + {error, not_enough_replicas}; get_error_tuple(?NOT_ENOUGH_REPLICAS_AFTER_APPEND) 
-> - {error, not_enough_replicas_after_append}; + {error, not_enough_replicas_after_append}; get_error_tuple(?INVALID_REQUIRED_ACKS) -> - {error, invalid_required_acks}; + {error, invalid_required_acks}; get_error_tuple(?ILLEGAL_GENERATION) -> - {error, illegal_generation}; + {error, illegal_generation}; get_error_tuple(?INCONSISTENT_GROUP_PROTOCOL) -> - {error, inconsistent_group_protocol}; + {error, inconsistent_group_protocol}; get_error_tuple(?INVALID_GROUP_ID) -> - {error, invalid_group_id}; + {error, invalid_group_id}; get_error_tuple(?UNKNOWN_MEMBER_ID) -> - {error, unknown_member_id}; + {error, unknown_member_id}; get_error_tuple(?INVALID_SESSION_TIMEOUT) -> - {error, invalid_session_timeout}; + {error, invalid_session_timeout}; get_error_tuple(?REBALANCE_IN_PROGRESS) -> - {error, rebalance_in_progress}; + {error, rebalance_in_progress}; get_error_tuple(?INVALID_COMMIT_OFFSET_SIZE) -> - {error, invalid_commit_offset_size}; + {error, invalid_commit_offset_size}; get_error_tuple(?TOPIC_AUTHORIZATION_FAILED) -> - {error, topic_authorization_failed}; + {error, topic_authorization_failed}; get_error_tuple(?GROUP_AUTHORIZATION_FAILED) -> - {error, group_authorization_failed}; + {error, group_authorization_failed}; get_error_tuple(?CLUSTER_AUTHORIZATION_FAILED) -> - {error, cluster_authorization_failed}; + {error, cluster_authorization_failed}; get_error_tuple(?INVALID_TIMESTAMP) -> - {error, invalid_timestamp}; + {error, invalid_timestamp}; get_error_tuple(?UNSUPPORTED_SASL_MECHANISM) -> - {error, unsupported_sasl_mechanism}; + {error, unsupported_sasl_mechanism}; get_error_tuple(?ILLEGAL_SASL_STATE) -> - {error, illegal_sasl_state}; + {error, illegal_sasl_state}; get_error_tuple(?UNSUPPORTED_VERSION) -> - {error, unsupported_version}; + {error, unsupported_version}; get_error_tuple(?TOPIC_ALREADY_EXISTS) -> - {error, topic_already_exists}; + {error, topic_already_exists}; get_error_tuple(?INVALID_PARTITIONS) -> - {error, invalid_partitions}; + {error, invalid_partitions}; get_error_tuple(?INVALID_REPLICATION_FACTOR) -> - {error, invalid_replication_factor}; + {error, invalid_replication_factor}; get_error_tuple(?INVALID_REPLICA_ASSIGNMENT) -> - {error, invalid_replica_assignment}; + {error, invalid_replica_assignment}; get_error_tuple(?INVALID_CONFIG) -> - {error, invalid_config}; + {error, invalid_config}; get_error_tuple(?NOT_CONTROLLER) -> - {error, not_controller}; + {error, not_controller}; get_error_tuple(?INVALID_REQUEST) -> - {error, invalid_request}; + {error, invalid_request}; get_error_tuple(?UNSUPPORTED_FOR_MESSAGE_FORMAT) -> - {error, unsupported_for_message_format}; + {error, unsupported_for_message_format}; get_error_tuple(?POLICY_VIOLATION) -> - {error, policy_violation}; + {error, policy_violation}; get_error_tuple(?OUT_OF_ORDER_SEQUENCE_NUMBER) -> - {error, out_of_order_sequence_number}; + {error, out_of_order_sequence_number}; get_error_tuple(?DUPLICATE_SEQUENCE_NUMBER) -> - {error, duplicate_sequence_number}; + {error, duplicate_sequence_number}; get_error_tuple(?INVALID_PRODUCER_EPOCH) -> - {error, invalid_producer_epoch}; + {error, invalid_producer_epoch}; get_error_tuple(?INVALID_TXN_STATE) -> - {error, invalid_txn_state}; + {error, invalid_txn_state}; get_error_tuple(?INVALID_PRODUCER_ID_MAPPING) -> - {error, invalid_producer_id_mapping}; + {error, invalid_producer_id_mapping}; get_error_tuple(?INVALID_TRANSACTION_TIMEOUT) -> - {error, invalid_transaction_timeout}; + {error, invalid_transaction_timeout}; get_error_tuple(?CONCURRENT_TRANSACTIONS) -> - {error, 
concurrent_transactions}; + {error, concurrent_transactions}; get_error_tuple(?TRANSACTION_COORDINATOR_FENCED) -> - {error, transaction_coordinator_fenced}; + {error, transaction_coordinator_fenced}; get_error_tuple(?TRANSACTIONAL_ID_AUTHORIZATION_FAILED) -> - {error, transactional_id_authorization_failed}; + {error, transactional_id_authorization_failed}; get_error_tuple(?SECURITY_DISABLED) -> - {error, security_disabled}; + {error, security_disabled}; get_error_tuple(?OPERATION_NOT_ATTEMPTED) -> - {error, operation_not_attempted}; + {error, operation_not_attempted}; get_error_tuple(?KAFKA_STORAGE_ERROR) -> - {error, kafka_storage_error}; + {error, kafka_storage_error}; get_error_tuple(?LOG_DIR_NOT_FOUND) -> - {error, log_dir_not_found}; + {error, log_dir_not_found}; get_error_tuple(?SASL_AUTHENTICATION_FAILED) -> - {error, sasl_authentication_failed}; + {error, sasl_authentication_failed}; get_error_tuple(?UNKNOWN_PRODUCER_ID) -> - {error, unknown_producer_id}; + {error, unknown_producer_id}; get_error_tuple(?REASSIGNMENT_IN_PROGRESS) -> - {error, reassignment_in_progress}; + {error, reassignment_in_progress}; get_error_tuple(?DELEGATION_TOKEN_AUTH_DISABLED) -> - {error, delegation_token_auth_disabled}; + {error, delegation_token_auth_disabled}; get_error_tuple(?DELEGATION_TOKEN_NOT_FOUND) -> - {error, delegation_token_not_found}; + {error, delegation_token_not_found}; get_error_tuple(?DELEGATION_TOKEN_OWNER_MISMATCH) -> - {error, delegation_token_owner_mismatch}; + {error, delegation_token_owner_mismatch}; get_error_tuple(?DELEGATION_TOKEN_REQUEST_NOT_ALLOWED) -> - {error, delegation_token_request_not_allowed}; + {error, delegation_token_request_not_allowed}; get_error_tuple(?DELEGATION_TOKEN_AUTHORIZATION_FAILED) -> - {error, delegation_token_authorization_failed}; + {error, delegation_token_authorization_failed}; get_error_tuple(?DELEGATION_TOKEN_EXPIRED) -> - {error, delegation_token_expired}; + {error, delegation_token_expired}; get_error_tuple(?INVALID_PRINCIPAL_TYPE) -> - {error, invalid_principal_type}; + {error, invalid_principal_type}; get_error_tuple(?NON_EMPTY_GROUP) -> - {error, non_empty_group}; + {error, non_empty_group}; get_error_tuple(?GROUP_ID_NOT_FOUND) -> - {error, group_id_not_found}; + {error, group_id_not_found}; get_error_tuple(?FETCH_SESSION_ID_NOT_FOUND) -> - {error, fetch_session_id_not_found}; + {error, fetch_session_id_not_found}; get_error_tuple(?INVALID_FETCH_SESSION_EPOCH) -> - {error, invalid_fetch_session_epoch}; + {error, invalid_fetch_session_epoch}; get_error_tuple(?LISTENER_NOT_FOUND) -> - {error, listener_not_found}; + {error, listener_not_found}; get_error_tuple(?TOPIC_DELETION_DISABLED) -> - {error, topic_deletion_disabled}; + {error, topic_deletion_disabled}; get_error_tuple(?FENCED_LEADER_EPOCH) -> - {error, fenced_leader_epoch}; + {error, fenced_leader_epoch}; get_error_tuple(?UNKNOWN_LEADER_EPOCH) -> - {error, unknown_leader_epoch}; + {error, unknown_leader_epoch}; get_error_tuple(?UNSUPPORTED_COMPRESSION_TYPE) -> - {error, unsupported_compression_type}; + {error, unsupported_compression_type}; get_error_tuple(?STALE_BROKER_EPOCH) -> - {error, stale_broker_epoch}; + {error, stale_broker_epoch}; get_error_tuple(?OFFSET_NOT_AVAILABLE) -> - {error, offset_not_available}; + {error, offset_not_available}; get_error_tuple(?MEMBER_ID_REQUIRED) -> - {error, member_id_required}; + {error, member_id_required}; get_error_tuple(?PREFERRED_LEADER_NOT_AVAILABLE) -> - {error, preferred_leader_not_available}; + {error, preferred_leader_not_available}; 
get_error_tuple(?GROUP_MAX_SIZE_REACHED) -> - {error, group_max_size_reached}; + {error, group_max_size_reached}; get_error_tuple(?FENCED_INSTANCE_ID) -> - {error, fenced_instance_id}; + {error, fenced_instance_id}; get_error_tuple(?ELIGIBLE_LEADERS_NOT_AVAILABLE) -> - {error, eligible_leaders_not_available}; + {error, eligible_leaders_not_available}; get_error_tuple(?ELECTION_NOT_NEEDED) -> - {error, election_not_needed}; + {error, election_not_needed}; get_error_tuple(?NO_REASSIGNMENT_IN_PROGRESS) -> - {error, no_reassignment_in_progress}; + {error, no_reassignment_in_progress}; get_error_tuple(?GROUP_SUBSCRIBED_TO_TOPIC) -> - {error, group_subscribed_to_topic}; + {error, group_subscribed_to_topic}; get_error_tuple(?INVALID_RECORD) -> - {error, invalid_record}; + {error, invalid_record}; get_error_tuple(?UNSTABLE_OFFSET_COMMIT) -> - {error, unstable_offset_commit}; + {error, unstable_offset_commit}; get_error_tuple(?THROTTLING_QUOTA_EXCEEDED) -> - {error, throttling_quota_exceeded}; + {error, throttling_quota_exceeded}; get_error_tuple(?PRODUCER_FENCED) -> - {error, producer_fenced}; + {error, producer_fenced}; get_error_tuple(?RESOURCE_NOT_FOUND) -> - {error, resource_not_found}; + {error, resource_not_found}; get_error_tuple(?DUPLICATE_RESOURCE) -> - {error, duplicate_resource}; + {error, duplicate_resource}; get_error_tuple(?UNACCEPTABLE_CREDENTIAL) -> - {error, unacceptable_credential}; + {error, unacceptable_credential}; get_error_tuple(?INCONSISTENT_VOTER_SET) -> - {error, inconsistent_voter_set}; + {error, inconsistent_voter_set}; get_error_tuple(?INVALID_UPDATE_VERSION) -> - {error, invalid_update_version}; + {error, invalid_update_version}; get_error_tuple(?FEATURE_UPDATE_FAILED) -> - {error, feature_update_failed}. + {error, feature_update_failed}.
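For context, a minimal sketch of how these mappings might be consumed by calling code. The module name kafkerl_error and the wrapper function below are illustrative assumptions, not part of this patch; only get_error_tuple/1 and get_error_description/1 are taken from the hunk above. The idea is that the numeric code returned by the broker is converted to a matchable {error, Atom} tuple, while the description string is used only for logging.

%% Hypothetical caller module; kafkerl_error is assumed to export
%% get_error_tuple/1 and get_error_description/1 as shown in the hunk above.
-module(kafkerl_error_usage_example).
-export([handle_error_code/1]).

handle_error_code(ErrorCode) ->
  case kafkerl_error:get_error_tuple(ErrorCode) of
    {error, none} ->
      %% ?NONE (code 0) maps to {error, none}; treat it as success here.
      ok;
    {error, Reason} = Error ->
      %% Log the human-readable description, return the tuple for matching.
      error_logger:warning_msg("kafka error ~p: ~s~n",
                               [Reason,
                                kafkerl_error:get_error_description(ErrorCode)]),
      Error
  end.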