Skip to content

Commit

Permalink
feat: impl benchmark writing (#22)
Browse files Browse the repository at this point in the history
* feat: impl benchmark writing

* feat: tweak encoder performance

* test: writing in concurrency
  • Loading branch information
killme2008 authored Jun 29, 2023
1 parent 6e37ec0 commit ed1ef46
Show file tree
Hide file tree
Showing 3 changed files with 342 additions and 46 deletions.
106 changes: 106 additions & 0 deletions eprof.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

****** Process <0.909.0> -- 100.00 % of profiled time ***
FUNCTION CALLS % TIME [uS / CALLS]
-------- ----- ------- ---- [----------]
greptimedb_SUITE:bench_write/4 1 0.00 0 [ 0.00]
greptimedb_SUITE:'-t_bench_perf/1-fun-0-'/1 1 0.00 0 [ 0.00]
erlang:unique_integer/0 1 0.00 0 [ 0.00]
erlang:system_time/0 1 0.00 0 [ 0.00]
rand:seed/1 1 0.00 0 [ 0.00]
rand:seed_s/1 1 0.00 0 [ 0.00]
rand:seed_s/2 1 0.00 0 [ 0.00]
rand:exsss_seed/1 1 0.00 0 [ 0.00]
rand:seed58/1 3 0.00 0 [ 0.00]
erlang:phash2/1 1 0.00 1 [ 1.00]
erlang:apply/2 1 0.00 1 [ 1.00]
rand:default_seed/0 1 0.00 1 [ 1.00]
rand:mk_alg/1 1 0.00 1 [ 1.00]
rand:splitmix64_next/1 3 0.00 3 [ 1.00]
sets:mk_seg/1 10000 0.00 419 [ 0.04]
sets:fold_set/3 10000 0.00 550 [ 0.06]
sets:fold/3 10000 0.00 556 [ 0.06]
lists:seq/2 10000 0.00 577 [ 0.06]
lists:flatten/1 10000 0.00 579 [ 0.06]
lists:reverse/1 10000 0.00 1025 [ 0.10]
sets:new/0 10000 0.00 1064 [ 0.11]
sets:to_list/1 10000 0.00 1083 [ 0.11]
sets:fold_segs/4 20000 0.00 1230 [ 0.06]
greptimedb_encoder:collect_columns/1 10000 0.00 1370 [ 0.14]
greptimedb_encoder:insert_requests/2 10000 0.00 1399 [ 0.14]
greptimedb_SUITE:bench_points/2 10000 0.00 1550 [ 0.15]
sets:from_list/1 10000 0.00 1596 [ 0.16]
proplists:get_value/3 40000 0.00 1794 [ 0.04]
lists:foldl/3 20000 0.00 2019 [ 0.10]
greptimedb_encoder:insert_request/2 20000 0.01 6275 [ 0.31]
sets:'-to_list/1-fun-0-'/2 210000 0.01 7383 [ 0.04]
greptimedb_encoder:'-merge_columns/1-fun-1-'/1 210000 0.01 7452 [ 0.04]
greptimedb_encoder:insert_requests/4 20000 0.01 7764 [ 0.39]
greptimedb_encoder:'-merge_columns/1-fun-3-'/2 210000 0.01 7792 [ 0.04]
maps:remove/2 210000 0.01 9067 [ 0.04]
greptimedb_encoder:merge_columns/1 10000 0.01 9340 [ 0.93]
greptimedb_SUITE:bench_write/5 10001 0.01 10066 [ 1.01]
lists:reverse/2 10000 0.01 10671 [ 1.07]
lists:seq_loop/3 260000 0.01 10732 [ 0.04]
sets:fold_seg/4 170000 0.01 11829 [ 0.07]
sets:maybe_expand/1 210000 0.02 14624 [ 0.07]
maps:update_with/3 210000 0.02 19488 [ 0.09]
sets:fold_bucket/3 370000 0.02 20983 [ 0.06]
greptimedb_encoder:'-insert_request/2-fun-0-'/2 210000 0.03 22504 [ 0.11]
greptimedb_encoder:'-merge_columns/1-fun-4-'/1 210000 0.03 26390 [ 0.13]
greptimedb_encoder:'-merge_columns/1-fun-5-'/1 210000 0.03 28114 [ 0.13]
sets:update_bucket/3 210000 0.04 30147 [ 0.14]
greptimedb_encoder:flatten/1 210000 0.04 31857 [ 0.15]
rand:exsss_uniform/2 1000000 0.04 35355 [ 0.04]
rand:seed_get/0 1000000 0.04 35687 [ 0.04]
base64:encode/1 1000000 0.04 35714 [ 0.04]
erlang:setelement/3 840000 0.04 35747 [ 0.04]
rand:uniform_s/2 1000000 0.04 36359 [ 0.04]
erlang:put/2 1000001 0.05 40028 [ 0.04]
greptimedb_encoder:values_size/1 210000 0.06 50385 [ 0.24]
rand:seed_put/1 1000001 0.08 69585 [ 0.07]
greptimedb_SUITE:rand_string/1 1000000 0.09 77508 [ 0.08]
crypto:strong_rand_bytes/1 1000000 0.09 77718 [ 0.08]
greptimedb_encoder:pad_null_mask/2 210000 0.10 81287 [ 0.39]
maps:keys/1 1000000 0.13 106776 [ 0.11]
lists:map/2 1050000 0.13 113661 [ 0.11]
greptimedb_encoder:merge_columns/2 1000000 0.15 125319 [ 0.13]
greptimedb_encoder:ts_column/1 1000000 0.16 136602 [ 0.14]
greptimedb_encoder:'-merge_columns/1-fun-0-'/1 1000000 0.17 140289 [ 0.14]
maps:iterator/1 2210000 0.17 141323 [ 0.06]
rand:uniform/1 1000000 0.17 142184 [ 0.14]
greptimedb_encoder:collect_columns/2 1010000 0.17 143407 [ 0.14]
maps:merge/2 1000000 0.27 226383 [ 0.23]
base64:encode_binary/2 3000000 0.30 250704 [ 0.08]
greptimedb_SUITE:'-bench_points/2-fun-0-'/2 1000000 0.32 266201 [ 0.27]
erts_internal:map_next/3 2210000 0.34 287450 [ 0.13]
maps:put/3 1000000 0.41 341339 [ 0.34]
greptimedb_encoder:'-convert_columns/1-fun-1-'/2 10000000 0.45 379176 [ 0.04]
maps:from_list/1 2210000 0.50 421257 [ 0.19]
greptimedb_encoder:'-convert_columns/1-fun-0-'/2 10000000 0.51 430521 [ 0.04]
maps:map/2 2210000 0.53 443101 [ 0.20]
greptimedb_encoder:convert_columns/1 1000000 0.63 534406 [ 0.53]
crypto:strong_rand_bytes_nif/1 1000000 0.70 590735 [ 0.59]
sets:get_bucket_s/2 21000000 0.80 676987 [ 0.03]
maps:get/3 21000000 0.84 709155 [ 0.03]
sets:'-from_list/1-fun-0-'/2 21000000 0.89 753292 [ 0.04]
sets:get_bucket/2 21000000 0.90 757840 [ 0.04]
lists:member/2 21000000 0.96 811340 [ 0.04]
maps:next/1 22420000 0.97 814466 [ 0.04]
erlang:phash/2 21000000 1.05 881969 [ 0.04]
greptimedb_encoder:field_column/2 10000000 1.45 1223217 [ 0.12]
greptimedb_encoder:tag_column/2 10000000 1.65 1387376 [ 0.14]
sets:get_slot/2 21000000 1.74 1462640 [ 0.07]
greptimedb_encoder:'-merge_columns/2-fun-0-'/2 21000000 1.78 1498379 [ 0.07]
lists:foldl_1/3 22000000 1.84 1551736 [ 0.07]
lists:do_flatten/2 23010000 2.01 1690727 [ 0.07]
greptimedb_encoder:flatten/2 20790000 2.64 2222132 [ 0.11]
lists:map_1/2 23630000 2.88 2423506 [ 0.10]
greptimedb_encoder:merge_values/2 21000000 3.08 2597834 [ 0.12]
sets:add_element/2 21000000 3.48 2933195 [ 0.14]
maps:map_1/2 22420000 3.49 2942453 [ 0.13]
erts_internal:counters_add/3 197850000 9.09 7660091 [ 0.04]
greptimedb_encoder:merge_column/3 21000000 15.53 13086811 [ 0.62]
counters:add/3 197850000 17.13 14430363 [ 0.07]
persistent_term:get/1 197850000 18.54 15621369 [ 0.08]
------------------------------------------------ ---------- ------- -------- [----------]
Total: 1035360021 100.00% 84242381 [ 0.08]
115 changes: 81 additions & 34 deletions src/greptimedb_encoder.erl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ collect_columns(Points) ->
collect_columns(Points, []).

collect_columns([], Columns) ->
maps:values(merge_columns(Columns));
merge_columns(Columns);
collect_columns([Point | T], Columns) ->
collect_columns(T, [convert_columns(Point) | Columns]).

Expand Down Expand Up @@ -95,17 +95,58 @@ values_size(#{ts_second_values := Values}) ->
length(Values);
values_size(#{ts_millisecond_values := Values}) ->
length(Values);
values_size(#{ts_microsecond_values := Values}) ->
length(Values);
values_size(#{ts_nanosecond_values := Values}) ->
length(Values).

%% Merge the typed value map of an incoming column into an accumulator map.
%%
%% Both arguments are maps keyed by one of the typed value fields
%% (`i8_values' ... `ts_nanosecond_values').
%%
%% * An empty accumulator is replaced by the incoming map as-is.
%% * Otherwise the incoming value stored under the shared key becomes the new
%%   head of the accumulated list under that key (constant-time prepend, so
%%   the accumulated list ends up newest-first).
%%
%% Clauses are tried top to bottom, so the first typed key present in both
%% maps is the one that gets updated. If the two maps share none of the
%% listed keys the call fails with `function_clause'.
merge_values(Acc, Incoming) when map_size(Acc) =:= 0 ->
    Incoming;
merge_values(#{i8_values := Old} = Acc, #{i8_values := New}) ->
    Acc#{i8_values := [New | Old]};
merge_values(#{i16_values := Old} = Acc, #{i16_values := New}) ->
    Acc#{i16_values := [New | Old]};
merge_values(#{i32_values := Old} = Acc, #{i32_values := New}) ->
    Acc#{i32_values := [New | Old]};
merge_values(#{i64_values := Old} = Acc, #{i64_values := New}) ->
    Acc#{i64_values := [New | Old]};
merge_values(#{u8_values := Old} = Acc, #{u8_values := New}) ->
    Acc#{u8_values := [New | Old]};
merge_values(#{u16_values := Old} = Acc, #{u16_values := New}) ->
    Acc#{u16_values := [New | Old]};
merge_values(#{u32_values := Old} = Acc, #{u32_values := New}) ->
    Acc#{u32_values := [New | Old]};
merge_values(#{u64_values := Old} = Acc, #{u64_values := New}) ->
    Acc#{u64_values := [New | Old]};
merge_values(#{f32_values := Old} = Acc, #{f32_values := New}) ->
    Acc#{f32_values := [New | Old]};
merge_values(#{f64_values := Old} = Acc, #{f64_values := New}) ->
    Acc#{f64_values := [New | Old]};
merge_values(#{bool_values := Old} = Acc, #{bool_values := New}) ->
    Acc#{bool_values := [New | Old]};
merge_values(#{binary_values := Old} = Acc, #{binary_values := New}) ->
    Acc#{binary_values := [New | Old]};
merge_values(#{string_values := Old} = Acc, #{string_values := New}) ->
    Acc#{string_values := [New | Old]};
merge_values(#{date_values := Old} = Acc, #{date_values := New}) ->
    Acc#{date_values := [New | Old]};
merge_values(#{ts_second_values := Old} = Acc, #{ts_second_values := New}) ->
    Acc#{ts_second_values := [New | Old]};
merge_values(#{ts_millisecond_values := Old} = Acc, #{ts_millisecond_values := New}) ->
    Acc#{ts_millisecond_values := [New | Old]};
merge_values(#{ts_microsecond_values := Old} = Acc, #{ts_microsecond_values := New}) ->
    Acc#{ts_microsecond_values := [New | Old]};
merge_values(#{ts_nanosecond_values := Old} = Acc, #{ts_nanosecond_values := New}) ->
    Acc#{ts_nanosecond_values := [New | Old]}.

pad_null_mask(#{values := Values, null_mask := NullMask} = Column, RowCount) ->
ValuesSize = values_size(Values),
NewColumn =
if ValuesSize == RowCount ->
maps:remove(null_mask, Column);
true ->
Pad = 8 - (bit_size(NullMask) - floor(bit_size(NullMask) / 8) * 8),
Column#{null_mask => <<0:Pad/integer, NullMask/bits>>}
Column#{null_mask := <<0:Pad/integer, NullMask/bits>>}
end,
NewColumn.

Expand All @@ -118,46 +159,52 @@ convert_columns(#{fields := Fields,
maps:put(
maps:get(column_name, TsColumn), TsColumn, maps:merge(FieldColumns, TagColumns)).

%% Fold one incoming column into the accumulated column of the same name.
%%
%% Column    - accumulated column map; must carry `null_mask', may lack
%%             `values' (treated as an empty value map).
%% NewColumn - incoming column map; must carry `values'.
%%
%% Returns the two maps merged (keys of NewColumn win), with:
%%   * `values'    - per typed key, the accumulated list followed by the
%%                   incoming list;
%%   * `null_mask' - the accumulated mask with a single 1 bit appended.
merge_column(#{null_mask := Mask} = Column, NewColumn) ->
    Existing = maps:get(values, Column, #{}),
    Incoming = maps:get(values, NewColumn),
    %% Keys present on both sides are concatenated, old elements first.
    Concat = fun(_Key, Old, New) -> Old ++ New end,
    Combined = maps:merge(Column, NewColumn),
    Combined#{values => maps:merge_with(Concat, Existing, Incoming),
              null_mask => <<Mask/bits, 1:1/integer>>}.
%% Fold the column called Name from one converted row into its accumulator.
%%
%% Column      - accumulated column map; must carry `null_mask'.
%% Name        - key under which the column is looked up in NextColumns.
%% NextColumns - map of column name to column map for the row being merged.
%%
%% When NextColumns holds Name, the incoming values are merged in via
%% merge_values/2 and a 1 bit is appended to the null mask. When it does not,
%% only a 0 bit is appended and the values are left alone.
merge_column(#{null_mask := Mask} = Column, Name, NextColumns) ->
    case NextColumns of
        #{Name := Incoming} ->
            Merged = merge_values(maps:get(values, Column, #{}),
                                  maps:get(values, Incoming)),
            if
                %% The accumulator holds nothing but `null_mask', so the
                %% incoming column is used as the base and gains the mask.
                map_size(Column) =:= 1 ->
                    Incoming#{values := Merged,
                              null_mask => <<Mask/bits, 1:1/integer>>};
                %% Otherwise keep the accumulator and update it in place.
                true ->
                    Column#{values := Merged,
                            null_mask := <<Mask/bits, 1:1/integer>>}
            end;
        _ ->
            Column#{null_mask := <<Mask/bits, 0:1/integer>>}
    end.

merge_columns(NextColumns, Columns) ->
maps:fold(fun(Name, #{null_mask := NullMask} = Column, AccColumns) ->
MergedColumn =
case maps:find(Name, NextColumns) of
{ok, NewColumn} ->
merge_column(Column, NewColumn);
_ ->
Column#{null_mask => <<NullMask/bits, 0:1/integer>>}
end,
AccColumns#{Name => MergedColumn}
end,
Columns,
lists:map(fun({Name, Column}) -> {Name, merge_column(Column, Name, NextColumns)} end,
Columns).

%% Build the starting accumulator for a column: a map whose only entry is an
%% empty (zero-bit) `null_mask'.
empty_column() ->
    EmptyMask = <<>>,
    #{null_mask => EmptyMask}.
%% Unwrap and reverse a value list of the shape `[[Vn], ..., [V2], V1]'.
%%
%% Every element except the last is expected to be a one-element list; the
%% last element is taken as-is. The result is `[V1, V2, ..., Vn]'. A
%% single-element list is returned unchanged. An empty list, or a leading
%% element that is not a one-element list, fails with `function_clause'.
flatten([_ | _] = Nested) ->
    flatten(Nested, []).

%% Accumulator form of flatten/1: each unwrapped element is pushed onto Out,
%% which reverses the order as a side effect of prepending.
flatten([], Out) ->
    Out;
flatten([Last], Out) ->
    %% The final element is pushed without unwrapping.
    [Last | Out];
flatten([[Item] | Rest], Out) ->
    flatten(Rest, [Item | Out]).

merge_columns(Columns) ->
Names =
sets:to_list(
sets:union(
lists:map(fun(C) ->
sets:from_list(
maps:keys(C))
end,
Columns))),
EmptyColumns =
maps:from_list(
lists:map(fun(Name) -> {Name, empty_column()} end, Names)),
lists:foldr(fun merge_columns/2, EmptyColumns, Columns).
sets:from_list(
lists:flatten(
lists:map(fun(C) -> maps:keys(C) end, Columns)))),
EmptyColumns = lists:map(fun(Name) -> {Name, #{null_mask => <<>>}} end, Names),
lists:map(fun({_Name, Column}) ->
maps:update_with(values,
fun(Values) -> maps:map(fun(_K, VS) -> flatten(VS) end, Values)
end,
Column)
end,
lists:foldl(fun merge_columns/2, EmptyColumns, lists:reverse(Columns))).

ts_column(Ts) when is_map(Ts) ->
maps:merge(#{column_name => ?TS_COLUMN, semantic_type => 'TIMESTAMP'}, Ts);
Expand Down
Loading

0 comments on commit ed1ef46

Please sign in to comment.