From a2e3532a57758687c26e20b3eb1d26757506c933 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sun, 16 Jun 2024 16:02:15 +0800 Subject: [PATCH] docs: add guide for tsbs benchmark (#4151) * docs: add guide for tsbs benchmark Signed-off-by: Ruihang Xia * fix typo Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- docs/benchmarks/tsbs/README.md | 253 +++++++++++++++++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 docs/benchmarks/tsbs/README.md diff --git a/docs/benchmarks/tsbs/README.md b/docs/benchmarks/tsbs/README.md new file mode 100644 index 000000000000..8a98b4f8226e --- /dev/null +++ b/docs/benchmarks/tsbs/README.md @@ -0,0 +1,253 @@ +# How to run TSBS Benchmark + +This document contains the steps to run TSBS Benchmark. Our results are listed in other files in the same directory. + +## Prerequisites + +You need the following tools to run TSBS Benchmark: +- Go +- git +- make +- Rust (optional, if you want to build the DB from source) + +## Build TSBS suite + +Clone our fork of TSBS: + +```shell +git clone https://github.com/GreptimeTeam/tsbs.git +``` + +Then build it: + +```shell +cd tsbs +make +``` + +You can check the `bin/` directory for compiled binaries. We will only use some of them. + +```shell +ls ./bin/ +``` + +Binaries we will use later: +- `tsbs_generate_data` +- `tsbs_generate_queries` +- `tsbs_load_greptime` +- `tsbs_run_queries_influx` + +## Generate test data and queries + +The data is generated by `tsbs_generate_data`: + +```shell +mkdir bench-data +./bin/tsbs_generate_data --use-case="cpu-only" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:00Z" \ + --log-interval="10s" --format="influx" \ + > ./bench-data/influx-data.lp +``` + +Here we generate 4000 time-series over 3 days with a 10s interval. We'll write the data using the influx line protocol, so the target format is `influx`. + +Queries are generated by `tsbs_generate_queries`.
You can change the parameters, but make sure they stay consistent with the ones passed to `tsbs_generate_data`. + +```shell +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type cpu-max-all-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-cpu-max-all-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type cpu-max-all-8 \ + --format="greptime" \ + > ./bench-data/greptime-queries-cpu-max-all-8.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=50 \ + --query-type double-groupby-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-double-groupby-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=50 \ + --query-type double-groupby-5 \ + --format="greptime" \ + > ./bench-data/greptime-queries-double-groupby-5.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=50 \ + --query-type double-groupby-all \ + --format="greptime" \ + > ./bench-data/greptime-queries-double-groupby-all.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=50 \ + --query-type groupby-orderby-limit \ + --format="greptime" \ + > ./bench-data/greptime-queries-groupby-orderby-limit.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ +
--timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type high-cpu-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-high-cpu-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=50 \ + --query-type high-cpu-all \ + --format="greptime" \ + > ./bench-data/greptime-queries-high-cpu-all.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=10 \ + --query-type lastpoint \ + --format="greptime" \ + > ./bench-data/greptime-queries-lastpoint.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-1-1-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-single-groupby-1-1-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-1-1-12 \ + --format="greptime" \ + > ./bench-data/greptime-queries-single-groupby-1-1-12.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-1-8-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-single-groupby-1-8-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-5-1-1 \ + --format="greptime" \ + > 
./bench-data/greptime-queries-single-groupby-5-1-1.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-5-1-12 \ + --format="greptime" \ + > ./bench-data/greptime-queries-single-groupby-5-1-12.dat +./bin/tsbs_generate_queries \ + --use-case="devops" --seed=123 --scale=4000 \ + --timestamp-start="2023-06-11T00:00:00Z" \ + --timestamp-end="2023-06-14T00:00:01Z" \ + --queries=100 \ + --query-type single-groupby-5-8-1 \ + --format="greptime" \ + > ./bench-data/greptime-queries-single-groupby-5-8-1.dat +``` + +## Start GreptimeDB + +Refer to our [document](https://docs.greptime.com/getting-started/installation/overview) for how to install and start GreptimeDB. Alternatively, check this [document](https://docs.greptime.com/contributor-guide/getting-started#compile-and-run) for how to build GreptimeDB from source. + +## Write Data + +After the DB is started, we can use `tsbs_load_greptime` to test the write performance. + +```shell +./bin/tsbs_load_greptime \ + --urls=http://localhost:4000 \ + --file=./bench-data/influx-data.lp \ + --batch-size=3000 \ + --gzip=false \ + --workers=6 +``` + +Parameters here are only provided as an example. You can choose whatever you like or adjust them to match your target scenario. + +Note that if you want to rerun `tsbs_load_greptime`, please destroy and restart the DB and clear its previous data first. Duplicated data left over from an earlier run will impact the write and query performance. + +## Query Data + +After the data is imported, you can then run queries. The following script runs all queries. You can also choose a subset of queries to run.
+ +```shell +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-cpu-max-all-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-cpu-max-all-8.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-double-groupby-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-double-groupby-5.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-double-groupby-all.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-groupby-orderby-limit.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-high-cpu-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-high-cpu-all.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-lastpoint.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-single-groupby-1-1-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-single-groupby-1-1-12.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-single-groupby-1-8-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-single-groupby-5-1-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx 
--file=./bench-data/greptime-queries-single-groupby-5-1-12.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +./bin/tsbs_run_queries_influx --file=./bench-data/greptime-queries-single-groupby-5-8-1.dat \ + --db-name=benchmark \ + --urls="http://localhost:4000" +``` + +Rerunning queries does not require re-importing the data. Just execute the corresponding command again.