From 5ad20fa17290b3f25f0fd3da7a06436a8ed597b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=9B=A7=E5=9B=A7?=
Date: Tue, 14 May 2024 01:16:00 -0400
Subject: [PATCH] Migrate benchmark to new server (#3487)

* X

* X

* Revert env var

(cherry picked from commit 74263dfed0aaf9313079bc34dbf6f5e864ebc6a2)
---
Review notes (commentary only: text placed between the three-dash separator
and the diffstat is neither part of the commit message nor part of the diff).

* Payload shape changes. get_run_info() replaces the flat 'commit_id' key
  with a nested 'commit' dict ('hash', 'author', 'email', 'message') and
  renames 'run_timestamp' to 'timestamp'. upload_benchmark_result() now
  sends the query list under 'benchmarks' instead of 'queries' and adds a
  'database_size' key. The receiving server has to accept the new keys.

* The three new _get_commit_* helpers copy _get_git_revision_hash(): they
  run `git log -1` with a --pretty format and return None on any failure
  (bare `except:`), so 'author', 'email' and 'message' can be null in the
  uploaded JSON.

* 'hash' prefers the GITHUB_SHA environment variable, while author, email
  and message always come from `git log -1`.
  NOTE(review): if GITHUB_SHA is not the checked-out HEAD, these fields
  would describe different commits -- confirm against the CI setup.

* get_total_files_size(path) walks `path` with os.walk() and sums
  os.path.getsize() over every listed file name, returning a byte count.
  It has no error handling, so an exception raised by os.path.getsize()
  propagates to the caller.

* The size is logged in MiB through "%d" with a float argument
  (total_size / 1024 ** 2), so the logged value is truncated to an integer;
  the upload receives the untruncated byte count.

* The size is computed and logged before the is_dry_run check, so dry runs
  also walk the serialized dataset directory.

* upload_benchmark_result() gains database_size=None as a defaulted
  parameter, so a call without arguments still works and sends a null
  'database_size'.

 benchmark/benchmark_runner.py | 48 ++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/benchmark/benchmark_runner.py b/benchmark/benchmark_runner.py
index 68aef463494..d81c44e84ec 100644
--- a/benchmark/benchmark_runner.py
+++ b/benchmark/benchmark_runner.py
@@ -267,16 +267,47 @@ def _get_git_revision_hash():
         return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode("utf-8").strip()
     except:
         return None
+
+def _get_commit_message():
+    try:
+        return subprocess.check_output(['git', 'log', '-1', '--pretty=%B']).decode("utf-8").strip()
+    except:
+        return None
 
+def _get_commit_author():
+    try:
+        return subprocess.check_output(['git', 'log', '-1', "--pretty=%an"]).decode("utf-8").strip()
+    except:
+        return None
+
+def _get_commit_email():
+    try:
+        return subprocess.check_output(['git', 'log', '-1', "--pretty=%ae"]).decode("utf-8").strip()
+    except:
+        return None
 
 def get_run_info():
+    commit = {
+        'hash': os.environ.get('GITHUB_SHA', _get_git_revision_hash()),
+        'author': _get_commit_author(),
+        'email': _get_commit_email(),
+        'message': _get_commit_message()
+    }
     return {
-        'commit_id': os.environ.get('GITHUB_SHA', _get_git_revision_hash()),
-        'run_timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        'commit': commit,
+        'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         'note': args.note,
         'dataset': args.dataset
     }
 
+def get_total_files_size(path):
+    total_size = 0
+    for dirpath, _, filenames in os.walk(path):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            total_size += os.path.getsize(fp)
+    return total_size
+
 
 def get_query_info():
     results = []
@@ -294,10 +325,11 @@ def get_query_info():
     return results
 
 
-def upload_benchmark_result():
+def upload_benchmark_result(database_size=None):
     run = get_run_info()
     queries = get_query_info()
-    run['queries'] = queries
+    run['benchmarks'] = queries
+    run['database_size'] = database_size
 
     response = requests.post(
         benchmark_server_url, json=run, headers={
@@ -333,14 +365,18 @@ def upload_benchmark_result():
     benchmark_group.load()
 
     logging.info("Running benchmark...")
-    run_kuzu(serialized_graphs_path[args.dataset + '-ku'])
+    serialized_graph_path = serialized_graphs_path[args.dataset + '-ku']
+    run_kuzu(serialized_graph_path)
     logging.info("Benchmark finished")
 
+    total_size = get_total_files_size(serialized_graph_path)
+    logging.info("Serialized dataset size: %d MiB", total_size / 1024 ** 2)
+
     if is_dry_run:
         logging.info("Dry run, skipping upload")
         sys.exit(0)
 
     # upload benchmark result and logs
     logging.info("Uploading benchmark result...")
-    upload_benchmark_result()
+    upload_benchmark_result(total_size)
     logging.info("Benchmark result uploaded")