From ec97c4cbc0e147b1dcf0da883faf33bb32a4abac Mon Sep 17 00:00:00 2001 From: Franciszek Stachura Date: Fri, 7 Feb 2025 13:21:49 +0100 Subject: [PATCH] update: Implement chunksize calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code by Théo Lebrun --- elixir/update.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/elixir/update.py b/elixir/update.py index 01064e33..7b94052d 100644 --- a/elixir/update.py +++ b/elixir/update.py @@ -1,5 +1,5 @@ from concurrent.futures import ProcessPoolExecutor, wait -from multiprocessing import Manager +from multiprocessing import Manager, cpu_count import logging from threading import Lock @@ -327,12 +327,14 @@ def split_into_chunks(list, chunk_size): return [list[i:i+chunk_size] for i in range(0, len(list), chunk_size)] # Update a single version -def update_version(db, tag, pool, manager, chunk_size, dts_comp_support): +def update_version(db, tag, pool, manager, dts_comp_support): state = build_partial_state(db, tag) # Collect blobs to process and split list of blobs into chunks idxes = [(idx, hash, filename) for (idx, (hash, filename)) in state.idx_to_hash_and_filename.items()] - chunks = split_into_chunks(idxes, chunk_size) + chunksize = int(len(idxes) / cpu_count()) + chunksize = min(max(1, chunksize), 400) + chunks = split_into_chunks(idxes, chunksize) def after_all_defs_done(): # NOTE: defs database cannot be written to from now on. This is very important - process pool is used, @@ -425,7 +427,7 @@ def after_all_comps_done(): if not db.vers.exists(tag): print("updating tag", tag) - update_version(db, tag, pool, manager, 1000, dts_comp_support) + update_version(db, tag, pool, manager, dts_comp_support) db.close() db = None