Feat/benchmark upgrade (#397)

# Description Instead of reducing the number of dialog turns, reduce the number of contexts. That way we can still stress-test the database with some absurd configs (such as 10000 dialog turns) and also test with genuinely long dialogs (500 turns for `large-misc-long-dialog`). # Checklist - [x] I have performed a self-review of the changes *List here tasks to complete in order to mark this PR as ready for review.* # To Consider - Add tests (if functionality is changed) - Update API reference / tutorials / guides - Update CONTRIBUTING.md (if devel workflow is changed) - Update `.ignore` files, scripts (such as `lint`), distribution manifest (if files are added/deleted) - Search for references to changed entities in the codebase
deeppavlov · Oct 31, 2024 · bd0c535 · bd0c535
1 parent aedd336
commit bd0c535
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 32 deletions.
diff --git a/chatsky/utils/db_benchmark/basic_config.py b/chatsky/utils/db_benchmark/basic_config.py
@@ -91,14 +91,14 @@ class BasicBenchmarkConfig(BenchmarkConfig, frozen=True):
     Dialog length is configured using `from_dialog_len`, `to_dialog_len`, `step_dialog_len`.
     """
 
-    context_num: int = 30
+    context_num: int = 1
     """
     Number of times the contexts will be benchmarked.
     Increasing this number decreases standard error of the mean for benchmarked data.
     """
-    from_dialog_len: int = 25
+    from_dialog_len: int = 50
     """Starting dialog len of a context."""
-    to_dialog_len: int = 50
+    to_dialog_len: int = 75
     """
     Final dialog len of a context.
     :py:meth:`~.BasicBenchmarkConfig.context_updater` will return contexts
@@ -177,7 +177,7 @@ def context_updater(self, context: Context) -> Optional[Context]:
 basic_configurations = {
     "large-misc": BasicBenchmarkConfig(
         from_dialog_len=1,
-        to_dialog_len=26,
+        to_dialog_len=50,
         message_dimensions=(3, 5, 6, 5, 3),
         misc_dimensions=(2, 4, 3, 8, 100),
     ),
@@ -187,24 +187,21 @@ def context_updater(self, context: Context) -> Optional[Context]:
     ),
     "default": BasicBenchmarkConfig(),
     "large-misc-long-dialog": BasicBenchmarkConfig(
-        from_dialog_len=50,
-        to_dialog_len=75,
+        from_dialog_len=500,
+        to_dialog_len=510,
         message_dimensions=(3, 5, 6, 5, 3),
         misc_dimensions=(2, 4, 3, 8, 100),
     ),
     "very-long-dialog-len": BasicBenchmarkConfig(
-        context_num=10,
-        from_dialog_len=1000,
-        to_dialog_len=1050,
+        from_dialog_len=10000,
+        to_dialog_len=10010,
     ),
     "very-long-message-len": BasicBenchmarkConfig(
-        context_num=10,
         from_dialog_len=1,
         to_dialog_len=3,
         message_dimensions=(10000, 1),
     ),
     "very-long-misc-len": BasicBenchmarkConfig(
-        context_num=10,
         from_dialog_len=1,
         to_dialog_len=3,
         misc_dimensions=(10000, 1),

diff --git a/chatsky/utils/db_benchmark/benchmark.py b/chatsky/utils/db_benchmark/benchmark.py
@@ -61,9 +61,6 @@ def time_context_read_write(
         The function should return `None` to stop updating contexts.
         For an example of such function, see implementation of
         :py:meth:`chatsky.utils.db_benchmark.basic_config.BasicBenchmarkConfig.context_updater`.
-
-        To avoid keeping many contexts in memory,
-        this function will be called repeatedly at least `context_num` times.
     :return:
         A tuple of 3 elements.
 
@@ -87,7 +84,7 @@ def time_context_read_write(
     read_times: List[Dict[int, float]] = []
     update_times: List[Dict[int, float]] = []
 
-    for _ in tqdm(range(context_num), desc=f"Benchmarking context storage:{context_storage.full_path}", leave=False):
+    for _ in tqdm(range(context_num), desc="Iteration", leave=False):
         context = context_factory()
 
         ctx_id = uuid4()
@@ -102,25 +99,25 @@ def time_context_read_write(
 
         # read operation benchmark
         read_start = perf_counter()
-        _ = context_storage[ctx_id]
+        context = context_storage[ctx_id]
         read_time = perf_counter() - read_start
         read_times[-1][len(context.labels)] = read_time
 
         if context_updater is not None:
-            updated_context = context_updater(context)
+            context = context_updater(context)
 
-            while updated_context is not None:
+            while context is not None:
                 update_start = perf_counter()
-                context_storage[ctx_id] = updated_context
+                context_storage[ctx_id] = context
                 update_time = perf_counter() - update_start
-                update_times[-1][len(updated_context.labels)] = update_time
+                update_times[-1][len(context.labels)] = update_time
 
                 read_start = perf_counter()
-                _ = context_storage[ctx_id]
+                context = context_storage[ctx_id]
                 read_time = perf_counter() - read_start
-                read_times[-1][len(updated_context.labels)] = read_time
+                read_times[-1][len(context.labels)] = read_time
 
-                updated_context = context_updater(updated_context)
+                context = context_updater(context)
 
         context_storage.clear()
     return write_times, read_times, update_times
@@ -160,7 +157,7 @@ class BenchmarkConfig(BaseModel, abc.ABC, frozen=True):
     Inherit from this class only if `BasicBenchmarkConfig` is not enough for your benchmarking needs.
     """
 
-    context_num: int = 30
+    context_num: int = 1
     """
     Number of times the contexts will be benchmarked.
     Increasing this number decreases standard error of the mean for benchmarked data.

diff --git a/utils/db_benchmark/benchmark_dbs.py b/utils/db_benchmark/benchmark_dbs.py
@@ -9,6 +9,8 @@
 
 from chatsky.utils.db_benchmark import benchmark_all, basic_configurations
 
+from tqdm.auto import tqdm
+
 
 # benchmarks will be saved to this directory
 benchmark_dir = Path("benchmarks")
@@ -36,11 +38,14 @@
 }
 
 
-for db_name, db_uri in dbs.items():
-    benchmark_all(
-        benchmark_dir / f"{db_name}.json",
-        db_name,
-        description="Basic configs",
-        db_uri=db_uri,
-        benchmark_configs=basic_configurations,
-    )
+if __name__ == "__main__":
+    cases = tqdm(dbs.items(), leave=False)
+    for db_name, db_uri in cases:
+        cases.set_description(f"Benchmarking: {db_name}")
+        benchmark_all(
+            benchmark_dir / f"{db_name}.json",
+            db_name,
+            description="Basic configs",
+            db_uri=db_uri,
+            benchmark_configs=basic_configurations,
+        )