address code review

rapidsai · Aug 30, 2024 · 6f3e5c5
1 parent a15aa35
commit 6f3e5c5
Show file tree

Hide file tree

Showing 7 changed files with 11 additions and 48 deletions.
diff --git a/dask_cuda/benchmarks/local_cudf_groupby.py b/dask_cuda/benchmarks/local_cudf_groupby.py
@@ -7,7 +7,7 @@
 import dask
 import dask.dataframe as dd
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes, parse_bytes
+from dask.utils import format_bytes
 
 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -260,13 +260,6 @@ def parse_args():
             "type": str,
             "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
         {
             "name": "--runs",
             "default": 3,

diff --git a/dask_cuda/benchmarks/local_cudf_merge.py b/dask_cuda/benchmarks/local_cudf_merge.py
@@ -9,7 +9,7 @@
 import dask
 import dask.dataframe as dd
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes, parse_bytes
+from dask.utils import format_bytes
 
 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -335,13 +335,6 @@ def parse_args():
             "action": "store_true",
             "help": "Use shuffle join (takes precedence over '--broadcast-join').",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
         {
             "name": "--frac-match",
             "default": 0.3,

diff --git a/dask_cuda/benchmarks/local_cudf_shuffle.py b/dask_cuda/benchmarks/local_cudf_shuffle.py
@@ -228,13 +228,6 @@ def parse_args():
             "type": str,
             "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
         {
             "name": "--runs",
             "default": 3,

diff --git a/dask_cuda/benchmarks/local_cupy.py b/dask_cuda/benchmarks/local_cupy.py
@@ -8,7 +8,7 @@
 
 from dask import array as da
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes, parse_bytes
+from dask.utils import format_bytes
 
 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -297,13 +297,6 @@ def parse_args():
             "type": int,
             "help": "Chunk size (default 2500).",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB').",
-        },
         {
             "name": "--runs",
             "default": 3,

diff --git a/dask_cuda/benchmarks/local_cupy_map_overlap.py b/dask_cuda/benchmarks/local_cupy_map_overlap.py
@@ -10,7 +10,7 @@
 
 from dask import array as da
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes, parse_bytes
+from dask.utils import format_bytes
 
 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -168,13 +168,6 @@ def parse_args():
             "type": int,
             "help": "Kernel size, 2*k+1, in each dimension (default 1)",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
         {
             "name": "--runs",
             "default": 3,

diff --git a/dask_cuda/benchmarks/local_read_parquet.py b/dask_cuda/benchmarks/read_parquet.py
@@ -203,7 +203,6 @@ def parse_args():
         },
         {
             "name": "--path",
-            "default": "s3://dask-cudf-parquet-testing/dedup_parquet",
             "type": str,
             "help": "Parquet directory to read from (must be a flat directory).",
         },
@@ -250,14 +249,6 @@ def parse_args():
             "type": int,
             "help": "Number of runs",
         },
-        # NOTE: The following args are not relevant to this benchmark
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
     ]
 
     return parse_benchmark_args(

diff --git a/dask_cuda/benchmarks/utils.py b/dask_cuda/benchmarks/utils.py
@@ -337,6 +337,13 @@ def parse_benchmark_args(
         "If the files already exist, new files are created with a uniquified "
         "BASENAME.",
     )
+    parser.add_argument(
+        "--ignore-size",
+        default="1 MiB",
+        metavar="nbytes",
+        type=parse_bytes,
+        help="Ignore messages smaller than this (default '1 MB')",
+    )
 
     for args in args_list:
         name = args.pop("name")