diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
index 2859ef05..5828d3a6 100644
--- a/.github/workflows/actions.yml
+++ b/.github/workflows/actions.yml
@@ -26,10 +26,7 @@ jobs:
           src: .
       - name: Install Python dependencies
         run: |
-          pip install -e .
-          pip install -r requirements-dev.txt
-          pip install https://github.com/ursacomputing/conbench/archive/main.zip
-          pip install coveralls
+          pip install -e '.[dev]'
       - name: Lint (flake8)
         run: |
           flake8
diff --git a/README.md b/README.md
index 3f0f2a58..8d757a27 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Arrow C++, Java, and JavaScript micro benchmarks (which are found in the
 [arrow](https://github.com/apache/arrow) repository), and the Arrow R macro
 benchmarks (which are found in the
 [arrowbench](https://github.com/voltrondata-labs/arrowbench) repository). These
-benchmarks use the [Conbench runner](https://github.com/ursacomputing/conbench)
+benchmarks use the [Conbench legacy runner](https://github.com/conbench/conbench/tree/main/legacy)
 for benchmark execution, and the results are published to Arrow's public
 [Conbench server](https://conbench.ursa.dev/).

@@ -87,15 +87,13 @@ defaults or to disable a particular benchmark.
 ### Clone repos
     (qa) $ cd ~/workspace/
     (qa) $ git clone https://github.com/voltrondata-labs/benchmarks.git
-    (qa) $ git clone https://github.com/ursacomputing/conbench.git
     (qa) $ git clone https://github.com/apache/arrow.git
     (qa) $ export ARROW_SRC=$(pwd)/arrow

-### Install benchmarks dependencies
+### Install voltrondata-labs/benchmarks

     (qa) $ cd ~/workspace/benchmarks/
-    (qa) $ pip install -r requirements-dev.txt
-    (qa) $ pip install -e .
+    (qa) $ pip install -e '.[dev]'

 ### Install arrowbench (to run R benchmarks)

@@ -109,14 +107,6 @@ defaults or to disable a particular benchmark.

     (qa) $ pip install -e arrow/dev/archery

-### Install conbench dependencies
-    (qa) $ cd ~/workspace/conbench/
-    (qa) $ pip install -r requirements-test.txt
-    (qa) $ pip install -r requirements-build.txt
-    (qa) $ pip install -r requirements-cli.txt
-    (qa) $ pip install .
-
-
 ### Conbench credentials default to this following (edit .conbench to configure)

 (This is only needed if you plan on publishing benchmark results to a Conbench server.)
@@ -362,11 +352,11 @@ A "simple benchmark" runs and records the execution time of a unit of work.

 Implementation details: Note that this benchmark extends
 `benchmarks._benchmark.Benchmark`, implements the minimum required `run()`
-method, and registers itself with the `@conbench.runner.register_benchmark`
+method, and registers itself with the `@conbenchlegacy.runner.register_benchmark`
 decorator.

 ```python
-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class SimpleBenchmark(_benchmark.Benchmark):
     """Example benchmark without cases."""

@@ -420,7 +410,7 @@ Implementation details: Note that the following benchmark sets
 example above does.

 ```python
-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class ExternalBenchmark(_benchmark.Benchmark):
     """Example benchmark that just records external results."""

@@ -473,7 +463,7 @@ sets both `external` and `r_only` to `True`, defines `r_name`, implements
 `record()`.

 ```python
-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class WithoutPythonBenchmark(_benchmark.BenchmarkR):
     """Example R benchmark that doesn't have a Python equivalent."""

@@ -531,7 +521,7 @@
 the cases names).

 ```python
-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class CasesBenchmark(_benchmark.Benchmark):
     """Example benchmark with cases."""

diff --git a/benchmarks/_benchmark.py b/benchmarks/_benchmark.py
index ac3e628b..1250b728 100644
--- a/benchmarks/_benchmark.py
+++ b/benchmarks/_benchmark.py
@@ -9,7 +9,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union

-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow
 from benchclients import ConbenchClient

@@ -54,7 +54,7 @@ def arrow_info() -> Dict[str, Any]:
     }


-class ConbenchCommunicator(conbench.runner.Conbench):
+class ConbenchCommunicator(conbenchlegacy.runner.Conbench):
     """Exactly the same as the legacy "Conbench" communication object, with
     the publish() method overridden to use the new retrying client.
     """
@@ -80,7 +80,7 @@ def publish(self, benchmark: dict) -> None:
         self.conbench_client.post("/benchmark-results/", benchmark)


-class Benchmark(conbench.runner.Benchmark):
+class Benchmark(conbenchlegacy.runner.Benchmark):
     arguments = []
     options = {"cpu_count": {"type": int}}

@@ -396,8 +396,8 @@ class BenchmarkPythonR(BenchmarkR):
     }


-@conbench.runner.register_list
-class BenchmarkList(conbench.runner.BenchmarkList):
+@conbenchlegacy.runner.register_list
+class BenchmarkList(conbenchlegacy.runner.BenchmarkList):
     def list(self, classes: Dict[str, Benchmark]) -> List[Benchmark]:
         """List of benchmarks to run for all cases & all sources."""

diff --git a/benchmarks/_example_benchmarks.py b/benchmarks/_example_benchmarks.py
index affc9afd..1021d948 100644
--- a/benchmarks/_example_benchmarks.py
+++ b/benchmarks/_example_benchmarks.py
@@ -1,11 +1,11 @@
 import itertools

-import conbench.runner
+import conbenchlegacy.runner

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class SimpleBenchmark(_benchmark.Benchmark):
     """Example benchmark without cases."""

@@ -20,7 +20,7 @@ def _get_benchmark_function(self):
         return lambda: 1 + 1


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class ExternalBenchmark(_benchmark.Benchmark):
     """Example benchmark that just records external results."""

@@ -49,7 +49,7 @@ def run(self, **kwargs):
         )


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class WithoutPythonBenchmark(_benchmark.BenchmarkR):
     """Example R benchmark that doesn't have a Python equivalent."""

@@ -69,7 +69,7 @@ def _get_r_command(self, options):
         )


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class CasesBenchmark(_benchmark.Benchmark):
     """Example benchmark with cases."""

@@ -103,7 +103,7 @@ def _get_benchmark_function(self, rows, columns):
         return lambda: int(rows) * [int(columns) * [0]]


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class SimpleBenchmarkException(_benchmark.Benchmark):
     name = "example-simple-exception"

@@ -116,7 +116,7 @@ def _get_benchmark_function(self):
         return lambda: 100 / 0


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class BenchmarkNonexistentR(_benchmark.BenchmarkR):
     name, r_name = "example-R-only-nonexistent", "foo"

@@ -129,7 +129,7 @@ def _get_r_command(self):
         return f"library(arrowbench); run_one(arrowbench:::{self.r_name})"


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class BenchmarkPlaceboR(_benchmark.BenchmarkR):
     name, r_name = "example-R-only-exception", "placebo"

@@ -160,7 +160,7 @@ def _get_r_command(self, case: tuple) -> str:
         return command


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class CasesBenchmarkException(_benchmark.Benchmark):
     name = "example-cases-exception"
     valid_cases = (
diff --git a/benchmarks/cpp_micro_benchmarks.py b/benchmarks/cpp_micro_benchmarks.py
index 94be1f29..72c75397 100644
--- a/benchmarks/cpp_micro_benchmarks.py
+++ b/benchmarks/cpp_micro_benchmarks.py
@@ -3,7 +3,7 @@
 import os
 from typing import List

-import conbench.runner
+import conbenchlegacy.runner
 from benchadapt.adapters import ArcheryAdapter
 from benchadapt.log import log

@@ -81,7 +81,7 @@ def _add_command_options(command: List[str], options: dict):
         command.extend([f"--{option}", value])


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class RecordCppMicroBenchmarks(_benchmark.Benchmark):
     """Run the Arrow C++ micro benchmarks."""

diff --git a/benchmarks/csv_benchmark.py b/benchmarks/csv_benchmark.py
index 36d37bc6..4fc22123 100644
--- a/benchmarks/csv_benchmark.py
+++ b/benchmarks/csv_benchmark.py
@@ -1,7 +1,7 @@
 import itertools
 from typing import Callable

-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow.csv

 from benchmarks import _benchmark, _sources
@@ -67,7 +67,7 @@ def _case_to_param_dict(self, case: tuple) -> dict:
         return params


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class CsvReadBenchmark(CsvBenchmark):
     """Read CSV file."""

@@ -114,7 +114,7 @@ def read_file():
         return read_streaming if streaming == "streaming" else read_file


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class CsvWriteBenchmark(CsvBenchmark):
     """Write CSV file."""

diff --git a/benchmarks/dataframe_to_table_benchmark.py b/benchmarks/dataframe_to_table_benchmark.py
index f1fa3fe8..0cba290e 100644
--- a/benchmarks/dataframe_to_table_benchmark.py
+++ b/benchmarks/dataframe_to_table_benchmark.py
@@ -1,11 +1,11 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow
 import pyarrow.parquet as parquet

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DataframeToTableBenchmark(_benchmark.BenchmarkPythonR):
     """Convert a pandas dataframe to an arrow table."""

diff --git a/benchmarks/dataset_filter_benchmark.py b/benchmarks/dataset_filter_benchmark.py
index e3e4fac2..0859d9bb 100644
--- a/benchmarks/dataset_filter_benchmark.py
+++ b/benchmarks/dataset_filter_benchmark.py
@@ -1,10 +1,10 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow.dataset

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DatasetFilterBenchmark(_benchmark.Benchmark):
     """Read and filter a dataset."""

diff --git a/benchmarks/dataset_read_benchmark.py b/benchmarks/dataset_read_benchmark.py
index dde574b6..388b7308 100644
--- a/benchmarks/dataset_read_benchmark.py
+++ b/benchmarks/dataset_read_benchmark.py
@@ -1,4 +1,4 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow
 import pyarrow.dataset
 import pyarrow.fs
@@ -6,7 +6,7 @@
 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DatasetReadBenchmark(_benchmark.Benchmark):
     """Read many S3 parquet files into an arrow table."""

diff --git a/benchmarks/dataset_select_benchmark.py b/benchmarks/dataset_select_benchmark.py
index b4cbde0e..c91895e3 100644
--- a/benchmarks/dataset_select_benchmark.py
+++ b/benchmarks/dataset_select_benchmark.py
@@ -1,10 +1,10 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow.dataset

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DatasetSelectBenchmark(_benchmark.Benchmark):
     """Read and filter a dataset on partition expressions."""

diff --git a/benchmarks/dataset_selectivity_benchmark.py b/benchmarks/dataset_selectivity_benchmark.py
index 77441600..2b01172c 100644
--- a/benchmarks/dataset_selectivity_benchmark.py
+++ b/benchmarks/dataset_selectivity_benchmark.py
@@ -1,10 +1,10 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow.dataset as ds

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DatasetSelectivityBenchmark(_benchmark.Benchmark):
     """Read and filter a dataset with different selectivity."""

diff --git a/benchmarks/dataset_serialize_benchmark.py b/benchmarks/dataset_serialize_benchmark.py
index ab13170b..1ab96c7c 100644
--- a/benchmarks/dataset_serialize_benchmark.py
+++ b/benchmarks/dataset_serialize_benchmark.py
@@ -7,7 +7,7 @@
 import time
 import uuid

-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow
 import pyarrow.dataset as ds

@@ -21,7 +21,7 @@
 OUTPUT_DIR_PREFIX = os.path.join("/dev/shm/", "bench-" + str(uuid.uuid4())[:8])


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class DatasetSerializeBenchmark(_benchmark.Benchmark):
     """
     This benchmark is supposed to measure the time it takes to write data from
diff --git a/benchmarks/file_benchmark.py b/benchmarks/file_benchmark.py
index b68d7608..53e9af5c 100644
--- a/benchmarks/file_benchmark.py
+++ b/benchmarks/file_benchmark.py
@@ -1,6 +1,6 @@
 import time

-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow
 import pyarrow.feather as feather
 import pyarrow.parquet as parquet
@@ -66,7 +66,7 @@ def _get_r_command(self, source, case, options):
         )


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class FileReadBenchmark(FileBenchmark):
     """Read parquet & feather files to arrow tables & pandas data frames."""

@@ -94,7 +94,7 @@ def _get_benchmark_function(self, source, case):
         return f


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class FileWriteBenchmark(FileBenchmark):
     """Write parquet & feather files from arrow tables & pandas data frames."""

diff --git a/benchmarks/filesystem_benchmark.py b/benchmarks/filesystem_benchmark.py
index 2d1233e1..1a4366e9 100644
--- a/benchmarks/filesystem_benchmark.py
+++ b/benchmarks/filesystem_benchmark.py
@@ -1,4 +1,4 @@
-import conbench.runner
+import conbenchlegacy.runner
 import pyarrow.dataset as ds

 from benchmarks._benchmark import Benchmark
@@ -8,7 +8,7 @@ def run_get_file_info(dataset_uri):
     ds.dataset(dataset_uri, format="parquet")


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class GetFileInfoBenchmark(Benchmark):
     """Recursively list all files"""

diff --git a/benchmarks/java_micro_benchmarks.py b/benchmarks/java_micro_benchmarks.py
index 0375d4e4..d2c4ba02 100644
--- a/benchmarks/java_micro_benchmarks.py
+++ b/benchmarks/java_micro_benchmarks.py
@@ -2,7 +2,7 @@
 import json
 import tempfile

-import conbench.runner
+import conbenchlegacy.runner

 from benchmarks import _benchmark

@@ -78,7 +78,7 @@ def _parse_benchmark_name(full_name):
     return suite, name


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class RecordJavaMicroBenchmarks(_benchmark.Benchmark):
     """Run the Arrow Java micro benchmarks."""

diff --git a/benchmarks/js_micro_benchmarks.py b/benchmarks/js_micro_benchmarks.py
index 3fda98ac..e39af9f2 100644
--- a/benchmarks/js_micro_benchmarks.py
+++ b/benchmarks/js_micro_benchmarks.py
@@ -2,7 +2,7 @@
 import os
 import pathlib

-import conbench.runner
+import conbenchlegacy.runner

 from benchmarks import _benchmark

@@ -28,7 +28,7 @@ def _parse_benchmark_tags(name):
     return dict(zip(keys, values))


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class RecordJavaScriptMicroBenchmarks(_benchmark.Benchmark):
     """Run the Arrow JavaScript micro benchmarks."""

diff --git a/benchmarks/partitioned_dataset_filter_benchmark.py b/benchmarks/partitioned_dataset_filter_benchmark.py
index 05ee504f..74a5b0be 100644
--- a/benchmarks/partitioned_dataset_filter_benchmark.py
+++ b/benchmarks/partitioned_dataset_filter_benchmark.py
@@ -1,9 +1,9 @@
-import conbench.runner
+import conbenchlegacy.runner

 from benchmarks import _benchmark


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class PartitionedDatasetFilterBenchmark(_benchmark.BenchmarkR):
     """Read and filter a partitioned dataset."""

diff --git a/benchmarks/tests/test_tpch_benchmark.py b/benchmarks/tests/test_tpch_benchmark.py
index 7f20ee5b..bbbd26ef 100644
--- a/benchmarks/tests/test_tpch_benchmark.py
+++ b/benchmarks/tests/test_tpch_benchmark.py
@@ -1,6 +1,6 @@
 import copy

-import conbench.runner
+import conbenchlegacy.runner
 import pytest

 from .. import tpch_benchmark
@@ -110,7 +110,7 @@ def test_benchmark_r():

 def test_cli():
     if (
-        int(conbench.runner.machine_info(None)["memory_bytes"])
+        int(conbenchlegacy.runner.machine_info(None)["memory_bytes"])
         > 1.1 * 32 * 1024 * 1024 * 1024
     ):
         pytest.skip("CLI options are different on machines with more than 32GB RAM")
diff --git a/benchmarks/tpch_benchmark.py b/benchmarks/tpch_benchmark.py
index c502aac0..59993c02 100644
--- a/benchmarks/tpch_benchmark.py
+++ b/benchmarks/tpch_benchmark.py
@@ -1,4 +1,4 @@
-import conbench.runner
+import conbenchlegacy.runner

 from benchmarks import _benchmark

@@ -7,7 +7,7 @@
 def get_valid_cases():
     result = [["query_id", "scale_factor", "format"]]
     scale_factors = [1, 10]
-    machine_info = conbench.runner.machine_info(host_name=None)
+    machine_info = conbenchlegacy.runner.machine_info(host_name=None)
     # scale_factor=10 runs on machines with 64GB of memory, but not 32.
     # (Specifically, query 21 will fail with 32GB of memory.)
     # The exact amount of memory needed for all queries to pass is for now unknown.
@@ -21,7 +21,7 @@
     return result


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class TpchBenchmark(_benchmark.BenchmarkR):
     external, r_only = True, True
     name, r_name = "tpch", "tpc_h"
diff --git a/benchmarks/wide_dataframe_benchmark.py b/benchmarks/wide_dataframe_benchmark.py
index 669ff36c..1a1869ef 100644
--- a/benchmarks/wide_dataframe_benchmark.py
+++ b/benchmarks/wide_dataframe_benchmark.py
@@ -1,7 +1,7 @@
 import os
 import pathlib

-import conbench.runner
+import conbenchlegacy.runner
 import numpy
 import pandas
 import pyarrow
@@ -10,7 +10,7 @@
 from benchmarks import _benchmark, _sources


-@conbench.runner.register_benchmark
+@conbenchlegacy.runner.register_benchmark
 class WideDataframeBenchmark(_benchmark.Benchmark):
     """
     Read wide dataframe from parquet with pandas.
diff --git a/requirements-dev.txt b/requirements-dev.txt
index eab36a83..e36bf870 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,6 @@
 black
 coverage
+coveralls
 flake8
 isort
 pytest
diff --git a/requirements.txt b/requirements.txt
index b1654ed0..76e10a69 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 click
 pandas
 pyarrow
-benchadapt@git+https://github.com/conbench/conbench.git@81a0272cc2#subdirectory=benchadapt/python
-benchclients@git+https://github.com/conbench/conbench.git@81a0272cc2#subdirectory=benchclients/python
+benchadapt@git+https://github.com/conbench/conbench.git@7bbed2b036#subdirectory=benchadapt/python
+benchclients@git+https://github.com/conbench/conbench.git@7bbed2b036#subdirectory=benchclients/python
+conbenchlegacy@git+https://github.com/conbench/conbench.git@7bbed2b036#subdirectory=legacy
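
The sketch below summarizes what this patch means for a downstream benchmark class: within this repo, every source-level change is the swap of `conbench.runner` for `conbenchlegacy.runner`, which is now installed from the `legacy` subdirectory of the conbench repo (per `requirements.txt`) and pulled in by `pip install -e '.[dev]'`. The class is modeled on the `SimpleBenchmark` example in `benchmarks/_example_benchmarks.py` shown above; the class name, `name` attribute, and timed function here are hypothetical, invented for illustration.

```python
# Hypothetical post-migration benchmark (not part of this patch), modeled on
# SimpleBenchmark in benchmarks/_example_benchmarks.py.
import conbenchlegacy.runner  # was: import conbench.runner

from benchmarks import _benchmark


@conbenchlegacy.runner.register_benchmark  # was: @conbench.runner.register_benchmark
class MigrationSmokeBenchmark(_benchmark.Benchmark):
    """Hypothetical example: exercises the renamed runner import path."""

    name = "example-migration-smoke"

    def _get_benchmark_function(self):
        # The timed unit of work, following the in-repo examples.
        return lambda: sum(range(1000))
```

A class like this is registered and listed through the same `BenchmarkList` machinery as before; the `conbenchlegacy` package appears drop-in compatible with the old `conbench.runner` module here, since no call sites other than imports and decorator paths change anywhere in this diff.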