Skip to content

Commit

Permalink
Switch to conbenchlegacy
Browse files Browse the repository at this point in the history
  • Loading branch information
alistaire47 committed Aug 22, 2023
1 parent 19a08f4 commit 2d98a14
Show file tree
Hide file tree
Showing 22 changed files with 62 additions and 73 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ jobs:
src: .
- name: Install Python dependencies
run: |
pip install -e .
pip install -r requirements-dev.txt
pip install https://github.com/ursacomputing/conbench/archive/main.zip
pip install coveralls
pip install -e '.[dev]'
- name: Lint (flake8)
run: |
flake8
Expand Down
26 changes: 8 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Arrow C++, Java, and JavaScript micro benchmarks (which are found in the
[arrow](https://github.com/apache/arrow) repository), and the Arrow R macro
benchmarks (which are found in the
[arrowbench](https://github.com/voltrondata-labs/arrowbench) repository). These
benchmarks use the [Conbench runner](https://github.com/ursacomputing/conbench)
benchmarks use the [Conbench legacy runner](https://github.com/conbench/conbench/tree/main/legacy)
for benchmark execution, and the results are published to Arrow's public
[Conbench server](https://conbench.ursa.dev/).

Expand Down Expand Up @@ -87,15 +87,13 @@ defaults or to disable a particular benchmark.
### Clone repos
(qa) $ cd ~/workspace/
(qa) $ git clone https://github.com/voltrondata-labs/benchmarks.git
(qa) $ git clone https://github.com/ursacomputing/conbench.git
(qa) $ git clone https://github.com/apache/arrow.git
(qa) $ export ARROW_SRC=$(pwd)/arrow


### Install benchmarks dependencies
### Install voltrondata-labs/benchmarks
(qa) $ cd ~/workspace/benchmarks/
(qa) $ pip install -r requirements-dev.txt
(qa) $ pip install -e .
(qa) $ pip install -e '.[dev]'


### Install arrowbench (to run R benchmarks)
Expand All @@ -109,14 +107,6 @@ defaults or to disable a particular benchmark.
(qa) $ pip install -e arrow/dev/archery


### Install conbench dependencies
(qa) $ cd ~/workspace/conbench/
(qa) $ pip install -r requirements-test.txt
(qa) $ pip install -r requirements-build.txt
(qa) $ pip install -r requirements-cli.txt
(qa) $ pip install .


### Conbench credentials default to the following (edit .conbench to configure)

(This is only needed if you plan on publishing benchmark results to a Conbench server.)
Expand Down Expand Up @@ -362,11 +352,11 @@ A "simple benchmark" runs and records the execution time of a unit of work.

Implementation details: Note that this benchmark extends
`benchmarks._benchmark.Benchmark`, implements the minimum required `run()`
method, and registers itself with the `@conbench.runner.register_benchmark`
method, and registers itself with the `@conbenchlegacy.runner.register_benchmark`
decorator.

```python
@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class SimpleBenchmark(_benchmark.Benchmark):
"""Example benchmark without cases."""

Expand Down Expand Up @@ -420,7 +410,7 @@ Implementation details: Note that the following benchmark sets
example above does.

```python
@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class ExternalBenchmark(_benchmark.Benchmark):
"""Example benchmark that just records external results."""

Expand Down Expand Up @@ -473,7 +463,7 @@ sets both `external` and `r_only` to `True`, defines `r_name`, implements
`record()`.

```python
@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class WithoutPythonBenchmark(_benchmark.BenchmarkR):
"""Example R benchmark that doesn't have a Python equivalent."""

Expand Down Expand Up @@ -531,7 +521,7 @@ the cases names).


```python
@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class CasesBenchmark(_benchmark.Benchmark):
"""Example benchmark with cases."""

Expand Down
10 changes: 5 additions & 5 deletions benchmarks/_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import conbench.runner
import conbenchlegacy.runner
import pyarrow
from benchclients import ConbenchClient

Expand Down Expand Up @@ -54,7 +54,7 @@ def arrow_info() -> Dict[str, Any]:
}


class ConbenchCommunicator(conbench.runner.Conbench):
class ConbenchCommunicator(conbenchlegacy.runner.Conbench):
"""Exactly the same as the legacy "Conbench" communication object, with the
publish() method overridden to use the new retrying client.
"""
Expand All @@ -80,7 +80,7 @@ def publish(self, benchmark: dict) -> None:
self.conbench_client.post("/benchmark-results/", benchmark)


class Benchmark(conbench.runner.Benchmark):
class Benchmark(conbenchlegacy.runner.Benchmark):
arguments = []
options = {"cpu_count": {"type": int}}

Expand Down Expand Up @@ -396,8 +396,8 @@ class BenchmarkPythonR(BenchmarkR):
}


@conbench.runner.register_list
class BenchmarkList(conbench.runner.BenchmarkList):
@conbenchlegacy.runner.register_list
class BenchmarkList(conbenchlegacy.runner.BenchmarkList):
def list(self, classes: Dict[str, Benchmark]) -> List[Benchmark]:
"""List of benchmarks to run for all cases & all sources."""

Expand Down
18 changes: 9 additions & 9 deletions benchmarks/_example_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import itertools

import conbench.runner
import conbenchlegacy.runner

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class SimpleBenchmark(_benchmark.Benchmark):
"""Example benchmark without cases."""

Expand All @@ -20,7 +20,7 @@ def _get_benchmark_function(self):
return lambda: 1 + 1


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class ExternalBenchmark(_benchmark.Benchmark):
"""Example benchmark that just records external results."""

Expand Down Expand Up @@ -49,7 +49,7 @@ def run(self, **kwargs):
)


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class WithoutPythonBenchmark(_benchmark.BenchmarkR):
"""Example R benchmark that doesn't have a Python equivalent."""

Expand All @@ -69,7 +69,7 @@ def _get_r_command(self, options):
)


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class CasesBenchmark(_benchmark.Benchmark):
"""Example benchmark with cases."""

Expand Down Expand Up @@ -103,7 +103,7 @@ def _get_benchmark_function(self, rows, columns):
return lambda: int(rows) * [int(columns) * [0]]


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class SimpleBenchmarkException(_benchmark.Benchmark):
name = "example-simple-exception"

Expand All @@ -116,7 +116,7 @@ def _get_benchmark_function(self):
return lambda: 100 / 0


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class BenchmarkNonexistentR(_benchmark.BenchmarkR):
name, r_name = "example-R-only-nonexistent", "foo"

Expand All @@ -129,7 +129,7 @@ def _get_r_command(self):
return f"library(arrowbench); run_one(arrowbench:::{self.r_name})"


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class BenchmarkPlaceboR(_benchmark.BenchmarkR):
name, r_name = "example-R-only-exception", "placebo"

Expand Down Expand Up @@ -160,7 +160,7 @@ def _get_r_command(self, case: tuple) -> str:
return command


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class CasesBenchmarkException(_benchmark.Benchmark):
name = "example-cases-exception"
valid_cases = (
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/cpp_micro_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from typing import List

import conbench.runner
import conbenchlegacy.runner
from benchadapt.adapters import ArcheryAdapter
from benchadapt.log import log

Expand Down Expand Up @@ -81,7 +81,7 @@ def _add_command_options(command: List[str], options: dict):
command.extend([f"--{option}", value])


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class RecordCppMicroBenchmarks(_benchmark.Benchmark):
"""Run the Arrow C++ micro benchmarks."""

Expand Down
6 changes: 3 additions & 3 deletions benchmarks/csv_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools
from typing import Callable

import conbench.runner
import conbenchlegacy.runner
import pyarrow.csv

from benchmarks import _benchmark, _sources
Expand Down Expand Up @@ -67,7 +67,7 @@ def _case_to_param_dict(self, case: tuple) -> dict:
return params


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class CsvReadBenchmark(CsvBenchmark):
"""Read CSV file."""

Expand Down Expand Up @@ -114,7 +114,7 @@ def read_file():
return read_streaming if streaming == "streaming" else read_file


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class CsvWriteBenchmark(CsvBenchmark):
"""Write CSV file."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataframe_to_table_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow
import pyarrow.parquet as parquet

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DataframeToTableBenchmark(_benchmark.BenchmarkPythonR):
"""Convert a pandas dataframe to an arrow table."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataset_filter_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow.dataset

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DatasetFilterBenchmark(_benchmark.Benchmark):
"""Read and filter a dataset."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataset_read_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow
import pyarrow.dataset
import pyarrow.fs

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DatasetReadBenchmark(_benchmark.Benchmark):
"""Read many S3 parquet files into an arrow table."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataset_select_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow.dataset

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DatasetSelectBenchmark(_benchmark.Benchmark):
"""Read and filter a dataset on partition expressions."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataset_selectivity_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow.dataset as ds

from benchmarks import _benchmark


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DatasetSelectivityBenchmark(_benchmark.Benchmark):
"""Read and filter a dataset with different selectivity."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/dataset_serialize_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import time
import uuid

import conbench.runner
import conbenchlegacy.runner
import pyarrow
import pyarrow.dataset as ds

Expand All @@ -21,7 +21,7 @@
OUTPUT_DIR_PREFIX = os.path.join("/dev/shm/", "bench-" + str(uuid.uuid4())[:8])


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class DatasetSerializeBenchmark(_benchmark.Benchmark):
"""
This benchmark is supposed to measure the time it takes to write data from
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/file_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import time

import conbench.runner
import conbenchlegacy.runner
import pyarrow
import pyarrow.feather as feather
import pyarrow.parquet as parquet
Expand Down Expand Up @@ -66,7 +66,7 @@ def _get_r_command(self, source, case, options):
)


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class FileReadBenchmark(FileBenchmark):
"""Read parquet & feather files to arrow tables & pandas data frames."""

Expand Down Expand Up @@ -94,7 +94,7 @@ def _get_benchmark_function(self, source, case):
return f


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class FileWriteBenchmark(FileBenchmark):
"""Write parquet & feather files from arrow tables & pandas data frames."""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/filesystem_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import conbench.runner
import conbenchlegacy.runner
import pyarrow.dataset as ds

from benchmarks._benchmark import Benchmark
Expand All @@ -8,7 +8,7 @@ def run_get_file_info(dataset_uri):
ds.dataset(dataset_uri, format="parquet")


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class GetFileInfoBenchmark(Benchmark):
"""Recursively list all files"""

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/java_micro_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import tempfile

import conbench.runner
import conbenchlegacy.runner

from benchmarks import _benchmark

Expand Down Expand Up @@ -78,7 +78,7 @@ def _parse_benchmark_name(full_name):
return suite, name


@conbench.runner.register_benchmark
@conbenchlegacy.runner.register_benchmark
class RecordJavaMicroBenchmarks(_benchmark.Benchmark):
"""Run the Arrow Java micro benchmarks."""

Expand Down
Loading

0 comments on commit 2d98a14

Please sign in to comment.