Skip to content

Commit

Permalink
Support benchmark type fallback (distributed-system-analysis#3507)
Browse files Browse the repository at this point in the history
PBENCH-1229

The intake code ensures that all dataset (even `server.archiveonly` datasets)
will have a `server.benchmark` value representing the core benchmark used in
the run. The visualization code uses this value to determine compatibility.

However on the staging server, we have pre-existing datasets without the
`server.benchmark` metadata, and the "internal server error" failure mode is
unpleasant.

To be a bit more friendly, this adds a wrapper that will first check the
`server.benchmark` metadata directly, but failing that will check for the
normal Pbench Agent `dataset.metalog.pbench.script` metadata so that older
`pbench-uperf` runs will be recognized. Failing that, the wrapper falls back
to "unknown" so that we'll never have to deal with a `None` value.
  • Loading branch information
dbutenhof authored Jul 28, 2023
1 parent bc7e692 commit fac8d5f
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 14 deletions.
52 changes: 41 additions & 11 deletions lib/pbench/server/api/resources/datasets_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,43 @@
Schema,
)
from pbench.server.cache_manager import CacheManager
from pbench.server.database.models.datasets import Metadata
from pbench.server.database.models.datasets import Dataset, Metadata


class DatasetsCompare(ApiBase):
"""
This class implements the Server API used to retrieve comparison data for visualization.
"""

@staticmethod
def get_benchmark_name(dataset: Dataset) -> str:
"""Convenience function to get dataset's benchmark
The Pbench Server intake constructs a server.benchmark metadata key to
represent the benchmark type. This helper implements a fallback for
datasets processed prior to the implementation of server.benchmark to
avoid null values, by using the Pbench Agent metadata.log "script"
value if that exists and then a constant fallback value.
TODO: This is a workaround from the transition to server.benchmark in
order to cleanly support earlier datasets on the staging server. This
can be removed at some point, but it's not critical.
Args:
dataset: Dataset object
Returns:
A lowercase benchmark identifer string, including the value defined
by Metadata.SERVER_BENCHMARK_UNKNOWN if we can't find a value.
"""
benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)

if not benchmark:
benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
if not benchmark:
benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN
return benchmark

def __init__(self, config: PbenchServerConfig):
super().__init__(
config,
Expand Down Expand Up @@ -70,17 +99,8 @@ def _get(
datasets = params.query.get("datasets")
benchmark_choice = None
for dataset in datasets:
benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
# Validate if all the selected datasets is of same benchmark
if not benchmark_choice:
benchmark_choice = benchmark
elif benchmark != benchmark_choice:
raise APIAbort(
HTTPStatus.BAD_REQUEST,
f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.",
)

# Validate if the user is authorized to access the selected datasets
# Check that the user is authorized to read each dataset
self._check_authorization(
ApiAuthorization(
ApiAuthorizationType.USER_ACCESS,
Expand All @@ -90,6 +110,16 @@ def _get(
)
)

# Determine the dataset benchmark and check consistency
benchmark = DatasetsCompare.get_benchmark_name(dataset)
if not benchmark_choice:
benchmark_choice = benchmark
elif benchmark != benchmark_choice:
raise APIAbort(
HTTPStatus.BAD_REQUEST,
f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.",
)

benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
if not benchmark_type:
raise APIAbort(
Expand Down
5 changes: 2 additions & 3 deletions lib/pbench/server/api/resources/datasets_visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
ParamType,
Schema,
)
from pbench.server.api.resources.datasets_compare import DatasetsCompare
from pbench.server.cache_manager import CacheManager
from pbench.server.database.models.datasets import Metadata


class DatasetsVisualize(ApiBase):
Expand Down Expand Up @@ -59,8 +59,7 @@ def _get(
"""

dataset = params.uri["dataset"]

benchmark = Metadata.getvalue(dataset, Metadata.SERVER_BENCHMARK)
benchmark = DatasetsCompare.get_benchmark_name(dataset)
benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
if not benchmark_type:
raise APIAbort(
Expand Down
36 changes: 36 additions & 0 deletions lib/pbench/test/unit/server/test_datasets_visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,39 @@ def mock_get_metadata(_d: Dataset, _k: str, _u: Optional[User] = None) -> JSON:
response = query_get_as("uperf_1", "test", HTTPStatus.BAD_REQUEST)
assert response.json["message"] == "Unsupported Benchmark: hammerDB"
assert extract_not_called

@pytest.mark.parametrize(
"script,result", (("hammerDB", "hammerDB"), (None, "unknown"))
)
def test_benchmark_fallback(self, query_get_as, monkeypatch, script, result):
"""Test benchmark metadata fallback.
If server.benchmark isn't defined, try dataset.metalog.pbench.script,
then fall back to "unknown".
NOTE: This is deliberately not merged with test_unsupported_benchmark,
which it closely resembles, because the benchmark identification
tested here is intended to be a transitional workaround.
"""

extract_not_called = True

def mock_extract_data(*args, **kwargs) -> JSON:
nonlocal extract_not_called
extract_not_called = False

@staticmethod
def mock_get_metadata(_d: Dataset, k: str, _u: Optional[User] = None) -> JSON:
if k == Metadata.SERVER_BENCHMARK:
return None
elif k == "dataset.metalog.pbench.script":
return script
else:
return "Not what you expected?"

monkeypatch.setattr(CacheManager, "get_inventory", self.mock_get_inventory)
monkeypatch.setattr(Metadata, "getvalue", mock_get_metadata)
monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
response = query_get_as("uperf_1", "test", HTTPStatus.BAD_REQUEST)
assert response.json["message"] == f"Unsupported Benchmark: {result}"
assert extract_not_called

0 comments on commit fac8d5f

Please sign in to comment.