From efc6632a3641350a5f308c36b565e5287c84dc91 Mon Sep 17 00:00:00 2001 From: Drake Eidukas Date: Tue, 29 Oct 2024 14:53:35 -0700 Subject: [PATCH 1/4] Add method to reset start & end boundaries on a run --- nominal/core/run.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/nominal/core/run.py b/nominal/core/run.py index 38d56a17..b8d27f89 100644 --- a/nominal/core/run.py +++ b/nominal/core/run.py @@ -236,6 +236,24 @@ def add_connection(self, ref_name: str, connection: Connection | str) -> None: } self._clients.run.add_data_sources_to_run(self._clients.auth_header, data_sources, self.rid) + def reset_bounds(self) -> Self: + """Update the start and end timestamps on the run by inspecting the bounds on + the datasets that compose this run. + + This is primarily useful when the set of datasets composing the run change, or + the individual datasets get modified to have more or less data. + """ + datasets = self.list_datasets() + + dataset_starts = [dataset.bounds.start for _, dataset in datasets if dataset.bounds] + dataset_ends = [dataset.bounds.end for _, dataset in datasets if dataset.bounds] + + # If there are not yet any datasets with bounds in the run, there is nothing to reset + if not dataset_starts or not dataset_ends: + return self + + return self.update(start=min(dataset_starts), end=max(dataset_ends)) + @classmethod def _from_conjure(cls, clients: _Clients, run: scout_run_api.Run) -> Self: return cls( From 35830ab21afd11921907843855d8933426194098 Mon Sep 17 00:00:00 2001 From: Drake Eidukas Date: Fri, 15 Nov 2024 13:01:08 -0800 Subject: [PATCH 2/4] Make resetting run boundaries more graceful / no-op in the case of missing dataset bounds --- nominal/core/run.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nominal/core/run.py b/nominal/core/run.py index b8d27f89..458adacb 100644 --- a/nominal/core/run.py +++ b/nominal/core/run.py @@ -245,14 +245,17 @@ def reset_bounds(self) -> Self: """ datasets = self.list_datasets() + # Get the start and end bounds of all composing datasets dataset_starts = [dataset.bounds.start for _, dataset in datasets if dataset.bounds] dataset_ends = [dataset.bounds.end for _, dataset in datasets if dataset.bounds] - # If there are not yet any datasets with bounds in the run, there is nothing to reset - if not dataset_starts or not dataset_ends: - return self - - return self.update(start=min(dataset_starts), end=max(dataset_ends)) + # If there are no start or end bounds across all input datasets, don't update the + # respective bound of the Run by using None + new_start = min(dataset_starts) if dataset_starts else None + new_end = max(dataset_ends) if dataset_ends else None + + # Update the run and return with updated metadata + return self.update(start=new_start, end=new_end) @classmethod def _from_conjure(cls, clients: _Clients, run: scout_run_api.Run) -> Self: From d6e301bffbb6b8b6bf95e026f5a18bfcfabe1fc0 Mon Sep 17 00:00:00 2001 From: Drake Eidukas Date: Fri, 15 Nov 2024 13:26:26 -0800 Subject: [PATCH 3/4] Add warning --- nominal/core/run.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nominal/core/run.py b/nominal/core/run.py index 458adacb..e1a48c92 100644 --- a/nominal/core/run.py +++ b/nominal/core/run.py @@ -242,6 +242,10 @@ def reset_bounds(self) -> Self: This is primarily useful when the set of datasets composing the run change, or the individual datasets get modified to have more or less data. + + NOTE: this only considers datasets when resetting run boundaries. Setting boundaries + on runs containing streaming connections must be done manually using the UI or + the update() function """ datasets = self.list_datasets() From 4301d8b419434b8027eb18b57017c761d733de2c Mon Sep 17 00:00:00 2001 From: Drake Eidukas Date: Fri, 15 Nov 2024 14:37:19 -0800 Subject: [PATCH 4/4] Formatting --- nominal/core/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nominal/core/run.py b/nominal/core/run.py index e1a48c92..2379bb6d 100644 --- a/nominal/core/run.py +++ b/nominal/core/run.py @@ -242,7 +242,7 @@ def reset_bounds(self) -> Self: This is primarily useful when the set of datasets composing the run change, or the individual datasets get modified to have more or less data. - + NOTE: this only considers datasets when resetting run boundaries. Setting boundaries on runs containing streaming connections must be done manually using the UI or the update() function @@ -257,7 +257,7 @@ def reset_bounds(self) -> Self: # respective bound of the Run by using None new_start = min(dataset_starts) if dataset_starts else None new_end = max(dataset_ends) if dataset_ends else None - + # Update the run and return with updated metadata return self.update(start=new_start, end=new_end)