
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Nov 15, 2024
1 parent 4a6c559 commit a0ff697
Showing 13 changed files with 42 additions and 116 deletions.
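Every hunk in this commit is a pure re-wrap: expressions that a formatter had previously split across several lines are collapsed onto single lines, with no behavioral change. All of the collapsed lines fit within 100 columns (the longest, the HISTORY_JSON_SCHEMA assignment, is exactly 100 characters), which suggests a formatter hook running with a 100-character line-length limit. A minimal pre-commit configuration along these lines would produce this kind of auto-fix; the hook choice and version below are assumptions for illustration, not taken from the repository:

    # Hypothetical .pre-commit-config.yaml (a sketch; the hook and rev are assumed,
    # only the 100-character limit is inferred from the collapsed lines below)
    repos:
      - repo: https://github.com/psf/black
        rev: 24.10.0
        hooks:
          - id: black
            args: ["--line-length=100"]

Running pre-commit run --all-files locally applies the same hooks that pre-commit.ci runs on each push.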
8 changes: 2 additions & 6 deletions concatenator/attribute_handling.py
@@ -12,13 +12,9 @@
 import concatenator
 
 # Values needed for history_json attribute
-HISTORY_JSON_SCHEMA = (
-    "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json"
-)
+HISTORY_JSON_SCHEMA = "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json"
 PROGRAM = "stitchee"
-PROGRAM_REF = (
-    "https://cmr.earthdata.nasa.gov:443/search/concepts/S2940253910-LARC_CLOUD"
-)
+PROGRAM_REF = "https://cmr.earthdata.nasa.gov:443/search/concepts/S2940253910-LARC_CLOUD"
 VERSION = importlib_metadata.distribution("stitchee").version
 
 
32 changes: 9 additions & 23 deletions concatenator/dataset_and_group_handling.py
@@ -52,9 +52,9 @@ def walk(
         # Flatten the paths of variables referenced in the 'coordinates' attribute
         flatten_coordinate_attribute_paths(new_dataset, var, var_group_name)
 
-        if (
-            len(var.dimensions) == 1
-        ) and _string_dimension_name_pattern.fullmatch(var.dimensions[0]):
+        if (len(var.dimensions) == 1) and _string_dimension_name_pattern.fullmatch(
+            var.dimensions[0]
+        ):
             list_of_character_string_vars.append(var_group_name)
 
     # Delete variables
@@ -145,9 +145,7 @@ def flatten_grouped_dataset(
         if ensure_all_dims_are_coords and (
             new_dim_name not in list(nc_dataset.variables.keys())
         ):
-            nc_dataset.createVariable(
-                dim.name, datatype=np.int32, dimensions=(dim.name,)
-            )
+            nc_dataset.createVariable(dim.name, datatype=np.int32, dimensions=(dim.name,))
             temporary_coordinate_variables.append(dim.name)
 
     list_of_character_string_vars: list[str] = []
@@ -187,9 +185,7 @@ def regroup_flattened_dataset(
         group_lst = []
         # need logic if there is data in the top level not in a group
         for var_name, _ in dataset.variables.items():
-            group_lst.append(
-                "/".join(str(var_name).split(concatenator.group_delim)[:-1])
-            )
+            group_lst.append("/".join(str(var_name).split(concatenator.group_delim)[:-1]))
         group_lst = ["/" if group == "" else group for group in group_lst]
         groups = set(group_lst)
         for group in groups:
@@ -230,13 +226,9 @@ def regroup_flattened_dataset(
                 new_var_dims = tuple(
                     str(d).rsplit(concatenator.group_delim, 1)[-1] for d in var.dims
                 )
-                dim_sizes = [
-                    _get_dimension_size(base_dataset, dim) for dim in new_var_dims
-                ]
+                dim_sizes = [_get_dimension_size(base_dataset, dim) for dim in new_var_dims]
 
-                chunk_sizes = _calculate_chunks(
-                    dim_sizes, default_low_dim_chunksize=4000
-                )
+                chunk_sizes = _calculate_chunks(dim_sizes, default_low_dim_chunksize=4000)
 
                 # Do the variable creation
                 if var.dtype == "O":
@@ -245,11 +237,7 @@
                     vartype = str(var.dtype)
 
                 compression: str | None = "zlib"
-                if (
-                    vartype.startswith("<U")
-                    and len(var.shape) == 1
-                    and var.shape[0] < 10
-                ):
+                if vartype.startswith("<U") and len(var.shape) == 1 and var.shape[0] < 10:
                     compression = None
 
                 var_group.createVariable(
@@ -281,9 +269,7 @@
 
 def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
     nested_group = dataset
-    for group in group_path.strip(concatenator.group_delim).split(
-        concatenator.group_delim
-    )[:-1]:
+    for group in group_path.strip(concatenator.group_delim).split(concatenator.group_delim)[:-1]:
         nested_group = nested_group.groups[group]
     return nested_group
26 changes: 8 additions & 18 deletions concatenator/dimension_cleanup.py
@@ -25,12 +25,8 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 
     for var_name, var in nc_dataset.variables.items():
         dim_list = list(var.dimensions)
-        if len(set(dim_list)) != len(
-            dim_list
-        ):  # get true if var.dimensions has a duplicate
-            dup_vars[var_name] = (
-                var  # populate dictionary with variables with vars with dup dims
-            )
+        if len(set(dim_list)) != len(dim_list):  # get true if var.dimensions has a duplicate
+            dup_vars[var_name] = var  # populate dictionary with variables with vars with dup dims
 
     for dup_var_name, dup_var in dup_vars.items():
         dim_list = list(
@@ -39,9 +35,7 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 
         # Dimension(s) that are duplicated are retrieved.
        # Note: this is not yet tested for more than one duplicated dimension.
-        dim_dup = [
-            item for item, count in collections.Counter(dim_list).items() if count > 1
-        ][0]
+        dim_dup = [item for item, count in collections.Counter(dim_list).items() if count > 1][0]
         dim_dup_length = dup_var.shape[
             dup_var.dimensions.index(dim_dup)
         ]  # length of the duplicated dimension
@@ -87,12 +81,10 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
                 (dim_dup_new,),
                 fill_value=fill_value,
             )
-            dim_var_attr_contents = (
-                get_attributes_minus_fillvalue_and_renamed_coords(
-                    original_var_name=dim_dup,
-                    new_var_name=dim_dup_new,
-                    original_dataset=nc_dataset,
-                )
+            dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(
+                original_var_name=dim_dup,
+                new_var_name=dim_dup_new,
+                original_dataset=nc_dataset,
             )
             for attr_name, contents in dim_var_attr_contents.items():
                 new_dup_var[dim_dup_new].setncattr(attr_name, contents)
@@ -124,9 +116,7 @@ def get_attributes_minus_fillvalue_and_renamed_coords(
 
     for ncattr in original_dataset.variables[original_var_name].ncattrs():
         if ncattr != "_FillValue":
-            contents: str = original_dataset.variables[original_var_name].getncattr(
-                ncattr
-            )
+            contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
             if ncattr == "coordinates":
                 contents.replace(original_var_name, new_var_name)
             attrs_contents[ncattr] = contents
8 changes: 2 additions & 6 deletions concatenator/file_ops.py
@@ -22,9 +22,7 @@ def make_temp_dir_with_input_file_copies(
 ) -> tuple[list[str], str]:
     """Creates temporary directory and copies input files."""
     new_data_dir = Path(
-        add_label_to_path(
-            str(output_path.parent / "temp_copy"), label=str(uuid.uuid4())
-        )
+        add_label_to_path(str(output_path.parent / "temp_copy"), label=str(uuid.uuid4()))
     ).resolve()
     os.makedirs(new_data_dir, exist_ok=True)
     logger.info("Created temporary directory: %s", str(new_data_dir))
@@ -54,9 +52,7 @@ def validate_output_path(filepath: str, overwrite: bool = False) -> str:
             f"Run again with `overwrite` option to overwrite existing file."
         )
     if path.is_dir():  # the specified path is an existing directory
-        raise TypeError(
-            "Output path cannot be a directory. Please specify a new filepath."
-        )
+        raise TypeError("Output path cannot be a directory. Please specify a new filepath.")
     return str(path)
 
 
4 changes: 1 addition & 3 deletions concatenator/harmony/download_worker.py
@@ -112,9 +112,7 @@ def _download_worker(
                 break
 
         path = Path(
-            download(
-                url, destination_dir, logger=logger, access_token=access_token, cfg=cfg
-            )
+            download(url, destination_dir, logger=logger, access_token=access_token, cfg=cfg)
         )
         filename_match = re.match(r".*\/(.+\..+)", urlparse(url).path)
 
8 changes: 2 additions & 6 deletions concatenator/harmony/service_adapter.py
@@ -53,9 +53,7 @@ def invoke(self):
             # Message-only support is being depreciated in Harmony, so we should expect to
             # only see requests with catalogs when invoked with a newer Harmony instance
             # https://github.com/nasa/harmony-service-lib-py/blob/21bcfbda17caf626fb14d2ac4f8673be9726b549/harmony/adapter.py#L71
-            raise RuntimeError(
-                "Invoking Batchee without a STAC catalog is not supported"
-            )
+            raise RuntimeError("Invoking Batchee without a STAC catalog is not supported")
 
         return self.message, self.process_catalog(self.catalog)
 
@@ -106,9 +104,7 @@ def process_catalog(self, catalog: pystac.Catalog) -> pystac.Catalog:
                 history_json: list[dict] = []
                 for file_count, file in enumerate(input_files):
                     file_size = sizeof_fmt(file.stat().st_size)
-                    self.logger.info(
-                        f"File {file_count} is size <{file_size}>. Path={file}"
-                    )
+                    self.logger.info(f"File {file_count} is size <{file_size}>. Path={file}")
 
                     with nc.Dataset(file, "r") as dataset:
                         history_json.extend(retrieve_history(dataset))
12 changes: 3 additions & 9 deletions concatenator/stitchee.py
@@ -107,9 +107,7 @@ def stitchee(
     # Exit cleanly with the file copied if one workable netCDF file found.
     if num_input_files == 1:
         shutil.copyfile(input_files[0], output_file)
-        logger.info(
-            "One workable netCDF file. Copied to output path without modification."
-        )
+        logger.info("One workable netCDF file. Copied to output path without modification.")
         return output_file
 
     if concat_dim and (concat_method == "xarray-combine"):
@@ -135,9 +133,7 @@ def stitchee(
         for i, filepath in enumerate(input_files):
             # The group structure is flattened.
             start_time = time.time()
-            logger.info(
-                "  ..file %03d/%03d <%s>..", i + 1, num_input_files, filepath
-            )
+            logger.info("  ..file %03d/%03d <%s>..", i + 1, num_input_files, filepath)
 
             ncfile = context_stack.enter_context(nc.Dataset(filepath, "r+"))
 
@@ -178,9 +174,7 @@ def stitchee(
             # Reorder the xarray datasets according to the concat dim values.
             xrdataset_list = [
                 dataset
-                for _, dataset in sorted(
-                    zip(concat_dim_order, xrdataset_list), key=lambda x: x[0]
-                )
+                for _, dataset in sorted(zip(concat_dim_order, xrdataset_list), key=lambda x: x[0])
             ]
 
             # Flattened files are concatenated together (Using XARRAY).
16 changes: 4 additions & 12 deletions docs/tutorial_examples.ipynb
@@ -449,19 +449,11 @@
     "\n",
     "    ax.set_extent(extent, crs=proj)\n",
     "\n",
-    "    ax.add_feature(\n",
-    "        cfeature.COASTLINE.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\"\n",
-    "    )\n",
-    "    ax.add_feature(\n",
-    "        cfeature.OCEAN, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\"\n",
-    "    )\n",
-    "    ax.add_feature(\n",
-    "        cfeature.LAKES, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\"\n",
-    "    )\n",
+    "    ax.add_feature(cfeature.COASTLINE.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\")\n",
+    "    ax.add_feature(cfeature.OCEAN, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\")\n",
+    "    ax.add_feature(cfeature.LAKES, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\")\n",
     "    ax.add_feature(cfeature.BORDERS, linewidth=0.1, edgecolor=\"dimgray\")\n",
-    "    ax.add_feature(\n",
-    "        cfeature.STATES.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\"\n",
-    "    )\n",
+    "    ax.add_feature(cfeature.STATES.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\")\n",
     "\n",
     "    (H, xbins, ybins) = h2d\n",
     "    H = H.T\n",
12 changes: 3 additions & 9 deletions tests/conftest.py
@@ -96,9 +96,7 @@ def toy_empty_dataset(temp_toy_data_dir):
     return filepath
 
 
-def add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-    open_ds: nc.Dataset, step_values: list
-):
+def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
     """Creates groups, dimensions, variables; and uses chosen step values in an open dataset"""
     grp1 = open_ds.createGroup("Group1")
 
@@ -165,9 +163,7 @@ def ds_3dims_3vars_4coords_1group_part1(temp_toy_data_dir) -> Path:
     filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part1.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-        f, step_values=[9, 10, 11]
-    )
+    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[9, 10, 11])
     f.close()
 
     return filepath
@@ -178,9 +174,7 @@ def ds_3dims_3vars_4coords_1group_part2(temp_toy_data_dir):
     filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part2.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-        f, step_values=[12, 13, 14]
-    )
+    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[12, 13, 14])
     f.close()
 
     return filepath
4 changes: 1 addition & 3 deletions tests/integration/test_concat_with_subsetting_first.py
@@ -36,9 +36,7 @@ def test_concat_with_subsetting_first(temp_output_dir):
     output_path = stitchee(
         file_names,
         output_file=str(
-            (
-                temp_output_dir / "output_harmony_subsetting_to_stitchee_test.nc"
-            ).resolve()
+            (temp_output_dir / "output_harmony_subsetting_to_stitchee_test.nc").resolve()
         ),
         concat_dim="mirror_step",
         concat_method="xarray-concat",
8 changes: 2 additions & 6 deletions tests/unit/test_dataset_and_group_handling.py
@@ -15,13 +15,9 @@
 def test_dataset_with_single_empty_input_file():
     """Ensure that a dataset with a single empty input file is propagating empty granule to the output"""
     files_to_concat = [
-        data_for_tests_dir
-        / "unit-test-data"
-        / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
+        data_for_tests_dir / "unit-test-data" / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
     ]
-    workable_files, number_of_workable_files = validate_workable_files(
-        files_to_concat, None
-    )
+    workable_files, number_of_workable_files = validate_workable_files(files_to_concat, None)
     assert number_of_workable_files == 1
 
 
4 changes: 1 addition & 3 deletions tests/unit/test_harmony_adapter.py
@@ -63,9 +63,7 @@ def test_service_invoke(self, temp_output_dir):
             out_catalog_path = temp_output_dir.joinpath("catalog.json")
             out_catalog = json.loads(out_catalog_path.read_text())
 
-            item_meta = next(
-                item for item in out_catalog["links"] if item["rel"] == "item"
-            )
+            item_meta = next(item for item in out_catalog["links"] if item["rel"] == "item")
             item_href = item_meta["href"]
             item_path = temp_output_dir.joinpath(item_href).resolve()
 
16 changes: 4 additions & 12 deletions tests/unit/test_run_stitchee.py
@@ -34,15 +34,9 @@ class TestBatching:
     def test_run_stitchee_cli_with_three_filepaths(self, temp_output_dir):
         test_args = [
             concatenator.run_stitchee.__file__,
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
-            ),
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"
-            ),
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"
-            ),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"),
             "--copy_input_files",
             "--verbose",
             "-o",
@@ -76,9 +70,7 @@ def test_run_stitchee_cli_with_one_directorypath(self, temp_output_dir):
     def test_run_stitchee_cli_with_one_netCDFpath(self, temp_output_dir):
         test_args = [
             concatenator.run_stitchee.__file__,
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
-            ),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"),
             "--copy_input_files",
             "--verbose",
             "-o",
