
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Nov 15, 2024
1 parent 4a6c559 commit a0ff697
Showing 13 changed files with 42 additions and 116 deletions.
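Every hunk in this commit is a pure re-wrap: expressions that a formatter had previously split across several lines are collapsed onto single lines, with no behavioral change. All of the collapsed lines fit within 100 columns (the longest, the HISTORY_JSON_SCHEMA assignment, is exactly 100 characters), which suggests a formatter hook running with a 100-character line-length limit. A minimal pre-commit configuration along these lines would produce this kind of auto-fix; the hook choice and version below are assumptions for illustration, not taken from the repository:

    # Hypothetical .pre-commit-config.yaml (a sketch; the hook and rev are assumed,
    # only the 100-character limit is inferred from the collapsed lines below)
    repos:
      - repo: https://github.com/psf/black
        rev: 24.10.0
        hooks:
          - id: black
            args: ["--line-length=100"]

Running pre-commit run --all-files locally applies the same hooks that pre-commit.ci runs on each push.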
8 changes: 2 additions & 6 deletions concatenator/attribute_handling.py
@@ -12,13 +12,9 @@
 import concatenator
 
 # Values needed for history_json attribute
-HISTORY_JSON_SCHEMA = (
-    "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json"
-)
+HISTORY_JSON_SCHEMA = "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-v0.1.0.json"
 PROGRAM = "stitchee"
-PROGRAM_REF = (
-    "https://cmr.earthdata.nasa.gov:443/search/concepts/S2940253910-LARC_CLOUD"
-)
+PROGRAM_REF = "https://cmr.earthdata.nasa.gov:443/search/concepts/S2940253910-LARC_CLOUD"
 VERSION = importlib_metadata.distribution("stitchee").version
 
 
32 changes: 9 additions & 23 deletions concatenator/dataset_and_group_handling.py
@@ -52,9 +52,9 @@ def walk(
         # Flatten the paths of variables referenced in the 'coordinates' attribute
         flatten_coordinate_attribute_paths(new_dataset, var, var_group_name)
 
-        if (
-            len(var.dimensions) == 1
-        ) and _string_dimension_name_pattern.fullmatch(var.dimensions[0]):
+        if (len(var.dimensions) == 1) and _string_dimension_name_pattern.fullmatch(
+            var.dimensions[0]
+        ):
             list_of_character_string_vars.append(var_group_name)
 
     # Delete variables
@@ -145,9 +145,7 @@ def flatten_grouped_dataset(
         if ensure_all_dims_are_coords and (
             new_dim_name not in list(nc_dataset.variables.keys())
         ):
-            nc_dataset.createVariable(
-                dim.name, datatype=np.int32, dimensions=(dim.name,)
-            )
+            nc_dataset.createVariable(dim.name, datatype=np.int32, dimensions=(dim.name,))
             temporary_coordinate_variables.append(dim.name)
 
     list_of_character_string_vars: list[str] = []
@@ -187,9 +185,7 @@ def regroup_flattened_dataset(
         group_lst = []
         # need logic if there is data in the top level not in a group
         for var_name, _ in dataset.variables.items():
-            group_lst.append(
-                "/".join(str(var_name).split(concatenator.group_delim)[:-1])
-            )
+            group_lst.append("/".join(str(var_name).split(concatenator.group_delim)[:-1]))
         group_lst = ["/" if group == "" else group for group in group_lst]
         groups = set(group_lst)
         for group in groups:
@@ -230,13 +226,9 @@ def regroup_flattened_dataset(
                 new_var_dims = tuple(
                     str(d).rsplit(concatenator.group_delim, 1)[-1] for d in var.dims
                 )
-                dim_sizes = [
-                    _get_dimension_size(base_dataset, dim) for dim in new_var_dims
-                ]
+                dim_sizes = [_get_dimension_size(base_dataset, dim) for dim in new_var_dims]
 
-                chunk_sizes = _calculate_chunks(
-                    dim_sizes, default_low_dim_chunksize=4000
-                )
+                chunk_sizes = _calculate_chunks(dim_sizes, default_low_dim_chunksize=4000)
 
                 # Do the variable creation
                 if var.dtype == "O":
@@ -245,11 +237,7 @@
                     vartype = str(var.dtype)
 
                 compression: str | None = "zlib"
-                if (
-                    vartype.startswith("<U")
-                    and len(var.shape) == 1
-                    and var.shape[0] < 10
-                ):
+                if vartype.startswith("<U") and len(var.shape) == 1 and var.shape[0] < 10:
                     compression = None
 
                 var_group.createVariable(
@@ -281,9 +269,7 @@
 
 def _get_nested_group(dataset: nc.Dataset, group_path: str) -> nc.Group:
     nested_group = dataset
-    for group in group_path.strip(concatenator.group_delim).split(
-        concatenator.group_delim
-    )[:-1]:
+    for group in group_path.strip(concatenator.group_delim).split(concatenator.group_delim)[:-1]:
         nested_group = nested_group.groups[group]
     return nested_group
26 changes: 8 additions & 18 deletions concatenator/dimension_cleanup.py
@@ -25,12 +25,8 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 
     for var_name, var in nc_dataset.variables.items():
         dim_list = list(var.dimensions)
-        if len(set(dim_list)) != len(
-            dim_list
-        ):  # get true if var.dimensions has a duplicate
-            dup_vars[var_name] = (
-                var  # populate dictionary with variables with vars with dup dims
-            )
+        if len(set(dim_list)) != len(dim_list):  # get true if var.dimensions has a duplicate
+            dup_vars[var_name] = var  # populate dictionary with variables with vars with dup dims
 
     for dup_var_name, dup_var in dup_vars.items():
         dim_list = list(
@@ -39,9 +35,7 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
 
         # Dimension(s) that are duplicated are retrieved.
        # Note: this is not yet tested for more than one duplicated dimension.
-        dim_dup = [
-            item for item, count in collections.Counter(dim_list).items() if count > 1
-        ][0]
+        dim_dup = [item for item, count in collections.Counter(dim_list).items() if count > 1][0]
         dim_dup_length = dup_var.shape[
             dup_var.dimensions.index(dim_dup)
         ]  # length of the duplicated dimension
@@ -87,12 +81,10 @@ def remove_duplicate_dims(nc_dataset: nc.Dataset) -> nc.Dataset:
                 (dim_dup_new,),
                 fill_value=fill_value,
             )
-            dim_var_attr_contents = (
-                get_attributes_minus_fillvalue_and_renamed_coords(
-                    original_var_name=dim_dup,
-                    new_var_name=dim_dup_new,
-                    original_dataset=nc_dataset,
-                )
+            dim_var_attr_contents = get_attributes_minus_fillvalue_and_renamed_coords(
+                original_var_name=dim_dup,
+                new_var_name=dim_dup_new,
+                original_dataset=nc_dataset,
             )
             for attr_name, contents in dim_var_attr_contents.items():
                 new_dup_var[dim_dup_new].setncattr(attr_name, contents)
@@ -124,9 +116,7 @@ def get_attributes_minus_fillvalue_and_renamed_coords(
 
     for ncattr in original_dataset.variables[original_var_name].ncattrs():
         if ncattr != "_FillValue":
-            contents: str = original_dataset.variables[original_var_name].getncattr(
-                ncattr
-            )
+            contents: str = original_dataset.variables[original_var_name].getncattr(ncattr)
             if ncattr == "coordinates":
                 contents.replace(original_var_name, new_var_name)
             attrs_contents[ncattr] = contents
8 changes: 2 additions & 6 deletions concatenator/file_ops.py
@@ -22,9 +22,7 @@ def make_temp_dir_with_input_file_copies(
 ) -> tuple[list[str], str]:
     """Creates temporary directory and copies input files."""
     new_data_dir = Path(
-        add_label_to_path(
-            str(output_path.parent / "temp_copy"), label=str(uuid.uuid4())
-        )
+        add_label_to_path(str(output_path.parent / "temp_copy"), label=str(uuid.uuid4()))
     ).resolve()
     os.makedirs(new_data_dir, exist_ok=True)
     logger.info("Created temporary directory: %s", str(new_data_dir))
@@ -54,9 +52,7 @@ def validate_output_path(filepath: str, overwrite: bool = False) -> str:
             f"Run again with `overwrite` option to overwrite existing file."
         )
     if path.is_dir():  # the specified path is an existing directory
-        raise TypeError(
-            "Output path cannot be a directory. Please specify a new filepath."
-        )
+        raise TypeError("Output path cannot be a directory. Please specify a new filepath.")
     return str(path)
 
 
4 changes: 1 addition & 3 deletions concatenator/harmony/download_worker.py
@@ -112,9 +112,7 @@ def _download_worker(
                 break
 
         path = Path(
-            download(
-                url, destination_dir, logger=logger, access_token=access_token, cfg=cfg
-            )
+            download(url, destination_dir, logger=logger, access_token=access_token, cfg=cfg)
         )
         filename_match = re.match(r".*\/(.+\..+)", urlparse(url).path)
 
8 changes: 2 additions & 6 deletions concatenator/harmony/service_adapter.py
@@ -53,9 +53,7 @@ def invoke(self):
             # Message-only support is being depreciated in Harmony, so we should expect to
             # only see requests with catalogs when invoked with a newer Harmony instance
             # https://github.com/nasa/harmony-service-lib-py/blob/21bcfbda17caf626fb14d2ac4f8673be9726b549/harmony/adapter.py#L71
-            raise RuntimeError(
-                "Invoking Batchee without a STAC catalog is not supported"
-            )
+            raise RuntimeError("Invoking Batchee without a STAC catalog is not supported")
 
         return self.message, self.process_catalog(self.catalog)
 
@@ -106,9 +104,7 @@ def process_catalog(self, catalog: pystac.Catalog) -> pystac.Catalog:
                 history_json: list[dict] = []
                 for file_count, file in enumerate(input_files):
                     file_size = sizeof_fmt(file.stat().st_size)
-                    self.logger.info(
-                        f"File {file_count} is size <{file_size}>. Path={file}"
-                    )
+                    self.logger.info(f"File {file_count} is size <{file_size}>. Path={file}")
 
                     with nc.Dataset(file, "r") as dataset:
                         history_json.extend(retrieve_history(dataset))
12 changes: 3 additions & 9 deletions concatenator/stitchee.py
@@ -107,9 +107,7 @@ def stitchee(
     # Exit cleanly with the file copied if one workable netCDF file found.
     if num_input_files == 1:
         shutil.copyfile(input_files[0], output_file)
-        logger.info(
-            "One workable netCDF file. Copied to output path without modification."
-        )
+        logger.info("One workable netCDF file. Copied to output path without modification.")
         return output_file
 
     if concat_dim and (concat_method == "xarray-combine"):
@@ -135,9 +133,7 @@ def stitchee(
         for i, filepath in enumerate(input_files):
             # The group structure is flattened.
             start_time = time.time()
-            logger.info(
-                "  ..file %03d/%03d <%s>..", i + 1, num_input_files, filepath
-            )
+            logger.info("  ..file %03d/%03d <%s>..", i + 1, num_input_files, filepath)
 
             ncfile = context_stack.enter_context(nc.Dataset(filepath, "r+"))
 
@@ -178,9 +174,7 @@ def stitchee(
             # Reorder the xarray datasets according to the concat dim values.
             xrdataset_list = [
                 dataset
-                for _, dataset in sorted(
-                    zip(concat_dim_order, xrdataset_list), key=lambda x: x[0]
-                )
+                for _, dataset in sorted(zip(concat_dim_order, xrdataset_list), key=lambda x: x[0])
             ]
 
             # Flattened files are concatenated together (Using XARRAY).
16 changes: 4 additions & 12 deletions docs/tutorial_examples.ipynb
@@ -449,19 +449,11 @@
     "\n",
     "    ax.set_extent(extent, crs=proj)\n",
     "\n",
-    "    ax.add_feature(\n",
-    "        cfeature.COASTLINE.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\"\n",
-    "    )\n",
-    "    ax.add_feature(\n",
-    "        cfeature.OCEAN, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\"\n",
-    "    )\n",
-    "    ax.add_feature(\n",
-    "        cfeature.LAKES, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\"\n",
-    "    )\n",
+    "    ax.add_feature(cfeature.COASTLINE.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\")\n",
+    "    ax.add_feature(cfeature.OCEAN, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\")\n",
+    "    ax.add_feature(cfeature.LAKES, linewidth=0.1, edgecolor=\"dimgray\", facecolor=\"lightsteelblue\")\n",
     "    ax.add_feature(cfeature.BORDERS, linewidth=0.1, edgecolor=\"dimgray\")\n",
-    "    ax.add_feature(\n",
-    "        cfeature.STATES.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\"\n",
-    "    )\n",
+    "    ax.add_feature(cfeature.STATES.with_scale(\"10m\"), linewidth=0.1, edgecolor=\"dimgray\")\n",
     "\n",
     "    (H, xbins, ybins) = h2d\n",
     "    H = H.T\n",
12 changes: 3 additions & 9 deletions tests/conftest.py
@@ -96,9 +96,7 @@ def toy_empty_dataset(temp_toy_data_dir):
     return filepath
 
 
-def add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-    open_ds: nc.Dataset, step_values: list
-):
+def add_to_ds_3dims_3vars_4coords_1group_with_step_values(open_ds: nc.Dataset, step_values: list):
     """Creates groups, dimensions, variables; and uses chosen step values in an open dataset"""
     grp1 = open_ds.createGroup("Group1")
 
@@ -165,9 +163,7 @@ def ds_3dims_3vars_4coords_1group_part1(temp_toy_data_dir) -> Path:
     filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part1.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-        f, step_values=[9, 10, 11]
-    )
+    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[9, 10, 11])
     f.close()
 
     return filepath
@@ -178,9 +174,7 @@ def ds_3dims_3vars_4coords_1group_part2(temp_toy_data_dir):
     filepath = temp_toy_data_dir / "test_3dims_3vars_4coords_1group_part2.nc"
 
     f = nc.Dataset(filename=filepath, mode="w")
-    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(
-        f, step_values=[12, 13, 14]
-    )
+    f = add_to_ds_3dims_3vars_4coords_1group_with_step_values(f, step_values=[12, 13, 14])
     f.close()
 
     return filepath
4 changes: 1 addition & 3 deletions tests/integration/test_concat_with_subsetting_first.py
@@ -36,9 +36,7 @@ def test_concat_with_subsetting_first(temp_output_dir):
     output_path = stitchee(
         file_names,
         output_file=str(
-            (
-                temp_output_dir / "output_harmony_subsetting_to_stitchee_test.nc"
-            ).resolve()
+            (temp_output_dir / "output_harmony_subsetting_to_stitchee_test.nc").resolve()
         ),
         concat_dim="mirror_step",
         concat_method="xarray-concat",
8 changes: 2 additions & 6 deletions tests/unit/test_dataset_and_group_handling.py
@@ -15,13 +15,9 @@
 def test_dataset_with_single_empty_input_file():
     """Ensure that a dataset with a single empty input file is propagating empty granule to the output"""
     files_to_concat = [
-        data_for_tests_dir
-        / "unit-test-data"
-        / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
+        data_for_tests_dir / "unit-test-data" / "TEMPO_NO2_L2_V03_20240328T154353Z_S008G01.nc4"
     ]
-    workable_files, number_of_workable_files = validate_workable_files(
-        files_to_concat, None
-    )
+    workable_files, number_of_workable_files = validate_workable_files(files_to_concat, None)
     assert number_of_workable_files == 1
 
 
4 changes: 1 addition & 3 deletions tests/unit/test_harmony_adapter.py
@@ -63,9 +63,7 @@ def test_service_invoke(self, temp_output_dir):
             out_catalog_path = temp_output_dir.joinpath("catalog.json")
             out_catalog = json.loads(out_catalog_path.read_text())
 
-            item_meta = next(
-                item for item in out_catalog["links"] if item["rel"] == "item"
-            )
+            item_meta = next(item for item in out_catalog["links"] if item["rel"] == "item")
             item_href = item_meta["href"]
             item_path = temp_output_dir.joinpath(item_href).resolve()
 
16 changes: 4 additions & 12 deletions tests/unit/test_run_stitchee.py
@@ -34,15 +34,9 @@ class TestBatching:
     def test_run_stitchee_cli_with_three_filepaths(self, temp_output_dir):
         test_args = [
             concatenator.run_stitchee.__file__,
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
-            ),
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"
-            ),
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"
-            ),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T211614Z_S012G02_subsetted.nc4"),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T212254Z_S012G03_subsetted.nc4"),
             "--copy_input_files",
             "--verbose",
             "-o",
@@ -76,9 +70,7 @@ def test_run_stitchee_cli_with_one_directorypath(self, temp_output_dir):
     def test_run_stitchee_cli_with_one_netCDFpath(self, temp_output_dir):
         test_args = [
             concatenator.run_stitchee.__file__,
-            path_str(
-                granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"
-            ),
+            path_str(granules_path, "TEMPO_NO2_L2_V03_20240601T210934Z_S012G01_subsetted.nc4"),
             "--copy_input_files",
             "--verbose",
             "-o",
