Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow skipping of cog creation #39

Merged
merged 2 commits into from
Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ number as needed.
### Added

- `read_href_modifier` for ocean-heat-content ([#38](https://github.com/stactools-packages/noaa-cdr/pull/38))
- `cog_hrefs` argument for Ocean Heat Content's cogify, to allow skipping of COG
creation ([#39](https://github.com/stactools-packages/noaa-cdr/pull/39))

### Deprecated

Expand Down
14 changes: 4 additions & 10 deletions src/stactools/noaa_cdr/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import rasterio.shutil
import xarray
from numpy.typing import NDArray
from pystac import Asset, MediaType
from pystac import Asset
from rasterio import MemoryFile

from . import dataset
Expand All @@ -32,27 +32,21 @@ def cogify(
if profile.needs_longitude_remap:
values = numpy.roll(values, int(profile.width / 2), 1)
path = os.path.join(directory, f"{file_name}-{variable}.tif")
asset = write(
write(
values,
path,
profile,
)
asset = profile.update_cog_asset(variable, asset)
assets[variable] = asset
assets[variable] = profile.cog_asset(path)
return assets


def write(
values: NDArray[Any],
path: str,
profile: BandProfile,
) -> Asset:
) -> None:
with MemoryFile() as memory_file:
with memory_file.open(**profile.gtiff()) as open_memory_file:
open_memory_file.write(values, 1)
rasterio.shutil.copy(open_memory_file, path, **profile.cog())
asset = Asset(
title=profile.title, href=path, media_type=MediaType.COG, roles=["data"]
)
asset.extra_fields["raster:bands"] = [profile.raster_band().to_dict()]
return asset
33 changes: 22 additions & 11 deletions src/stactools/noaa_cdr/ocean_heat_content/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@
class Cog:
"""Dataclass to hold the result of a cogification operation."""

asset: Asset
href: str
profile: BandProfile
time_resolution: TimeResolution
start_datetime: datetime.datetime
end_datetime: datetime.datetime
datetime: datetime.datetime
attributes: Dict[Hashable, Any]

def asset(self) -> Asset:
return self.profile.cog_asset(self.href)

def time_interval_as_str(self) -> str:
"""Returns this COG's time interval as a string."""
return self.time_resolution.as_str(self.datetime)
Expand All @@ -52,6 +55,7 @@ def cogify(
outdir: Optional[str] = None,
latest_only: bool = False,
read_href_modifier: Optional[ReadHrefModifier] = None,
cog_hrefs: Optional[List[str]] = None,
) -> List[Cog]:
if outdir is None:
outdir = os.path.dirname(href)
Expand All @@ -60,6 +64,10 @@ def cogify(
maybe_modified_href = read_href_modifier(href)
else:
maybe_modified_href = href
if cog_hrefs:
cog_file_names = dict((os.path.basename(h), h) for h in cog_hrefs)
else:
cog_file_names = dict()
with fsspec.open(maybe_modified_href) as file:
with xarray.open_dataset(file, decode_times=False) as ds:
time_resolution = TimeResolution.from_value(ds.time_coverage_resolution)
Expand All @@ -71,22 +79,25 @@ def cogify(
dt = time.add_months_to_datetime(BASE_TIME, month_offset)
start_datetime, end_datetime = time_resolution.datetime_bounds(dt)
suffix = time_resolution.as_str(dt)
path = os.path.join(
outdir,
f"{os.path.splitext(os.path.basename(href))[0]}_{suffix}.tif",
file_name = (
f"{os.path.splitext(os.path.basename(href))[0]}_{suffix}.tif"
)
profile = BandProfile.build(
ds, variable, lambda d: d.isel(time=i).squeeze()
)
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
asset = cog.write(
values,
path,
profile,
)
if file_name in cog_file_names:
cog_href = cog_file_names[file_name]
else:
cog_href = os.path.join(outdir, file_name)
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
cog.write(
values,
cog_href,
profile,
)
cogs.append(
Cog(
asset=asset,
href=cog_href,
profile=profile,
time_resolution=time_resolution,
datetime=dt,
Expand Down
2 changes: 1 addition & 1 deletion src/stactools/noaa_cdr/ocean_heat_content/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,6 @@ def cogify_command(infile: str, outdir: Optional[Path]) -> None:
if outdir:
os.makedirs(str(outdir), exist_ok=True)
cogs = cog.cogify(infile, None if outdir is None else str(outdir))
print(f"Wrote {len(cogs)} COGs to {os.path.dirname(cogs[0].asset.href)}")
print(f"Wrote {len(cogs)} COGs to {os.path.dirname(cogs[0].asset().href)}")

return ocean_heat_content
9 changes: 6 additions & 3 deletions src/stactools/noaa_cdr/ocean_heat_content/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def create_collection(
def create_items(
hrefs: List[str],
directory: str,
cog_hrefs: Optional[List[str]] = None,
latest_only: bool = False,
read_href_modifier: Optional[ReadHrefModifier] = None,
) -> List[Item]:
Expand All @@ -144,6 +145,7 @@ def create_items(
cogs = cog.cogify(
href,
directory,
cog_hrefs=cog_hrefs,
latest_only=latest_only,
read_href_modifier=read_href_modifier,
)
Expand Down Expand Up @@ -175,11 +177,12 @@ def _update_items(items: List[Item], cogs: List[Cog]) -> List[Item]:
title = c.attributes["title"].split(" : ")[0]
min_depth = int(c.attributes["geospatial_vertical_min"])
max_depth = int(c.attributes["geospatial_vertical_max"])
c.asset.title = f"{title} : {min_depth}-{max_depth}m {c.time_interval_as_str()}"
item.add_asset(c.asset_key(), c.asset)
asset = c.asset()
asset.title = f"{title} : {min_depth}-{max_depth}m {c.time_interval_as_str()}"
item.add_asset(c.asset_key(), asset)
# The asset has the raster extension, but we need to make sure the item
# has the schema url.
_ = RasterExtension.ext(c.asset, add_if_missing=True)
_ = RasterExtension.ext(asset, add_if_missing=True)
items_as_dict[id] = item
return list(items_as_dict.values())

Expand Down
14 changes: 10 additions & 4 deletions src/stactools/noaa_cdr/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shapely.geometry
from pyproj import CRS
from pyproj.enums import WktVersion
from pystac import Asset
from pystac import Asset, MediaType
from pystac.extensions.raster import DataType, NoDataStrings, RasterBand
from rasterio import Affine
from xarray import DataArray, Dataset
Expand Down Expand Up @@ -110,6 +110,7 @@ class BandProfile:
attrs: Dict[Hashable, Any]
title: str
dataset_profile: DatasetProfile
variable: str

@classmethod
def build(
Expand Down Expand Up @@ -159,8 +160,16 @@ def build(
attrs=data_array.attrs,
title=title,
dataset_profile=dataset_profile,
variable=variable,
)

def cog_asset(self, href: str) -> Asset:
asset = Asset(
title=self.title, href=href, media_type=MediaType.COG, roles=["data"]
)
asset.extra_fields["raster:bands"] = [self.raster_band().to_dict()]
return asset

def gtiff(self) -> Dict[str, Any]:
return {
"crs": self.crs,
Expand All @@ -173,9 +182,6 @@ def gtiff(self) -> Dict[str, Any]:
"driver": "GTiff",
}

def update_cog_asset(self, key: str, asset: Asset) -> Asset:
return asset

def raster_band(self) -> RasterBand:
if math.isnan(self.nodata):
nodata = NoDataStrings.NAN
Expand Down
11 changes: 6 additions & 5 deletions src/stactools/noaa_cdr/sea_ice_concentration/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@
from ..profile import BandProfile
from .constants import SPATIAL_RESOLUTION

KEYS_WITH_CLASSES = [
VARIABLES_WITH_CLASSES = [
"cdr_seaice_conc",
"nsidc_bt_seaice_conc",
"stdev_of_cdr_seaice_conc",
"temporal_interpolation_flag",
]
KEYS_WITH_BITFIELDS = ["qa_of_cdr_seaice_conc", "spatial_interpolation_flag"]
VARIABLES_WITH_BITFIELDS = ["qa_of_cdr_seaice_conc", "spatial_interpolation_flag"]


def cogify(href: str, directory: str) -> Dict[str, Asset]:
return cog.cogify(href, directory, SeaIceConcentrationBandProfile)


class SeaIceConcentrationBandProfile(BandProfile):
def update_cog_asset(self, key: str, asset: Asset) -> Asset:
def cog_asset(self, href: str) -> Asset:
asset = super().cog_asset(href)
asset.extra_fields["raster:bands"][0]["spatial_resolution"] = SPATIAL_RESOLUTION
if key in KEYS_WITH_CLASSES:
if self.variable in VARIABLES_WITH_CLASSES:
asset.extra_fields["classification:classes"] = self.classes()
elif key in KEYS_WITH_BITFIELDS:
elif self.variable in VARIABLES_WITH_BITFIELDS:
asset.extra_fields["classification:bitfields"] = self.bitfield()
return asset

Expand Down
4 changes: 2 additions & 2 deletions src/stactools/noaa_cdr/sea_surface_temperature_whoi/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def create_cog_items(href: str, directory: str) -> List[Item]:
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
values = numpy.roll(values, int(profiles[variable].width / 2), 1)
path = Path(directory) / f"{item.id}-{variable}.tif"
asset = cog.write(values, str(path), profiles[variable])
item.assets[variable] = asset
cog.write(values, str(path), profiles[variable])
item.assets[variable] = profiles[variable].cog_asset(str(path))
items.append(item)

return items
Expand Down
49 changes: 43 additions & 6 deletions tests/ocean_heat_content/test_stac.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import datetime
import os
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -32,10 +34,9 @@ def test_create_collection() -> None:
collection.validate_all()


def test_create_items_one_netcdf() -> None:
def test_create_items_one_netcdf(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
with TemporaryDirectory() as temporary_directory:
items = stac.create_items([path], temporary_directory)
items = stac.create_items([path], str(tmp_path))
assert len(items) == 17
for item in items:
assert len(item.assets) == 1
Expand Down Expand Up @@ -67,6 +68,22 @@ def test_create_items_one_netcdf() -> None:
item.validate()


def test_create_items_one_netcdf_cog_hrefs(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
items = stac.create_items([path], str(tmp_path))
subdirectory = tmp_path / "subdirectory"
subdirectory.mkdir()
new_paths = list()
for p in tmp_path.iterdir():
if p.suffix == ".tif":
new_path = subdirectory / p.name
p.rename(new_path)
new_paths.append(str(new_path))
new_items = stac.create_items([path], str(tmp_path), cog_hrefs=new_paths)
assert not any(p.suffix == ".tif" for p in tmp_path.iterdir())
assert len(new_items) == len(items)


def test_create_items_two_netcdfs_same_items(tmp_path: Path) -> None:
paths = [
test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc"),
Expand Down Expand Up @@ -124,7 +141,7 @@ def test_cogify(tmp_path: Path, infile: str, num_cogs: int) -> None:
# Because these netcdfs grow in place, we can never be sure of how many there should be.
assert len(cogs) >= num_cogs
for c in cogs:
assert Path(c.asset.href).exists()
assert Path(c.asset().href).exists()


def test_cogify_href(tmp_path: Path) -> None:
Expand All @@ -135,7 +152,7 @@ def test_cogify_href(tmp_path: Path) -> None:
cogs = cog.cogify(href, str(tmp_path))
assert len(cogs) == 17
for c in cogs:
assert Path(c.asset.href).exists()
assert Path(c.asset().href).exists()


def test_cogify_href_no_output_directory() -> None:
Expand All @@ -150,4 +167,24 @@ def test_cogify_href_no_output_directory() -> None:
def test_unitless(tmp_path: Path) -> None:
path = test_data.get_external_data("mean_salinity_anomaly_0-2000_yearly.nc")
cogs = cog.cogify(path, str(tmp_path))
assert "unit" not in cogs[0].asset.extra_fields["raster:bands"][0]
assert "unit" not in cogs[0].asset().extra_fields["raster:bands"][0]


def test_cogify_cog_href(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
cogs = cog.cogify(path, str(tmp_path))
href = cogs[0].asset().href
subdirectory = tmp_path / "subdirectory"
subdirectory.mkdir()
href = shutil.move(href, subdirectory)
for p in tmp_path.iterdir():
if p.suffix == ".tif":
p.unlink()
new_cogs = cog.cogify(path, str(tmp_path), cog_hrefs=[href])
assert not (tmp_path / os.path.basename(href)).exists()
assert (
sum(1 for f in os.listdir(tmp_path) if os.path.splitext(f)[1] == ".tif")
== len(new_cogs) - 1
)
assert os.path.exists(href)
assert href in [new_cog.asset().href for new_cog in new_cogs]