Skip to content

Commit

Permalink
feat: allow skipping of cog creation
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Nov 4, 2022
1 parent 22d9aca commit 584c457
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 42 deletions.
14 changes: 4 additions & 10 deletions src/stactools/noaa_cdr/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import rasterio.shutil
import xarray
from numpy.typing import NDArray
from pystac import Asset, MediaType
from pystac import Asset
from rasterio import MemoryFile

from . import dataset
Expand All @@ -32,27 +32,21 @@ def cogify(
if profile.needs_longitude_remap:
values = numpy.roll(values, int(profile.width / 2), 1)
path = os.path.join(directory, f"{file_name}-{variable}.tif")
asset = write(
write(
values,
path,
profile,
)
asset = profile.update_cog_asset(variable, asset)
assets[variable] = asset
assets[variable] = profile.cog_asset(path)
return assets


def write(
values: NDArray[Any],
path: str,
profile: BandProfile,
) -> Asset:
) -> None:
with MemoryFile() as memory_file:
with memory_file.open(**profile.gtiff()) as open_memory_file:
open_memory_file.write(values, 1)
rasterio.shutil.copy(open_memory_file, path, **profile.cog())
asset = Asset(
title=profile.title, href=path, media_type=MediaType.COG, roles=["data"]
)
asset.extra_fields["raster:bands"] = [profile.raster_band().to_dict()]
return asset
33 changes: 22 additions & 11 deletions src/stactools/noaa_cdr/ocean_heat_content/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@
class Cog:
"""Dataclass to hold the result of a cogification operation."""

asset: Asset
href: str
profile: BandProfile
time_resolution: TimeResolution
start_datetime: datetime.datetime
end_datetime: datetime.datetime
datetime: datetime.datetime
attributes: Dict[Hashable, Any]

def asset(self) -> Asset:
return self.profile.cog_asset(self.href)

def time_interval_as_str(self) -> str:
"""Returns this COG's time interval as a string."""
return self.time_resolution.as_str(self.datetime)
Expand All @@ -52,6 +55,7 @@ def cogify(
outdir: Optional[str] = None,
latest_only: bool = False,
read_href_modifier: Optional[ReadHrefModifier] = None,
cog_hrefs: Optional[List[str]] = None,
) -> List[Cog]:
if outdir is None:
outdir = os.path.dirname(href)
Expand All @@ -60,6 +64,10 @@ def cogify(
maybe_modified_href = read_href_modifier(href)
else:
maybe_modified_href = href
if cog_hrefs:
cog_file_names = dict((os.path.basename(h), h) for h in cog_hrefs)
else:
cog_file_names = dict()
with fsspec.open(maybe_modified_href) as file:
with xarray.open_dataset(file, decode_times=False) as ds:
time_resolution = TimeResolution.from_value(ds.time_coverage_resolution)
Expand All @@ -71,22 +79,25 @@ def cogify(
dt = time.add_months_to_datetime(BASE_TIME, month_offset)
start_datetime, end_datetime = time_resolution.datetime_bounds(dt)
suffix = time_resolution.as_str(dt)
path = os.path.join(
outdir,
f"{os.path.splitext(os.path.basename(href))[0]}_{suffix}.tif",
file_name = (
f"{os.path.splitext(os.path.basename(href))[0]}_{suffix}.tif"
)
profile = BandProfile.build(
ds, variable, lambda d: d.isel(time=i).squeeze()
)
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
asset = cog.write(
values,
path,
profile,
)
if file_name in cog_file_names:
cog_href = cog_file_names[file_name]
else:
cog_href = os.path.join(outdir, file_name)
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
cog.write(
values,
cog_href,
profile,
)
cogs.append(
Cog(
asset=asset,
href=cog_href,
profile=profile,
time_resolution=time_resolution,
datetime=dt,
Expand Down
2 changes: 1 addition & 1 deletion src/stactools/noaa_cdr/ocean_heat_content/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,6 @@ def cogify_command(infile: str, outdir: Optional[Path]) -> None:
if outdir:
os.makedirs(str(outdir), exist_ok=True)
cogs = cog.cogify(infile, None if outdir is None else str(outdir))
print(f"Wrote {len(cogs)} COGs to {os.path.dirname(cogs[0].asset.href)}")
print(f"Wrote {len(cogs)} COGs to {os.path.dirname(cogs[0].asset().href)}")

return ocean_heat_content
9 changes: 6 additions & 3 deletions src/stactools/noaa_cdr/ocean_heat_content/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def create_collection(
def create_items(
hrefs: List[str],
directory: str,
cog_hrefs: Optional[List[str]] = None,
latest_only: bool = False,
read_href_modifier: Optional[ReadHrefModifier] = None,
) -> List[Item]:
Expand All @@ -144,6 +145,7 @@ def create_items(
cogs = cog.cogify(
href,
directory,
cog_hrefs=cog_hrefs,
latest_only=latest_only,
read_href_modifier=read_href_modifier,
)
Expand Down Expand Up @@ -175,11 +177,12 @@ def _update_items(items: List[Item], cogs: List[Cog]) -> List[Item]:
title = c.attributes["title"].split(" : ")[0]
min_depth = int(c.attributes["geospatial_vertical_min"])
max_depth = int(c.attributes["geospatial_vertical_max"])
c.asset.title = f"{title} : {min_depth}-{max_depth}m {c.time_interval_as_str()}"
item.add_asset(c.asset_key(), c.asset)
asset = c.asset()
asset.title = f"{title} : {min_depth}-{max_depth}m {c.time_interval_as_str()}"
item.add_asset(c.asset_key(), asset)
# The asset has the raster extension, but we need to make sure the item
# has the schema url.
_ = RasterExtension.ext(c.asset, add_if_missing=True)
_ = RasterExtension.ext(asset, add_if_missing=True)
items_as_dict[id] = item
return list(items_as_dict.values())

Expand Down
14 changes: 10 additions & 4 deletions src/stactools/noaa_cdr/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shapely.geometry
from pyproj import CRS
from pyproj.enums import WktVersion
from pystac import Asset
from pystac import Asset, MediaType
from pystac.extensions.raster import DataType, NoDataStrings, RasterBand
from rasterio import Affine
from xarray import DataArray, Dataset
Expand Down Expand Up @@ -110,6 +110,7 @@ class BandProfile:
attrs: Dict[Hashable, Any]
title: str
dataset_profile: DatasetProfile
variable: str

@classmethod
def build(
Expand Down Expand Up @@ -159,8 +160,16 @@ def build(
attrs=data_array.attrs,
title=title,
dataset_profile=dataset_profile,
variable=variable,
)

def cog_asset(self, href: str) -> Asset:
asset = Asset(
title=self.title, href=href, media_type=MediaType.COG, roles=["data"]
)
asset.extra_fields["raster:bands"] = [self.raster_band().to_dict()]
return asset

def gtiff(self) -> Dict[str, Any]:
return {
"crs": self.crs,
Expand All @@ -173,9 +182,6 @@ def gtiff(self) -> Dict[str, Any]:
"driver": "GTiff",
}

def update_cog_asset(self, key: str, asset: Asset) -> Asset:
return asset

def raster_band(self) -> RasterBand:
if math.isnan(self.nodata):
nodata = NoDataStrings.NAN
Expand Down
11 changes: 6 additions & 5 deletions src/stactools/noaa_cdr/sea_ice_concentration/cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@
from ..profile import BandProfile
from .constants import SPATIAL_RESOLUTION

KEYS_WITH_CLASSES = [
VARIABLES_WITH_CLASSES = [
"cdr_seaice_conc",
"nsidc_bt_seaice_conc",
"stdev_of_cdr_seaice_conc",
"temporal_interpolation_flag",
]
KEYS_WITH_BITFIELDS = ["qa_of_cdr_seaice_conc", "spatial_interpolation_flag"]
VARIABLES_WITH_BITFIELDS = ["qa_of_cdr_seaice_conc", "spatial_interpolation_flag"]


def cogify(href: str, directory: str) -> Dict[str, Asset]:
return cog.cogify(href, directory, SeaIceConcentrationBandProfile)


class SeaIceConcentrationBandProfile(BandProfile):
def update_cog_asset(self, key: str, asset: Asset) -> Asset:
def cog_asset(self, href: str) -> Asset:
asset = super().cog_asset(href)
asset.extra_fields["raster:bands"][0]["spatial_resolution"] = SPATIAL_RESOLUTION
if key in KEYS_WITH_CLASSES:
if self.variable in VARIABLES_WITH_CLASSES:
asset.extra_fields["classification:classes"] = self.classes()
elif key in KEYS_WITH_BITFIELDS:
elif self.variable in VARIABLES_WITH_BITFIELDS:
asset.extra_fields["classification:bitfields"] = self.bitfield()
return asset

Expand Down
4 changes: 2 additions & 2 deletions src/stactools/noaa_cdr/sea_surface_temperature_whoi/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def create_cog_items(href: str, directory: str) -> List[Item]:
values = numpy.flipud(ds[variable].isel(time=i).values.squeeze())
values = numpy.roll(values, int(profiles[variable].width / 2), 1)
path = Path(directory) / f"{item.id}-{variable}.tif"
asset = cog.write(values, str(path), profiles[variable])
item.assets[variable] = asset
cog.write(values, str(path), profiles[variable])
item.assets[variable] = profiles[variable].cog_asset(str(path))
items.append(item)

return items
Expand Down
49 changes: 43 additions & 6 deletions tests/ocean_heat_content/test_stac.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import datetime
import os
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -32,10 +34,9 @@ def test_create_collection() -> None:
collection.validate_all()


def test_create_items_one_netcdf() -> None:
def test_create_items_one_netcdf(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
with TemporaryDirectory() as temporary_directory:
items = stac.create_items([path], temporary_directory)
items = stac.create_items([path], str(tmp_path))
assert len(items) == 17
for item in items:
assert len(item.assets) == 1
Expand Down Expand Up @@ -67,6 +68,22 @@ def test_create_items_one_netcdf() -> None:
item.validate()


def test_create_items_one_netcdf_cog_hrefs(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
items = stac.create_items([path], str(tmp_path))
subdirectory = tmp_path / "subdirectory"
subdirectory.mkdir()
new_paths = list()
for p in tmp_path.iterdir():
if p.suffix == ".tif":
new_path = subdirectory / p.name
p.rename(new_path)
new_paths.append(str(new_path))
new_items = stac.create_items([path], str(tmp_path), cog_hrefs=new_paths)
assert not any(p.suffix == ".tif" for p in tmp_path.iterdir())
assert len(new_items) == len(items)


def test_create_items_two_netcdfs_same_items(tmp_path: Path) -> None:
paths = [
test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc"),
Expand Down Expand Up @@ -124,7 +141,7 @@ def test_cogify(tmp_path: Path, infile: str, num_cogs: int) -> None:
# Because these netcdfs grow in place, we can never be sure of how many there should be.
assert len(cogs) >= num_cogs
for c in cogs:
assert Path(c.asset.href).exists()
assert Path(c.asset().href).exists()


def test_cogify_href(tmp_path: Path) -> None:
Expand All @@ -135,7 +152,7 @@ def test_cogify_href(tmp_path: Path) -> None:
cogs = cog.cogify(href, str(tmp_path))
assert len(cogs) == 17
for c in cogs:
assert Path(c.asset.href).exists()
assert Path(c.asset().href).exists()


def test_cogify_href_no_output_directory() -> None:
Expand All @@ -150,4 +167,24 @@ def test_cogify_href_no_output_directory() -> None:
def test_unitless(tmp_path: Path) -> None:
path = test_data.get_external_data("mean_salinity_anomaly_0-2000_yearly.nc")
cogs = cog.cogify(path, str(tmp_path))
assert "unit" not in cogs[0].asset.extra_fields["raster:bands"][0]
assert "unit" not in cogs[0].asset().extra_fields["raster:bands"][0]


def test_cogify_cog_href(tmp_path: Path) -> None:
path = test_data.get_external_data("heat_content_anomaly_0-2000_yearly.nc")
cogs = cog.cogify(path, str(tmp_path))
href = cogs[0].asset().href
subdirectory = tmp_path / "subdirectory"
subdirectory.mkdir()
href = shutil.move(href, subdirectory)
for p in tmp_path.iterdir():
if p.suffix == ".tif":
p.unlink()
new_cogs = cog.cogify(path, str(tmp_path), cog_hrefs=[href])
assert not (tmp_path / os.path.basename(href)).exists()
assert (
sum(1 for f in os.listdir(tmp_path) if os.path.splitext(f)[1] == ".tif")
== len(new_cogs) - 1
)
assert os.path.exists(href)
assert href in [new_cog.asset().href for new_cog in new_cogs]

0 comments on commit 584c457

Please sign in to comment.