Merge pull request #76 from lukasValentin/eodal_v0.2.2
Upgrading to EOdal v0.2.2
lukasValentin authored Aug 19, 2023
2 parents 4d378f3 + cf23459 commit 239fa0d
Showing 33 changed files with 207 additions and 237 deletions.
21 changes: 0 additions & 21 deletions .github/workflows/pre-commit.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/test-release-candidate.yaml
@@ -48,7 +48,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python-version: ["3.11"]
include:
- os: macos-latest
python-version: "3.11"
12 changes: 2 additions & 10 deletions .github/workflows/test.yaml
@@ -23,10 +23,10 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.9", "3.10"]
python-version: ["3.11"]
include:
- os: windows-latest
python-version: "3.10"
python-version: "3.11"
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python-version }}
@@ -46,16 +46,8 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip wheel
python -m pip install flake8
python -m pip install --editable .[test]
- name: Sanity check with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings
python -m flake8 . --count --exit-zero --statistics
- name: Debug environment
run: python -m pip freeze

4 changes: 3 additions & 1 deletion CHANGELOG.rst
@@ -13,11 +13,13 @@ Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security
Version `0.2.2 <https://github.com/EOA-team/eodal/releases/tag/v0.2.2>`__
--------------------------------------------------------------------------------

Release date: 2023-08-XX
Release date: 2023-08-19

- Added: Full support for USGS Landsat Collection-2 data, giving access to more than 50 years of data starting with Landsat-1 (#66).
- Changed: The spectral index module has been made more flexible: custom bands can now be passed for index calculation.
- Fixed: Calls to the new `pydantic_settings` package have been added where necessary to provide compatibility with `pydantic` version 2.+ (#73).
- Added: The mapper now also supports merging (mosaicing) of scenes with slightly different time stamps, as often happens with EO platforms when scenes are acquired one after another. It works by calling `pandas.Timestamp.round` on the scene metadata time column (see the sketch after this list).
- Fixed: A set of deprecation warnings from `pydantic`, `matplotlib` and `shapely`.
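A minimal sketch of the new rounding behavior (illustrative time stamps, not part of the changelog file): rounding the metadata time column collapses near-simultaneous acquisitions onto one shared time stamp, which makes them candidates for mosaicing.

import pandas as pd

# two scene-center times a few seconds apart (illustrative values)
times = pd.Series(pd.to_datetime(
    ["2023-08-19 10:30:12+00:00", "2023-08-19 10:30:58+00:00"]))
# round to a 5-minute frequency -> both scenes share one time stamp
rounded = times.dt.round(freq="5min")
assert rounded.nunique() == 1  # the two scenes can now be mosaiced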


Version `0.2.1 <https://github.com/EOA-team/eodal/releases/tag/v0.2.1>`__
1 change: 1 addition & 0 deletions README.md
@@ -1,4 +1,5 @@
[![Documentation Status](https://readthedocs.org/projects/eodal/badge/?version=latest)](https://eodal.readthedocs.io/en/latest/?badge=latest)
[![codecov](https://codecov.io/gh/lukasValentin/eodal/graph/badge.svg?token=87CFHMHC41)](https://codecov.io/gh/lukasValentin/eodal)

# E:earth_africa:dal Earth Observation Data Analysis Library

2 changes: 1 addition & 1 deletion eodal/__meta__.py
@@ -15,4 +15,4 @@
description = "Earth Observation Data Analysis Library" # One-liner
url = "https://github.com/EOA-team/eodal" # your project home-page
license = "GNU General Public License version 3" # See https://choosealicense.com
version = "0.2.1"
version = "0.2.2"
6 changes: 0 additions & 6 deletions eodal/config/settings.py
@@ -123,12 +123,6 @@ def get_logger(self):
self.logger.addHandler(fh)
self.logger.addHandler(ch)

# env files are encoded utf-8, only
class Config:
env_file = "../../docker-services/.env"
env_file_encoding = "utf-8"
arbitrary_types_allowed = True


@lru_cache()
def get_settings():
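The deleted nested `Config` class is the pydantic v1 configuration style. A minimal sketch of the pydantic-2 era equivalent, assuming `pydantic-settings` 2.x (the actual EOdal settings class may differ):

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    # pydantic v2 replaces the nested `class Config` with `model_config`
    model_config = SettingsConfigDict(
        env_file="../../docker-services/.env",  # path from the deleted block
        env_file_encoding="utf-8",
        arbitrary_types_allowed=True,
    )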
8 changes: 6 additions & 2 deletions eodal/core/band.py
@@ -1579,7 +1579,7 @@ def plot(
# get color-map
cmap = user_defined_colors
if cmap is None:
cmap = plt.cm.get_cmap(colormap)
cmap = mpl.colormaps.get_cmap(colormap)

# check if data is continuous (spectral) or discrete (np.unit8)
if discrete_values:
@@ -1981,7 +1981,11 @@ def reproject(
except Exception as e:
raise ReprojectionError(f"Could not re-project band {self.band_name}: {e}")

# cast array back to original dtype
# cast array back to original data type
# make sure to handle NaNs properly
if np.isnan(out_data).any() and not self.values.dtype == float:
out_data[np.isnan(out_data)] = np.nanmin(out_data)

if len(out_data.shape) == 2:
out_data = out_data.astype(self.values.dtype)
elif len(out_data.shape) == 3:
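The NaN handling added above matters because casting NaN to an integer dtype is undefined in NumPy. A self-contained sketch of the same pattern (hypothetical values; like the diff, it fills NaNs with the array minimum before casting back):

import numpy as np

# float output of a reprojection; nodata areas are NaN (hypothetical)
out_data = np.array([[1200.0, np.nan], [1340.0, 1500.0]])
orig_dtype = np.uint16  # the band's original data type

# fill NaNs before casting, since NaN cannot be represented as an integer
if np.isnan(out_data).any() and orig_dtype != float:
    out_data[np.isnan(out_data)] = np.nanmin(out_data)
out_data = out_data.astype(orig_dtype)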
3 changes: 2 additions & 1 deletion eodal/core/sensors/landsat.py
@@ -127,7 +127,8 @@ def _get_metadata_json(
elif isinstance(in_dir, Path):
fpath_json = in_dir.joinpath('MTL.json')
if fpath_json.exists():
metadata = json.loads(metadata_json.read_text())
with open(fpath_json, 'r') as src:
metadata = json.load(src)
else:
fpath_xml = in_dir.joinpath('MTL.xml')
metadata_xml_tree = ElementTree.parse(fpath_xml)
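The change above fixes a reference to an undefined variable (`metadata_json`) by actually opening the JSON file. A condensed, hedged sketch of the corrected logic (the function name `read_mtl` is illustrative):

import json
from pathlib import Path
from xml.etree import ElementTree

def read_mtl(in_dir: Path):
    """Read Landsat MTL metadata, preferring JSON over XML (sketch)."""
    fpath_json = in_dir.joinpath("MTL.json")
    if fpath_json.exists():
        with open(fpath_json, "r") as src:
            return json.load(src)  # the bug fix: read the file that exists
    return ElementTree.parse(in_dir.joinpath("MTL.xml"))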
5 changes: 3 additions & 2 deletions eodal/core/sensors/sentinel2.py
@@ -525,8 +525,9 @@ def read_pixels_from_safe(
if gdf.dtypes[band_names].unique() in ["float32", "float64"]:
gdf[band_names] = gdf[band_names].replace({0.0: np.nan})
gdf.dropna(axis=0, inplace=True)
elif gdf.dtypes[band_names].unique() in ["int16", "int32", "int64"]:
gdf = gdf.loc[~(gdf[band_df_safe.band_name] == 0).all(axis=1)]
elif gdf.dtypes[band_names].unique() in ["uint16", "int16", "int32", "int64"]:
gdf = gdf.loc[
~(gdf[band_df_safe.band_name] == 0).all(axis=1)].copy()

return gdf

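The new branch also covers `uint16` rasters and adds `.copy()` to avoid pandas' SettingWithCopyWarning on the filtered frame. A standalone sketch of the zero-row filter with hypothetical band values:

import pandas as pd

# hypothetical pixel table: one row per pixel, one column per band
gdf = pd.DataFrame({"B02": [0, 120, 0], "B03": [0, 98, 44]}, dtype="uint16")
band_names = ["B02", "B03"]

# drop rows where ALL bands are 0 (0 marks blackfill/nodata)
gdf = gdf.loc[~(gdf[band_names] == 0).all(axis=1)].copy()
# row 0 is dropped; rows 1 and 2 are kept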
4 changes: 2 additions & 2 deletions eodal/mapper/feature.py
@@ -25,7 +25,7 @@
import pandas as pd

from shapely import wkt
from shapely.errors import WKTReadingError
from shapely.errors import ShapelyError
from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon
from typing import Any, Dict, Optional

@@ -148,7 +148,7 @@ def from_dict(cls, dictionary: Dict[str, Any]):
raise ValueError(
"Dictionary does not have fields required to instantiate a new Feature"
)
except WKTReadingError as e:
except ShapelyError as e:
raise ValueError(f"Invalid Geometry: {e}")

def to_epsg(self, epsg: int):
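`WKTReadingError` was removed in shapely 2.x; `ShapelyError` is the library's base exception and also covers WKT parsing failures. A minimal sketch, assuming shapely >= 2.0:

from shapely import wkt
from shapely.errors import ShapelyError

try:
    geom = wkt.loads("POLYGON ((0 0, 1 0")  # malformed on purpose
except ShapelyError as e:
    raise ValueError(f"Invalid Geometry: {e}")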
92 changes: 73 additions & 19 deletions eodal/mapper/mapper.py
@@ -334,6 +334,15 @@ def sensor(self) -> str | None:
def time_column(self) -> str:
return self._time_column

@time_column.setter
def time_column(self, value: str):
"""..versionadd:: 0.2.2"""
if not isinstance(value, str):
raise TypeError('time_column must be a string')
if len(value) <= 0:
raise ValueError('String must not be empty')
self._time_column = value

def query_scenes(self) -> None:
"""
Query available scenes for the current `MapperConfigs` and loads
@@ -382,6 +391,11 @@ def query_scenes(self) -> None:
except Exception as e:
raise DatabaseError(f"Querying metadata DB failed: {e}")

# make sure the time column is handled as pandas datetime
# objects
scenes_df[self.time_column] = pd.to_datetime(
scenes_df[self.time_column], utc=True)

# populate the metadata attribute
self.metadata = scenes_df

@@ -455,6 +469,7 @@ def _load_scenes_collection(
scene_constructor_kwargs: Optional[Dict[str, Any]] = {},
scene_modifier: Optional[Callable[..., RasterCollection]] = None,
scene_modifier_kwargs: Optional[Dict[str, Any]] = {},
round_time_stamps_to_freq: Optional[str] = None
) -> None:
"""
Auxiliary method to handle EOdal scenes and store them into a SceneCollection.
@@ -483,6 +498,16 @@
are no pixel shifts due to re-projections from different spatial
reference systems such as different UTM zones.
.. versionadded:: 0.2.2
When mosaicing (i.e., a scene is split into multiple tiles) we not only
have to look for spatial and temporal overlap (old behavior) but must
also allow a certain tolerance in the time stamps, typically in the
range of seconds to minutes, to merge imagery that was acquired shortly
one after another. To do so, a new keyword argument
`round_time_stamps_to_freq` was added that defaults to None (old
behavior) and can be set to any string value accepted by
`~pandas.Timestamp.round`.
:param scene_constructor:
Callable used to read the scenes found into `RasterCollection` fulfilling
the `is_scene` criterion (i.e., a time stamp is available). The callable is
@@ -504,14 +529,34 @@
to calculate spectral indices on the fly or for applying masks.
:param scene_modifier_kwargs:
optional keyword arguments for `scene_modifier` (if any).
:param round_time_stamps_to_freq:
.. versionadded:: 0.2.2
optionally round scene time stamps to a custom temporal frequency accepted
by `~pandas.Timestamp.round` to allow mosaicing of scenes with slightly
different time stamps, as might be necessary for some EO platforms.
"""
# open a SceneCollection for storing the data
scoll = SceneCollection()
logger.info(f"Starting extraction of {self.sensor} scenes")
# filter out datasets where mosaicing is necessary (time stamp is the same)
# filter out datasets for which mosaicing is necessary (time stamp is the same)
# .. versionadded:: 0.2.2
# Allow a user-defined temporal tolerance (background: some
# platforms such as Landsat provide the "scene-center" time, i.e., two
# scenes that were acquired one after another differ only by a few minutes)
if round_time_stamps_to_freq is not None:
# new optional behavior eodal >= 0.2.2
rounded_time_column = \
f'{self.time_column}_rounded_{round_time_stamps_to_freq}'
self.metadata[rounded_time_column] = \
self.metadata[self.time_column].dt.round(
freq=round_time_stamps_to_freq)
# set time column to rounded time stamps but keep
# the name of the "old", i.e., original time column
self.time_column = rounded_time_column

self.metadata["_duplicated"] = self.metadata[self.time_column].duplicated(
keep=False
)
keep=False
)
# datasets where the 'duplicated' entry is False are truly unique
_metadata_unique = self.metadata[~self.metadata._duplicated].copy()
_metadata_nonunique = self.metadata[self.metadata._duplicated].copy()
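`duplicated(keep=False)` flags every member of a duplicate group, so truly unique scenes pass straight through while all duplicates go into the mosaicing branch. A standalone sketch with made-up time stamps:

import pandas as pd

meta = pd.DataFrame({"sensing_time": pd.to_datetime(
    ["2023-08-19 10:30", "2023-08-19 10:30", "2023-08-20 10:20"], utc=True)})
# keep=False marks ALL rows sharing a time stamp, not only the repeats
meta["_duplicated"] = meta["sensing_time"].duplicated(keep=False)
unique = meta[~meta._duplicated]    # loaded as single scenes
to_mosaic = meta[meta._duplicated]  # merged into one scene per time stamp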
@@ -552,14 +597,14 @@ def _load_scenes_collection(
# merge datasets using rasterio and read results back into a scene
band_options = {
"band_names": _scene.band_names,
"band_aliases": _scene.band_aliases,
"band_aliases": _scene.band_aliases
}
scene = merge_datasets(
datasets=dataset_list,
target_crs=self.metadata.target_epsg.unique()[0],
vector_features=self.mapper_configs.feature.to_geoseries(),
sensor=self.sensor,
band_options=band_options,
band_options=band_options
)
# handle scene properties. They need to be merged as well
merged_scene_properties = scene_properties_list[0]
@@ -596,13 +641,15 @@ def _load_scenes_collection(
)
for updated_scene_properties in update_scene_properties_list:
# use the time stamp for finding the correct metadata
# records. Theremight be some disagreement in the milliseconds
# records. There might be some disagreement in the milliseconds
# because of different precision levels; therefore, an offset
# of less than 60 minutes is tolerated
idx = self.metadata[
abs(
self.metadata[self.time_column]
- pd.to_datetime(updated_scene_properties.acquisition_time)
- pd.to_datetime(
updated_scene_properties.acquisition_time,
utc=True)
)
< pd.Timedelta(60, unit="minutes")
].index
@@ -682,13 +729,20 @@ def _load_scenes_collection(
dst_shape,
dtype=band.values.dtype
)
# determine nodata
if not np.isnan(band.nodata):
dst_nodata = band.nodata
else:
# if band nodata is NaN we set
# no-data to None (rasterio default)
dst_nodata = None
band.reproject(
inplace=True,
target_crs=reference_band.crs,
dst_transform=dst_transform,
destination=destination,
dst_nodata=band.nodata)
dst_nodata=dst_nodata)

self.data = scoll

logger.info(f"Finished extraction of {self.sensor} scenes")
@@ -736,6 +790,7 @@ def load_scenes(
self,
scene_kwargs: Optional[Dict[str, Any]] = None,
pixel_kwargs: Optional[Dict[str, Any]] = None,
round_time_stamps_to_freq: Optional[str] = None
) -> None:
"""
Load scenes from `~Mapper.query_scenes` result into a `SceneCollection`
@@ -754,6 +809,11 @@
keyword arguments to pass to `~Mapper._load_pixels` for handling
single pixel values. These arguments *MUST* be provided when
using `Point` or `MultiPoint` geometries in the `Mapper` call.
:param round_time_stamps_to_freq:
.. versionadded:: 0.2.2
optionally round scene time stamps to a custom temporal frequency accepted
by `~pandas.Timestamp.round` to allow mosaicing of scenes with slightly
different time stamps, as might be necessary for some EO platforms.
"""
# check if the correct keyword arguments have been passed
if not self._geoms_are_points:
@@ -785,15 +845,6 @@
except KeyError as e:
raise ValueError(f"Could not determine CRS of scenes: {e}")

# check if mosaicing scenes is required. This is done by checking the
# sensing_time time stamps. If there are multiple scenes with the same
# time stamp they must be mosaiced into a single scene
self.metadata["mosaicing"] = False
duplicated_idx = self.metadata[
self.metadata.duplicated([self.time_column])
].index
self.metadata.loc[duplicated_idx, "mosaicing"] = True

# provide paths to raster data. Depending on the settings, this is a path on the
# file system or a URL
self.metadata["real_path"] = ""
@@ -808,4 +859,7 @@
if self._geoms_are_points:
self._load_pixels(**pixel_kwargs)
else:
self._load_scenes_collection(**scene_kwargs)
self._load_scenes_collection(
round_time_stamps_to_freq=round_time_stamps_to_freq,
**scene_kwargs
)
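A minimal end-to-end usage sketch of the new keyword. `mapper_configs` and `scene_kwargs` are hypothetical, fully populated objects (see examples/download_ndvi.py in this diff for a real set-up), and the import path follows the module layout shown here:

from eodal.mapper.mapper import Mapper

def load_mosaiced_scenes(mapper_configs, scene_kwargs):
    # `mapper_configs` (a MapperConfigs) and `scene_kwargs` are assumed
    # to be configured elsewhere
    mapper = Mapper(mapper_configs)
    mapper.query_scenes()
    # round scene-center times to 5 minutes so tiles acquired a few seconds
    # apart share a time stamp and are mosaiced into a single scene
    mapper.load_scenes(scene_kwargs=scene_kwargs,
                       round_time_stamps_to_freq="5min")
    return mapper.data  # a SceneCollection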
File renamed without changes.
13 changes: 9 additions & 4 deletions scripts/download_ndvi.py → examples/download_ndvi.py
@@ -46,20 +46,24 @@ def get_ndvi(
mapper.load_scenes(scene_kwargs=scene_kwargs)

# calculate the NDVI
for scene_uri, scene in mapper.data:
for scene_timestamp, scene in mapper.data:
scene.calc_si('NDVI', inplace=True)
# save NDVI as GeoTiff
fpath_ndvi = output_dir.joinpath(
scene_uri.stem + '_ndvi.tif'
f'{scene_timestamp}_ndvi.tif'
)
scene['ndvi'].to_rasterio(fpath_ndvi)


if __name__ == '__main__':

import os
cwd = Path(__file__).parents[1]
os.chdir(cwd)

# -------------------------- Paths -------------------------------------
# define the output directory where to save the NDVI GeoTiff files
output_dir = Path('data')
output_dir = cwd.joinpath('data')

# user-inputs
# -------------------------- Collection -------------------------------
@@ -73,7 +77,8 @@ def get_ndvi(
cloudy_pixel_percentage: int = 25 # percent (scene-wide)

# ---------------------- Spatial Feature ------------------------------
geom = Path('data/sample_polygons/ZH_Polygon_73129_ESCH_EPSG32632.shp')
geom = cwd.joinpath(
'data/sample_polygons/ZH_Polygon_73129_ESCH_EPSG32632.shp')

# ------------------------- Metadata Filters ---------------------------
metadata_filters: List[Filter] = [
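One caveat with the new time-stamp based file names: `str(scene_timestamp)` contains colons, which are not allowed in Windows file names. A defensive variant (an assumption on my part, not what the example does) would format the stamp first:

import pandas as pd
from pathlib import Path

scene_timestamp = pd.Timestamp("2023-08-19 10:30:27+00:00")  # illustrative
# format to a filesystem-safe string before building the file name
fname = scene_timestamp.strftime("%Y%m%dT%H%M%S")
fpath_ndvi = Path("data").joinpath(f"{fname}_ndvi.tif")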