Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copernicus-Bench: Add Level-3 Sentinel-5P #2607

Merged
merged 6 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,8 @@ Copernicus-Bench is a comprehensive evaluation benchmark with 15 downstream task
.. autoclass:: CopernicusBenchFloodS1
.. autoclass:: CopernicusBenchLCZS2
.. autoclass:: CopernicusBenchBiomassS3
.. autoclass:: CopernicusBenchAQNO2S5P
.. autoclass:: CopernicusBenchAQO3S5P

Base Classes
------------
Expand Down
2 changes: 2 additions & 0 deletions docs/api/datasets/copernicus_bench.csv
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ L2,DFC2020-S2,S,Sentinel-2,CC-BY-4.0,"5,128",10,256x256,10,MSI
L3,Flood-S1,CD,Sentinel-1,MIT,"5,000",3,224x224,10,SAR
L3,LCZ-S2,C,Sentinel-2,CC-BY-4.0,"25,000",17,32x32,10,MSI
L3,Biomass-S3,R,Sentinel-3,CC-BY-4.0,"5,000",-,96x96,300,MSI
L3,AQ-NO2-S5P,R,Sentinel-5P,CC-BY-4.0,"2,467",-,56x56,"1,000",-
L3,AQ-O3-S5P,R,Sentinel-5P,CC-BY-4.0,"2,467",-,56x56,"1,000",-
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_no2_avg_34_13
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_no2_avg_34_13
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_no2_avg_34_13
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_o3_avg_34_13
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_o3_avg_34_13
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA_1kmgrid_2021_o3_avg_34_13
90 changes: 90 additions & 0 deletions tests/data/copernicus/l3_airquality_s5p/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil

import numpy as np
import rasterio as rio
from rasterio import Affine
from rasterio.crs import CRS

SIZE = 32

# Seed the global NumPy RNG so that regenerating the fixtures is reproducible.
np.random.seed(0)

# Raster metadata shared by every GeoTIFF written by this script: a single
# float32 band of SIZE x SIZE pixels in ETRS89-extended / LAEA Europe
# (EPSG:3035) with a 1113.2-unit pixel size.
profile = {
    'driver': 'GTiff',
    'dtype': 'float32',
    'height': SIZE,
    'width': SIZE,
    'count': 1,
    'crs': CRS.from_wkt("""
PROJCS["ETRS89-extended / LAEA Europe",
GEOGCS["ETRS89",
DATUM["European_Terrestrial_Reference_System_1989",
SPHEROID["GRS 1980",6378137,298.257222101,
AUTHORITY["EPSG","7019"]],
AUTHORITY["EPSG","6258"]],
PRIMEM["Greenwich",0,
AUTHORITY["EPSG","8901"]],
UNIT["degree",0.0174532925199433,
AUTHORITY["EPSG","9122"]],
AUTHORITY["EPSG","4258"]],
PROJECTION["Lambert_Azimuthal_Equal_Area"],
PARAMETER["latitude_of_center",52],
PARAMETER["longitude_of_center",10],
PARAMETER["false_easting",4321000],
PARAMETER["false_northing",3210000],
UNIT["metre",1,
AUTHORITY["EPSG","9001"]],
AXIS["Northing",NORTH],
AXIS["Easting",EAST],
AUTHORITY["EPSG","3035"]]
"""),
    'transform': Affine(1113.2, 0.0, 3307317.2, 0.0, -1113.2, 3575598.4000000004),
}

# One random array is reused as the pixel content of every image and label.
Z = np.random.random(size=(profile['height'], profile['width']))

# Seasonal (quarterly) image filenames, named <start>_<stop>.tif.
files = [
    '2021-01-01_2021-04-01.tif',
    '2021-04-01_2021-07-01.tif',
    '2021-07-01_2021-10-01.tif',
    '2021-10-01_2021-12-31.tif',
]


def _write_raster(directory, filename):
    """Create *directory* if needed and write ``Z`` as band 1 of a GeoTIFF."""
    os.makedirs(directory, exist_ok=True)
    with rio.open(os.path.join(directory, filename), 'w', **profile) as dst:
        dst.write(Z, 1)


for variable in ('no2', 'o3'):
    pid = f'EEA_1kmgrid_2021_{variable}_avg_34_13'
    variable_dir = os.path.join('airquality_s5p', variable)

    # Image (annual)
    _write_raster(
        os.path.join(variable_dir, 's5p_annual', pid), '2021-01-01_2021-12-31.tif'
    )

    # Images (seasonal)
    seasonal_dir = os.path.join(variable_dir, 's5p_seasonal', pid)
    for seasonal_file in files:
        _write_raster(seasonal_dir, seasonal_file)

    # Label (annual)
    _write_raster(os.path.join(variable_dir, 'label_annual'), f'{pid}.tif')

    # Splits: every split lists the same single patch ID.
    for split in ('train', 'val', 'test'):
        with open(os.path.join(variable_dir, f'{split}.csv'), 'w') as fh:
            fh.write(f'{pid}\n')

# Zip
shutil.make_archive('airquality_s5p', 'zip', '.', 'airquality_s5p')
6 changes: 6 additions & 0 deletions tests/datasets/test_copernicus.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ class TestCopernicusBench:
('lcz_s2', 'l3_lcz_s2', {}),
('biomass_s3', 'l3_biomass_s3', {'mode': 'static'}),
('biomass_s3', 'l3_biomass_s3', {'mode': 'time-series'}),
('aq_no2_s5p', 'l3_airquality_s5p', {'mode': 'annual'}),
('aq_no2_s5p', 'l3_airquality_s5p', {'mode': 'seasonal'}),
('aq_o3_s5p', 'l3_airquality_s5p', {'mode': 'annual'}),
('aq_o3_s5p', 'l3_airquality_s5p', {'mode': 'seasonal'}),
]
)
def dataset(self, request: SubRequest) -> CopernicusBench:
Expand Down Expand Up @@ -108,6 +112,8 @@ def test_not_rgb(self, dataset: CopernicusBench) -> None:

if dataset.name.endswith('s1'):
all_bands = ['VV']
elif dataset.name.endswith('s5p'):
pytest.skip('single-band dataset')

dataset = CopernicusBench(dataset.name, dataset.root, bands=all_bands)
match = 'Dataset does not contain some of the RGB bands'
Expand Down
4 changes: 4 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from .cms_mangrove_canopy import CMSGlobalMangroveCanopy
from .copernicus import (
CopernicusBench,
CopernicusBenchAQNO2S5P,
CopernicusBenchAQO3S5P,
CopernicusBenchBase,
CopernicusBenchBigEarthNetS1,
CopernicusBenchBigEarthNetS2,
Expand Down Expand Up @@ -233,6 +235,8 @@
'ChesapeakeWV',
'CloudCoverDetection',
'CopernicusBench',
'CopernicusBenchAQNO2S5P',
'CopernicusBenchAQO3S5P',
'CopernicusBenchBase',
'CopernicusBenchBigEarthNetS1',
'CopernicusBenchBigEarthNetS2',
Expand Down
8 changes: 8 additions & 0 deletions torchgeo/datasets/copernicus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from torch import Tensor

from ..geo import NonGeoDataset
from .aq_no2_s5p import CopernicusBenchAQNO2S5P
from .aq_o3_s5p import CopernicusBenchAQO3S5P
from .base import CopernicusBenchBase
from .bigearthnet_s1 import CopernicusBenchBigEarthNetS1
from .bigearthnet_s2 import CopernicusBenchBigEarthNetS2
Expand All @@ -25,6 +27,8 @@

__all__ = (
'CopernicusBench',
'CopernicusBenchAQNO2S5P',
'CopernicusBenchAQO3S5P',
'CopernicusBenchBase',
'CopernicusBenchBigEarthNetS1',
'CopernicusBenchBigEarthNetS2',
Expand Down Expand Up @@ -55,6 +59,8 @@
'flood_s1': CopernicusBenchFloodS1,
'lcz_s2': CopernicusBenchLCZS2,
'biomass_s3': CopernicusBenchBiomassS3,
'aq_no2_s5p': CopernicusBenchAQNO2S5P,
'aq_o3_s5p': CopernicusBenchAQO3S5P,
}


Expand Down Expand Up @@ -86,6 +92,8 @@ def __init__(
'flood_s1',
'lcz_s2',
'biomass_s3',
'aq_no2_s5p',
'aq_o3_s5p',
],
*args: Any,
**kwargs: Any,
Expand Down
108 changes: 108 additions & 0 deletions torchgeo/datasets/copernicus/aq_no2_s5p.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Copernicus-Bench AQ-NO2-S5P dataset."""

import os
from collections.abc import Callable, Sequence
from typing import Literal

import torch
from torch import Tensor

from ..utils import Path, stack_samples
from .base import CopernicusBenchBase


class CopernicusBenchAQNO2S5P(CopernicusBenchBase):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could make a shared CopernicusBenchAQS5P base class, but then we need to document it as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be useful once we have more variables that can be aligned; it could come in a future update of Copernicus-Bench.

"""Copernicus-Bench AQ-NO2-S5P dataset.

AQ-NO2-S5P is a regression dataset based on Sentinel-5P NO2 images and
EEA air quality data products. Specifically, this dataset combines 2021
measurements of NO2 (annual average concentration) from EEA with S5P NO2
("tropospheric NO2 column number density") from GEE.

This benchmark supports both annual (1 image/location) and seasonal
(4 images/location) modes, the former is used in the original benchmark.

If you use this dataset in your research, please cite the following papers:

* https://arxiv.org/abs/2503.11849
* https://www.researchgate.net/profile/Jan-Horalek/publication/389165501_Air_quality_maps_of_EEA_member_and_cooperating_countries_for_2022/links/67b72628207c0c20fa8ec116/Air-quality-maps-of-EEA-member-and-cooperating-countries-for-2022.pdf

.. versionadded:: 0.7
"""

# Download location of the air-quality archive, pinned to a specific commit hash.
url = 'https://hf.co/datasets/wangyi111/Copernicus-Bench/resolve/9d252acd3aa0e3da3128e05c6f028647f0e48e5f/l3_airquality_s5p/airquality_s5p.zip'
# MD5 of the archive. NOTE(review): presumably verified when checksum=True -- confirm in base class.
md5 = '92081c7437c5c1daf783868ad7669877'
# File name of the archive.
zipfile = 'airquality_s5p.zip'
# NO2 subdirectory of the dataset; __getitem__ joins it onto the root directory.
directory = os.path.join('airquality_s5p', 'no2')
# Template for a CSV file name. NOTE(review): presumably formatted with the
# split name ('train.csv', ...) by the base class -- confirm.
filename = '{}.csv'
# NOTE(review): presumably the dtype the label is loaded as -- confirm in base class.
dtype = torch.float
# Captures the start and stop dates of names like '2021-01-01_2021-12-31.tif'.
filename_regex = r'(?P<start>\d{4}-\d{2}-\d{2})_(?P<stop>\d{4}-\d{2}-\d{2})'
# strptime-style format of the dates captured by filename_regex.
date_format = '%Y-%m-%d'
# The dataset has a single band; the same band is listed as the "RGB" bands.
all_bands = ('NO2',)
rgb_bands = ('NO2',)
# NOTE(review): looks like a matplotlib colormap name used for plotting -- confirm.
cmap = 'Wistia'

def __init__(
    self,
    root: Path = 'data',
    split: Literal['train', 'val', 'test'] = 'train',
    mode: Literal['annual', 'seasonal'] = 'annual',
    bands: Sequence[str] | None = None,
    transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None,
    download: bool = False,
    checksum: bool = False,
) -> None:
    """Initialize a new CopernicusBenchAQNO2S5P instance.

    Args:
        root: Root directory where dataset can be found.
        split: One of 'train', 'val', or 'test'.
        mode: One of 'annual' or 'seasonal'. In 'annual' mode each sample
            is loaded from a single image per location; in 'seasonal' mode
            four images per location are loaded and stacked.
        bands: Sequence of band names to load (defaults to all bands).
        transforms: A function/transform that takes input sample and its target as
            entry and returns a transformed version.
        download: If True, download dataset and store it in the root directory.
        checksum: If True, check the MD5 of the downloaded files (may be slow).

    Raises:
        DatasetNotFoundError: If dataset is not found and *download* is False.
    """
    # Stored before delegating to the base initializer.
    # NOTE(review): presumably so the base class can rely on it -- confirm.
    self.mode = mode
    super().__init__(root, split, bands, transforms, download, checksum)

def __getitem__(self, index: int) -> dict[str, Tensor]:
    """Load the image(s) and the annual label for one location.

    Args:
        index: Position of the location ID within ``self.files``.

    Returns:
        The sample for that location: the annual image, or the four
        seasonal images stacked with ``stack_samples``, merged with the
        annual label returned by ``_load_mask``. If ``self.transforms`` is
        set, its output is returned instead.
    """
    pid = self.files[index]
    data_dir = os.path.join(self.root, self.directory)

    if self.mode == 'annual':
        # One image covering the whole year.
        annual_path = os.path.join(
            data_dir, 's5p_annual', pid, '2021-01-01_2021-12-31.tif'
        )
        sample = self._load_image(annual_path)
    elif self.mode == 'seasonal':
        # Four quarterly images, loaded in chronological order and stacked.
        seasonal_dir = os.path.join(data_dir, 's5p_seasonal', pid)
        quarters = (
            '2021-01-01_2021-04-01.tif',
            '2021-04-01_2021-07-01.tif',
            '2021-07-01_2021-10-01.tif',
            '2021-10-01_2021-12-31.tif',
        )
        per_quarter = [
            self._load_image(os.path.join(seasonal_dir, name)) for name in quarters
        ]
        sample = stack_samples(per_quarter)

    # The label is always the annual product, whichever image mode is used.
    label_path = os.path.join(data_dir, 'label_annual', f'{pid}.tif')
    sample |= self._load_mask(label_path)

    if self.transforms is None:
        return sample

    return self.transforms(sample)
Loading