From 0de793db7f9372888ff9850faad451925d657115 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 10:33:52 +0200 Subject: [PATCH 01/41] Adding minimal test class on iasi ng l2 reader --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py new file mode 100644 index 0000000000..e47f4db72b --- /dev/null +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -0,0 +1,24 @@ +# Copyright (c) 2022 Satpy developers +# +# satpy is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# satpy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with satpy. If not, see . + +"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" + + +class TestIasiNGL2: + """Main test class for the IASI NG L2 reader.""" + + def test_sanity(self): + """Minimal temporary sanity check unit test.""" + assert 1 == 0 From bdb2cc8766e8af37688669aa5af4449e157b968f Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 11:18:44 +0200 Subject: [PATCH 02/41] Fixing sanity unit test --- satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index e47f4db72b..3979e42283 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -21,4 +21,4 @@ class TestIasiNGL2: def test_sanity(self): """Minimal temporary sanity check unit test.""" - assert 1 == 0 + assert 1 == 1 From 83736952ddac308b676383c6da9a364add622f29 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 17:43:04 +0200 Subject: [PATCH 03/41] Added initial version of file pattern and unit test --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 17 ++++ satpy/readers/iasi_ng_l2_nc.py | 70 +++++++++++++++++ .../tests/reader_tests/test_iasi_ng_l2_nc.py | 78 ++++++++++++++++++- 3 files changed, 161 insertions(+), 4 deletions(-) create mode 100644 satpy/etc/readers/iasi_ng_l2_nc.yaml create mode 100644 satpy/readers/iasi_ng_l2_nc.py diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml new file mode 100644 index 0000000000..d239ba7f82 --- /dev/null +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -0,0 +1,17 @@ +reader: + name: iasi_ng_l2_nc + short_name: IASI-NG L2 NC Reader + long_name: IASI-NG Level-2 NetCDF Reader + description: Reader for IASI-NG Level-2 NetCDF files + sensors: [iasi] + reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + status: Alpha + supports_fsspec: false + +file_types: + iasi_ng_l2_nc: + file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler + file_patterns: + [ + 
"{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}-{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + ] diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py new file mode 100644 index 0000000000..33d82ed852 --- /dev/null +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -0,0 +1,70 @@ +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . + +r"""IASI-NG L2 reader + +This reader supports reading all the products from the IASI-NG L2 processing +level: + * IASI-L2-TWV + * IASI-L2-CLD + * IASI-L2-GHG + * IASI-L2-SFC +""" + +import logging +import logging +import numpy as np +import xarray as xr + + +from .netcdf_utils import NetCDF4FsspecFileHandler + +logger = logging.getLogger(__name__) + + +class IASINGL2NCFileHandler(NetCDF4FsspecFileHandler): + """Reader for IASI-NG L2 products in NetCDF format.""" + + def __init__(self, filename, filename_info, filetype_info, **kwargs): + """Initialize object.""" + super().__init__(filename, filename_info, filetype_info, **kwargs) + + logger.info("Creating reader with infos: %s", filename_info) + + # def get_dataset(self, data_id, ds_info): + # """Obtain dataset.""" + # ds = self[data_id["name"]] + # if "scan_lines" in ds.dims: + # ds = ds.rename(scan_lines="y") + # if "pixels" in ds.dims: + # ds = ds.rename(pixels="x") + # if "_FillValue" in ds.attrs and ds.dtype.kind == "f": + # with xr.set_options(keep_attrs=True): + # # have to inverse the logic due to https://github.com/pydata/xarray/issues/7581 + # return xr.where(ds != ds.attrs["_FillValue"], ds, np.nan) + # return ds + + # def available_datasets(self, configured_datasets=None): + # """Get available datasets based on what's in the file. + + # Returns all datasets in the root group. + # """ + # yield from super().available_datasets(configured_datasets) + # common = {"file_type": "iasi_l2_cdr_nc", "resolution": 12000} + # for key in self.file_content: + # if "/" in key: # not a dataset + # continue + # yield (True, {"name": key} | common | self[key].attrs) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 3979e42283..822593314b 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,11 +14,81 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" +import pytest +import logging +import os +from unittest import mock +from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler +from satpy.readers import load_reader -class TestIasiNGL2: +from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler + +logger = logging.getLogger(__name__) + + +class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): + """Fake base class for IASI NG handler""" + + def get_test_content(self, _filename, _filename_info, _filetype_info): + """Get the content of the test data. + + Here we generate the default content we want to provide depending + on the provided filename infos. + """ + + dset = {} + return dset + + +class TestIASINGL2NCReader: """Main test class for the IASI NG L2 reader.""" - def test_sanity(self): - """Minimal temporary sanity check unit test.""" - assert 1 == 1 + yaml_file = "iasi_ng_l2_nc.yaml" + + def setup_method(self): + """Setup the reade config""" + from satpy._config import config_search_paths + + self.reader_configs = config_search_paths( + os.path.join("readers", self.yaml_file) + ) + + @pytest.fixture(autouse=True, scope="class") + def fake_handler(self): + """Wrap NetCDF4 FileHandler with our own fake handler.""" + patch_ctx = mock.patch.object( + IASINGL2NCFileHandler, "__bases__", (FakeIASINGFileHandlerBase,) + ) + + with patch_ctx: + patch_ctx.is_local = True + yield patch_ctx + + def test_filename_matching(self): + # Example filename + filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425-20220101115728_____W______.nc" + + reader = load_reader(self.reader_configs) + + # Test if the file is recognized by the reader + files = reader.select_files_from_pathnames([filename]) + + assert len(files) == 1, "File should be recognized by the reader" + + # Create the file handler: + reader.create_filehandlers(files) + + # We should have our handler now: + assert len(reader.file_handlers) == 1 + + # logger.info("File handlers are: %s", reader.file_handlers) + + assert "iasi_ng_l2_nc" in reader.file_handlers + + handlers = reader.file_handlers["iasi_ng_l2_nc"] + + # We should have a single handler for a single file: + assert len(handlers) == 1 + + assert isinstance(handlers[0], IASINGL2NCFileHandler) From 21674841f1192823676cf088d96bd1a0122d840a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 18:13:41 +0200 Subject: [PATCH 04/41] Fixing minor error in file pattern --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 2 +- satpy/readers/iasi_ng_l2_nc.py | 2 +- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 41 ++++++++++++++----- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index d239ba7f82..3b48654c59 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -13,5 +13,5 @@ file_types: file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler file_patterns: [ - "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}-{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + 
"{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 33d82ed852..de362d4b84 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -42,7 +42,7 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): """Initialize object.""" super().__init__(filename, filename_info, filetype_info, **kwargs) - logger.info("Creating reader with infos: %s", filename_info) + # logger.info("Creating reader with infos: %s", filename_info) # def get_dataset(self, data_id, ds_info): # """Obtain dataset.""" diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 822593314b..a686ef0443 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -44,14 +44,14 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): class TestIASINGL2NCReader: """Main test class for the IASI NG L2 reader.""" - yaml_file = "iasi_ng_l2_nc.yaml" + reader_name = "iasi_ng_l2_nc" def setup_method(self): """Setup the reade config""" from satpy._config import config_search_paths self.reader_configs = config_search_paths( - os.path.join("readers", self.yaml_file) + os.path.join("readers", self.reader_name + ".yaml") ) @pytest.fixture(autouse=True, scope="class") @@ -65,15 +65,14 @@ def fake_handler(self): patch_ctx.is_local = True yield patch_ctx - def test_filename_matching(self): - # Example filename - filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425-20220101115728_____W______.nc" + def _create_file_handler(self, filename): + """Create an handler for the given file checking that it can + be parsed correctly""" reader = load_reader(self.reader_configs) # Test if the file is recognized by the reader files = reader.select_files_from_pathnames([filename]) - assert len(files) == 1, "File should be recognized by the reader" # Create the file handler: @@ -83,12 +82,34 @@ def test_filename_matching(self): assert len(reader.file_handlers) == 1 # logger.info("File handlers are: %s", reader.file_handlers) + assert self.reader_name in reader.file_handlers - assert "iasi_ng_l2_nc" in reader.file_handlers - - handlers = reader.file_handlers["iasi_ng_l2_nc"] + handlers = reader.file_handlers[self.reader_name] # We should have a single handler for a single file: assert len(handlers) == 1 - assert isinstance(handlers[0], IASINGL2NCFileHandler) + + return handlers[0] + + def test_filename_matching(self): + """Test filename matching against some random name""" + + # Example filename + filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc" + + self._create_file_handler(filename) + + def test_real_filename_matching(self): + """Test that we will match an actual IASI NG L2 product file name""" + + # Below we test the TWV,CLD,GHG and SFC products: + filenames = { + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc", + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-CLD_C_EUMT_20170616120000_G_V_20070912094037_20070912094308_O_N____.nc", + 
"W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-GHG_C_EUMT_20170616120000_G_V_20070912090651_20070912090922_O_N____.nc", + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-SFC_C_EUMT_20170616120000_G_V_20070912100911_20070912101141_O_N____.nc", + } + + for filename in filenames: + self._create_file_handler(filename) From 53c7e52259d285648885aa2f01a6cabc5da24de6 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 18:36:27 +0200 Subject: [PATCH 05/41] Adding simple properties in reader --- satpy/readers/iasi_ng_l2_nc.py | 35 ++++++++++++++++++- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 20 ++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index de362d4b84..9342db632b 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -42,7 +42,40 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): """Initialize object.""" super().__init__(filename, filename_info, filetype_info, **kwargs) - # logger.info("Creating reader with infos: %s", filename_info) + self.sensors = {"iasi_ng"} + + # List of datasets provided by this handler: + self.dataset_infos = [] + + logger.info("Creating reader with infos: %s", filename_info) + + @property + def start_time(self): + """Get the start time.""" + return self.filename_info["sensing_start_time"] + + @property + def end_time(self): + """Get the end time.""" + return self.filename_info["sensing_end_time"] + + @property + def sensor_names(self): + """List of sensors represented in this file.""" + return self.sensors + + def available_datasets(self, configured_datasets=None): + """Determine automatically the datasets provided by this file. + + Uses a per product type dataset registration mechanism. 
+ """ + + # pass along existing datasets + for is_avail, ds_info in configured_datasets or []: + yield is_avail, ds_info + + for ds_info in self.dataset_infos: + yield True, ds_info # def get_dataset(self, data_id, ds_info): # """Obtain dataset.""" diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index a686ef0443..25c7a0fb27 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -19,7 +19,7 @@ import os from unittest import mock from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler - +from datetime import datetime from satpy.readers import load_reader from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler @@ -65,6 +65,12 @@ def fake_handler(self): patch_ctx.is_local = True yield patch_ctx + @pytest.fixture() + def twv_handler(self): + """Create a simple (and fake) default handler on a TWV product""" + filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" + return self._create_file_handler(filename) + def _create_file_handler(self, filename): """Create an handler for the given file checking that it can be parsed correctly""" @@ -113,3 +119,15 @@ def test_real_filename_matching(self): for filename in filenames: self._create_file_handler(filename) + + def test_sensing_times(self, twv_handler): + """Test that we read the sensing start/end times correctly + from filename""" + + assert twv_handler.start_time == datetime(2007, 9, 12, 8, 43, 29) + assert twv_handler.end_time == datetime(2007, 9, 12, 8, 46, 0) + + def test_sensor_names(self, twv_handler): + """Test that the handler reports iasi_ng as sensor""" + + assert twv_handler.sensor_names == {"iasi_ng"} From 252808a9c4da4fa8c80d91fc7bc30066341b74be Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 08:39:46 +0200 Subject: [PATCH 06/41] Preparing initial support to read datasets in iasi_ng handler --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 7 +- satpy/readers/iasi_ng_l2_nc.py | 214 +++++++++++++++--- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 9 +- 3 files changed, 196 insertions(+), 34 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 3b48654c59..0dcb3ee2d7 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -13,5 +13,10 @@ file_types: file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler file_patterns: [ - "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] + datasets: + latitude: + location: data/geolocation_information/sounder_pixel_latitude + longitude: + location: data/geolocation_information/sounder_pixel_longitude diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 9342db632b..c20c6289f2 100644 --- 
a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -25,10 +25,9 @@ """ import logging -import logging -import numpy as np -import xarray as xr +import netCDF4 +import numpy as np from .netcdf_utils import NetCDF4FsspecFileHandler @@ -45,10 +44,15 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): self.sensors = {"iasi_ng"} # List of datasets provided by this handler: - self.dataset_infos = [] + self.dataset_infos = None + + # Description of the available datasets: + self.ds_desc = self.filetype_info["datasets"] logger.info("Creating reader with infos: %s", filename_info) + self.register_available_datasets() + @property def start_time(self): """Get the start time.""" @@ -74,30 +78,182 @@ def available_datasets(self, configured_datasets=None): for is_avail, ds_info in configured_datasets or []: yield is_avail, ds_info - for ds_info in self.dataset_infos: + for _, ds_info in self.dataset_infos.items(): yield True, ds_info - # def get_dataset(self, data_id, ds_info): - # """Obtain dataset.""" - # ds = self[data_id["name"]] - # if "scan_lines" in ds.dims: - # ds = ds.rename(scan_lines="y") - # if "pixels" in ds.dims: - # ds = ds.rename(pixels="x") - # if "_FillValue" in ds.attrs and ds.dtype.kind == "f": - # with xr.set_options(keep_attrs=True): - # # have to inverse the logic due to https://github.com/pydata/xarray/issues/7581 - # return xr.where(ds != ds.attrs["_FillValue"], ds, np.nan) - # return ds - - # def available_datasets(self, configured_datasets=None): - # """Get available datasets based on what's in the file. - - # Returns all datasets in the root group. - # """ - # yield from super().available_datasets(configured_datasets) - # common = {"file_type": "iasi_l2_cdr_nc", "resolution": 12000} - # for key in self.file_content: - # if "/" in key: # not a dataset - # continue - # yield (True, {"name": key} | common | self[key].attrs) + def register_dataset(self, ds_name, desc): + """Register a simple dataset given its name and a desc dict""" + + if ds_name in self.dataset_infos: + raise ValueError(f"Dataset for {ds_name} already registered.") + + ds_infos = { + "name": ds_name, + "sensor": "iasi_ng", + "file_type": self.filetype_info["file_type"], + } + + ds_infos.update(desc) + + self.dataset_infos[ds_name] = ds_infos + + def register_available_datasets(self): + """Register the available dataset in the current product file""" + + if self.dataset_infos is not None: + # Datasets are already registered. + return + + # Otherwise, we need to perform the registration: + self.dataset_infos = {} + + for ds_name, desc in self.ds_desc.items(): + self.register_dataset(ds_name, desc) + + def get_dataset_infos(self, ds_name): + """Retrieve the dataset infos corresponding to one of the registered + datasets.""" + if ds_name not in self.dataset_infos: + raise KeyError(f"No dataset registered for {ds_name}") + + return self.dataset_infos[ds_name] + + def variable_path_exists(self, var_path): + """Check if a given variable path is available in the underlying + netCDF file. + + All we really need to do here is to access the file_content dictionary + and check if we have a variable under that var_path key. 
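+
+        Note that the file_content dictionary provided by the base netCDF
+        handler also contains attribute and sub-property entries, so those
+        are explicitly filtered out below.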
+        """
+        # But we ignore attribute entries or sub-properties:
+        if var_path.startswith("/attr") or var_path.endswith(
+            ("/dtype", "/shape", "/dimensions")
+        ):
+            return False
+
+        # Check if the path is found:
+        if var_path in self.file_content:
+            # This is only a valid variable if it is not a netcdf group:
+            return not isinstance(self.file_content[var_path], netCDF4.Group)
+
+        # Var path not in file_content:
+        return False
+
+    def apply_fill_value(self, data_array, ds_info):
+        """Apply the fill-value masking on a given array."""
+
+        # Check if we should apply:
+        if ds_info.get("apply_fill_value", True) is not True:
+            return data_array
+
+        attribs = data_array.attrs
+
+        # Apply the min/max valid range:
+        if "valid_min" in attribs:
+            vmin = attribs["valid_min"]
+            data_array = data_array.where(data_array < vmin, np.float32(np.nan))
+
+        if "valid_max" in attribs:
+            vmax = attribs["valid_max"]
+            data_array = data_array.where(data_array > vmax, np.float32(np.nan))
+
+        if "valid_range" in attribs:
+            vrange = attribs["valid_range"]
+            data_array = data_array.where(data_array < vrange[0], np.float32(np.nan))
+            data_array = data_array.where(data_array > vrange[1], np.float32(np.nan))
+
+        # Check the missing value:
+        missing_val = attribs.get("missing_value", None)
+        missing_val = attribs.get("_FillValue", missing_val)
+
+        if missing_val is None:
+            return data_array
+
+        return data_array.where(data_array != missing_val, np.float32(np.nan))
+
+    def apply_rescaling(self, data_array, ds_info):
+        """Apply the rescaling transform on a given array."""
+
+        # Here we should apply the rescaling except if it is explicitly
+        # requested not to rescale:
+        if ds_info.get("apply_rescaling", True) is not True:
+            return data_array
+
+        # Check if we have the scaling elements:
+        attribs = data_array.attrs
+        if "scale_factor" in attribs or "add_offset" in attribs:
+            scale_factor = attribs.setdefault("scale_factor", 1)
+            add_offset = attribs.setdefault("add_offset", 0)
+
+            data_array = (data_array * scale_factor) + add_offset
+
+            # rescale the valid range accordingly
+            if "valid_range" in attribs:
+                attribs["valid_range"] = (
+                    attribs["valid_range"] * scale_factor + add_offset
+                )
+
+        data_array.attrs.update(attribs)
+
+        return data_array
+
+    def apply_reshaping(self, data_array, ds_info):
+        """Apply the reshaping transform on a given IASI-NG data array.
+
+        Those arrays may come as 3D arrays, in which case we collapse the
+        last 2 dimensions on a single axis (i.e.
the number of columns or "y") + + In the process, we also rename the first axis to "x" + """ + + if ds_info.get("apply_reshaping", True) is not True: + return data_array + + if data_array.dims[0] != "x": + data_array = data_array.rename({data_array.dims[0]: "x"}) + + if len(data_array.dims) > 2: + return data_array.stack(y=(data_array.dims[1:])) + + return data_array + + def get_transformed_dataset(self, ds_info): + """Retrieve a dataset with all transformations applied on it.""" + + # Extract location: + vname = ds_info["location"] + + if not self.variable_path_exists(vname): + raise ValueError(f"Invalid variable path: {vname}") + + # Read the raw variable data from file (this is an xr.DataArray): + arr = self[vname] + + # Apply the transformations: + arr = self.apply_fill_value(arr, ds_info) + arr = self.apply_rescaling(arr, ds_info) + arr = self.apply_reshaping(arr, ds_info) + + return arr + + def get_dataset(self, dataset_id, ds_info=None): + """Get a dataset.""" + + ds_name = dataset_id["name"] + + # In case this dataset name is not explicitly provided by this file + # handler then we should simply return None. + if ds_name not in self.dataset_infos: + return None + + # Retrieve default infos if missing: + if ds_info is None: + ds_info = self.get_dataset_infos(ds_name) + + ds_name = ds_info["name"] + + # Retrieve the transformed data array: + data_array = self.get_transformed_dataset(ds_info) + + # Return the resulting array: + return data_array diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 25c7a0fb27..131a455676 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,14 +14,15 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" -import pytest import logging import os -from unittest import mock -from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from datetime import datetime -from satpy.readers import load_reader +from unittest import mock +import pytest + +from satpy.readers import load_reader +from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler logger = logging.getLogger(__name__) From 3e7ec22142cfd3812249f38eaa11b4ab4685dc81 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 10:19:43 +0200 Subject: [PATCH 07/41] Fixing value filling process and updated unit tests --- satpy/readers/iasi_ng_l2_nc.py | 24 ++-- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 133 +++++++++++++++++- 2 files changed, 146 insertions(+), 11 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c20c6289f2..7f2a72e1e2 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -146,21 +146,28 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array + if data_array.dtype == np.int32: + data_array = data_array.astype(np.float32) + else: + raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + attribs = data_array.attrs + nan_val = np.float32(np.nan) + # Apply the min/max valid range: if "valid_min" in attribs: vmin = attribs["valid_min"] - data_array = data_array.where(data_array < vmin, np.float32(np.nan)) + data_array = data_array.where(data_array >= vmin, other=nan_val) if "valid_max" in attribs: vmax = attribs["valid_max"] - data_array = data_array.where(data_array > vmax, np.float32(np.nan)) + data_array = data_array.where(data_array <= vmax, other=nan_val) if "valid_range" in attribs: vrange = attribs["valid_range"] - data_array = data_array.where(data_array < vrange[0], np.float32(np.nan)) - data_array = data_array.where(data_array > vrange[1], np.float32(np.nan)) + data_array = data_array.where(data_array >= vrange[0], other=nan_val) + data_array = data_array.where(data_array <= vrange[1], other=nan_val) # Check the missing value: missing_val = attribs.get("missing_value", None) @@ -169,7 +176,7 @@ def apply_fill_value(self, data_array, ds_info): if missing_val is None: return data_array - return data_array.where(data_array != missing_val, np.float32(np.nan)) + return data_array.where(data_array != missing_val, nan_val) def apply_rescaling(self, data_array, ds_info): """Apply the rescaling transform on a given array.""" @@ -188,10 +195,9 @@ def apply_rescaling(self, data_array, ds_info): data_array = (data_array * scale_factor) + add_offset # rescale the valid range accordingly - if "valid_range" in attribs: - attribs["valid_range"] = ( - attribs["valid_range"] * scale_factor + add_offset - ) + for key in ["valid_range", "valid_min", "valid_max"]: + if key in attribs: + attribs[key] = attribs[key] * scale_factor + add_offset data_array.attrs.update(attribs) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 131a455676..e920c497f1 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -19,8 +19,12 @@ from datetime import datetime from unittest import mock +import dask.array as da +import numpy as np import pytest +import xarray as xr +from satpy 
import Scene from satpy.readers import load_reader from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler @@ -31,15 +35,84 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" + chunks = (10, 10, 10) # Define the chunk size for dask array + + def add_rand_data(self, desc): + """ + Build a random DataArray from a given description, and add it as content. + """ + + # Create a lazy dask array with random int32 values + dtype = desc.get("data_type", "int32") + rand_min = desc.get("rand_min", 0) + rand_max = desc.get("rand_max", 100) + dims = desc["dims"] + key = desc["key"] + + shape = tuple(dims.values()) + + if dtype == "int32": + dask_array = da.random.randint( + rand_min, rand_max, size=shape, chunks=self.chunks, dtype="int32" + ) + else: + raise ValueError(f"Unsupported data type: {dtype}") + + # Wrap the dask array with xarray.DataArray + data_array = xr.DataArray(dask_array, dims=list(dims.keys())) + + data_array.attrs.update(desc.get("attribs", {})) + + self.content[key] = data_array + def get_test_content(self, _filename, _filename_info, _filetype_info): """Get the content of the test data. Here we generate the default content we want to provide depending on the provided filename infos. """ + n_lines = 10 + n_for = 14 + n_fov = 16 + def_dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} + + self.content = {} + + # Note: below we use the full range of int32 to generate the random + # values, we expect the handler to "fix" out of range values replacing + # them with NaNs. + self.add_rand_data( + { + "key": "data/geolocation_information/sounder_pixel_latitude", + "dims": def_dims, + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 0.0, + }, + } + ) + self.add_rand_data( + { + "key": "data/geolocation_information/sounder_pixel_longitude", + "dims": def_dims, + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + }, + } + ) - dset = {} - return dset + return self.content class TestIASINGL2NCReader: @@ -72,6 +145,12 @@ def twv_handler(self): filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" return self._create_file_handler(filename) + @pytest.fixture() + def twv_scene(self): + """Create a simple (and fake) satpy scene on a TWV product""" + filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" + return Scene(filenames=[filename], reader=self.reader_name) + def _create_file_handler(self, filename): """Create an handler for the given file checking that it can be parsed correctly""" @@ -132,3 +211,53 @@ def test_sensor_names(self, twv_handler): """Test that the handler reports iasi_ng as sensor""" assert twv_handler.sensor_names == {"iasi_ng"} + + def test_available_datasets(self, twv_scene): + """Test the list of available datasets in scene""" + + dnames = twv_scene.available_dataset_names() + + assert "latitude" in dnames + assert "longitude" in dnames + + def test_latitude_dataset(self, twv_scene): + """Test loading the latitude dataset""" + + twv_scene.load(["latitude"]) + dset = twv_scene["latitude"] + + # Should be 2D now: + 
assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to float32: + assert dset.dtype == np.float32 + + # All valid values should be in range [-90.0,90.0] + vmin = np.nanmin(dset) + vmax = np.nanmax(dset) + + assert vmin >= -90.0 + assert vmax <= 90.0 + + def test_longitude_dataset(self, twv_scene): + """Test loading the longitude dataset""" + + twv_scene.load(["longitude"]) + dset = twv_scene["longitude"] + + # Should be 2D now: + assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to float32: + assert dset.dtype == np.float32 + + # All valid values should be in range [-90.0,90.0] + vmin = np.nanmin(dset) + vmax = np.nanmax(dset) + + assert vmin >= -180.0 + assert vmax <= 180.0 From e4ea66dbb0a2cfd3eb8367fe9994e4b12886a9d4 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:18:09 +0200 Subject: [PATCH 08/41] Adding support for missing value handling in unit test --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index e920c497f1..cfc6aedcd7 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -58,10 +58,39 @@ def add_rand_data(self, desc): else: raise ValueError(f"Unsupported data type: {dtype}") + attribs = desc.get("attribs", {}) + + if "missing_value" in attribs: + # Force setting a few elements to this missing value (but still lazily with + # dask map_blocks function) + missing_value = attribs["missing_value"] + + # Ratio of elements to replace with missing value: + missing_ratio = 0.05 + + def set_missing_values(block): + # Generate random indices to set as missing values within this block + block_shape = block.shape + + block_size = np.prod(block_shape) + block_num_to_replace = int(block_size * missing_ratio) + + # Generate unique random indices to set as missing values within this block + flat_indices = np.random.choice( + block_size, block_num_to_replace, replace=False + ) + unraveled_indices = np.unravel_index(flat_indices, block_shape) + block[unraveled_indices] = missing_value + + return block + + # Apply the function lazily to each block + dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + # Wrap the dask array with xarray.DataArray data_array = xr.DataArray(dask_array, dims=list(dims.keys())) - data_array.attrs.update(desc.get("attribs", {})) + data_array.attrs.update(attribs) self.content[key] = data_array @@ -93,6 +122,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): "valid_max": 1800000000, "scale_factor": 5.0e-8, "add_offset": 0.0, + "missing_value": -2147483648, }, } ) @@ -108,6 +138,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): "valid_max": 1843200000, "scale_factor": 9.765625e-8, "add_offset": 0.0, + "missing_value": -2147483648, }, } ) From e094de9a4e59d7ffe6d6c26974e7ca2043c86efc Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:40:06 +0200 Subject: [PATCH 09/41] Using float 64 for lat/lon datasets --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 2 ++ satpy/readers/iasi_ng_l2_nc.py | 11 ++++++++--- satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 8 ++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml 
b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 0dcb3ee2d7..7c1dbc3f8a 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -18,5 +18,7 @@ file_types: datasets: latitude: location: data/geolocation_information/sounder_pixel_latitude + data_type: float64 longitude: location: data/geolocation_information/sounder_pixel_longitude + data_type: float64 diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 7f2a72e1e2..d2efd35712 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -146,14 +146,19 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array + dtype = ds_info.get("data_type", "float32") + if data_array.dtype == np.int32: - data_array = data_array.astype(np.float32) + data_array = data_array.astype(dtype) else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") - attribs = data_array.attrs + if dtype not in ["float32", "float64"]: + # We won't be able to use NaN in the other cases: + return data_array - nan_val = np.float32(np.nan) + attribs = data_array.attrs + nan_val = np.nan if dtype == "float64" else np.float32(np.nan) # Apply the min/max valid range: if "valid_min" in attribs: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index cfc6aedcd7..cbd68506f3 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -262,8 +262,8 @@ def test_latitude_dataset(self, twv_scene): assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to float32: - assert dset.dtype == np.float32 + # Should have been converted to float64: + assert dset.dtype == np.float64 # All valid values should be in range [-90.0,90.0] vmin = np.nanmin(dset) @@ -283,8 +283,8 @@ def test_longitude_dataset(self, twv_scene): assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to float32: - assert dset.dtype == np.float32 + # Should have been converted to float64: + assert dset.dtype == np.float64 # All valid values should be in range [-90.0,90.0] vmin = np.nanmin(dset) From 51727f8e8ef0873aea391a0c3c107193274f5269 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:51:56 +0200 Subject: [PATCH 10/41] Adding dedicated dataset descs array --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index cbd68506f3..5ace5a0b37 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -31,6 +31,37 @@ logger = logging.getLogger(__name__) +DATASET_DESCS = [ + { + "key": "data/geolocation_information/sounder_pixel_latitude", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_longitude", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + 
"missing_value": -2147483648, + }, + }, +] + class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" @@ -49,7 +80,7 @@ def add_rand_data(self, desc): dims = desc["dims"] key = desc["key"] - shape = tuple(dims.values()) + shape = [self.dims[k] for k in dims] if dtype == "int32": dask_array = da.random.randint( @@ -88,7 +119,7 @@ def set_missing_values(block): dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) # Wrap the dask array with xarray.DataArray - data_array = xr.DataArray(dask_array, dims=list(dims.keys())) + data_array = xr.DataArray(dask_array, dims=dims) data_array.attrs.update(attribs) @@ -103,45 +134,15 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): n_lines = 10 n_for = 14 n_fov = 16 - def_dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} + self.dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} self.content = {} # Note: below we use the full range of int32 to generate the random # values, we expect the handler to "fix" out of range values replacing # them with NaNs. - self.add_rand_data( - { - "key": "data/geolocation_information/sounder_pixel_latitude", - "dims": def_dims, - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - } - ) - self.add_rand_data( - { - "key": "data/geolocation_information/sounder_pixel_longitude", - "dims": def_dims, - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - } - ) + for desc in DATASET_DESCS: + self.add_rand_data(desc) return self.content From 2e57be929761a19a2d1c617c035b02a4924cf729 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 13:43:45 +0200 Subject: [PATCH 11/41] Adding support for datetime conversion --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 13 ++- satpy/readers/iasi_ng_l2_nc.py | 35 ++++++- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 99 ++++++++++++++++++- 3 files changed, 140 insertions(+), 7 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 7c1dbc3f8a..7422716baa 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,9 +16,18 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] datasets: + onboard_utc: + location: data/geolocation_information/onboard_utc + seconds_since_epoch: "2020-01-01" latitude: location: data/geolocation_information/sounder_pixel_latitude - data_type: float64 longitude: location: data/geolocation_information/sounder_pixel_longitude - data_type: float64 + azimuth: + location: data/geolocation_information/sounder_pixel_azimuth + zenith: + location: data/geolocation_information/sounder_pixel_zenith + sun_azimuth: + location: data/geolocation_information/sounder_pixel_sun_azimuth + sun_zenith: + location: data/geolocation_information/sounder_pixel_sun_zenith diff --git a/satpy/readers/iasi_ng_l2_nc.py 
b/satpy/readers/iasi_ng_l2_nc.py index d2efd35712..fd2319d6dc 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -26,8 +26,11 @@ import logging +import dask.array as da import netCDF4 import numpy as np +import pandas as pd +import xarray as xr from .netcdf_utils import NetCDF4FsspecFileHandler @@ -146,10 +149,13 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array - dtype = ds_info.get("data_type", "float32") + dtype = ds_info.get("data_type", "float64") if data_array.dtype == np.int32: data_array = data_array.astype(dtype) + elif data_array.dtype == np.float64: + if dtype != "float64": + data_array = data_array.astype(dtype) else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") @@ -220,11 +226,33 @@ def apply_reshaping(self, data_array, ds_info): if ds_info.get("apply_reshaping", True) is not True: return data_array + if len(data_array.dims) > 2: + data_array = data_array.stack(y=(data_array.dims[1:])) + if data_array.dims[0] != "x": data_array = data_array.rename({data_array.dims[0]: "x"}) - if len(data_array.dims) > 2: - return data_array.stack(y=(data_array.dims[1:])) + if data_array.dims[1] != "y": + data_array = data_array.rename({data_array.dims[1]: "y"}) + + return data_array + + def apply_to_datetime(self, data_array, ds_info): + """Convert the data to datetime values.""" + + if "seconds_since_epoch" not in ds_info: + return data_array + + epoch = ds_info["seconds_since_epoch"] + # Note: below could convert the resulting data to another type + # with .astype("datetime64[us]") for instance + data_array = xr.DataArray( + data=da.from_array( + pd.to_datetime(epoch) + data_array.astype("timedelta64[s]") + ), + dims=data_array.dims, + attrs=data_array.attrs, + ) return data_array @@ -244,6 +272,7 @@ def get_transformed_dataset(self, ds_info): arr = self.apply_fill_value(arr, ds_info) arr = self.apply_rescaling(arr, ds_info) arr = self.apply_reshaping(arr, ds_info) + arr = self.apply_to_datetime(arr, ds_info) return arr diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 5ace5a0b37..981f71f481 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -32,6 +32,18 @@ logger = logging.getLogger(__name__) DATASET_DESCS = [ + { + "key": "data/geolocation_information/onboard_utc", + "dims": ("n_lines", "n_for"), + "data_type": "float64", + "rand_min": -1100000000.0, + "rand_max": 1100000000.0, + "attribs": { + "valid_min": -1000000000.0, + "valid_max": 1000000000.0, + "missing_value": -9000000000.0, + }, + }, { "key": "data/geolocation_information/sounder_pixel_latitude", "dims": ("n_lines", "n_for", "n_fov"), @@ -60,13 +72,69 @@ "missing_value": -2147483648, }, }, + { + "key": "data/geolocation_information/sounder_pixel_sun_azimuth", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_sun_zenith", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 90.0, + "missing_value": -2147483648, 
+ }, + }, + { + "key": "data/geolocation_information/sounder_pixel_zenith", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 2.5e-8, + "add_offset": 45.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_azimuth", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, ] class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" - chunks = (10, 10, 10) # Define the chunk size for dask array + chunks = (10, 10, 10) def add_rand_data(self, desc): """ @@ -82,10 +150,18 @@ def add_rand_data(self, desc): shape = [self.dims[k] for k in dims] + # Define the chunk size for dask array + chunks = [10] * len(dims) + if dtype == "int32": dask_array = da.random.randint( - rand_min, rand_max, size=shape, chunks=self.chunks, dtype="int32" + rand_min, rand_max, size=shape, chunks=chunks, dtype=np.int32 ) + elif dtype == "float64": + dask_array = da.random.random(shape, chunks=chunks) + + # Scale and shift to the desired range [min_val, max_val] + dask_array = dask_array * (rand_max - rand_min) + rand_min else: raise ValueError(f"Unsupported data type: {dtype}") @@ -249,8 +325,13 @@ def test_available_datasets(self, twv_scene): dnames = twv_scene.available_dataset_names() + assert "onboard_utc" in dnames assert "latitude" in dnames assert "longitude" in dnames + assert "azimuth" in dnames + assert "zenith" in dnames + assert "sun_azimuth" in dnames + assert "sun_zenith" in dnames def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" @@ -293,3 +374,17 @@ def test_longitude_dataset(self, twv_scene): assert vmin >= -180.0 assert vmax <= 180.0 + + def test_onboard_utc_dataset(self, twv_scene): + """Test loading the onboard_utc dataset""" + + twv_scene.load(["onboard_utc"]) + dset = twv_scene["onboard_utc"] + + # Should be 2D now: + assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to datetime: + assert dset.dtype == np.dtype("datetime64[ns]") From d98c9bf6e8f4a2151335805ae4f85b70ebe834ed Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 14:57:15 +0200 Subject: [PATCH 12/41] Optimized data description in unit test --- satpy/readers/iasi_ng_l2_nc.py | 19 +- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 192 +++++++++--------- 2 files changed, 109 insertions(+), 102 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index fd2319d6dc..33825ae6d0 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -149,16 +149,27 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array - dtype = ds_info.get("data_type", "float64") + dtype = ds_info.get("data_type", "auto") + convert = False if data_array.dtype == np.int32: - data_array = data_array.astype(dtype) + if dtype == "auto": + dtype = "float64" + convert = dtype != "int32" elif data_array.dtype == np.float64: - if dtype != "float64": - data_array = data_array.astype(dtype) + if dtype == "auto": + dtype = "float64" + convert = dtype != "float64" + elif 
data_array.dtype == np.float32: + if dtype == "auto": + dtype = "float32" + convert = dtype != "float32" else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + if convert: + data_array = data_array.astype(dtype) + if dtype not in ["float32", "float64"]: # We won't be able to use NaN in the other cases: return data_array diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 981f71f481..0c81d0df5a 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -31,101 +31,72 @@ logger = logging.getLogger(__name__) -DATASET_DESCS = [ - { - "key": "data/geolocation_information/onboard_utc", - "dims": ("n_lines", "n_for"), - "data_type": "float64", - "rand_min": -1100000000.0, - "rand_max": 1100000000.0, - "attribs": { - "valid_min": -1000000000.0, - "valid_max": 1000000000.0, - "missing_value": -9000000000.0, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_latitude", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_longitude", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_sun_azimuth", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_sun_zenith", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 90.0, - "missing_value": -2147483648, - }, - }, +d_lff = ("n_lines", "n_for", "n_fov") + +DATA_DESC = [ { - "key": "data/geolocation_information/sounder_pixel_zenith", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 2.5e-8, - "add_offset": 45.0, - "missing_value": -2147483648, + "group": "data/geolocation_information", + "attrs": [ + "unit", + "valid_min", + "valid_max", + "scale_factor", + "add_offset", + "missing_value", + ], + "variables": { + "onboard_utc": [ + ("n_lines", "n_for"), + "float64", + ("seconds", -1e9, 1e9, None, None, -9e9), + ], + "sounder_pixel_latitude": [ + d_lff, + "int32", + ("degrees_north", -18e8, 18e8, 5.0e-8, 0.0, -2147483648), + ], + "sounder_pixel_longitude": [ + d_lff, + "int32", + ("degrees_east", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_sun_azimuth": [ + d_lff, + "int32", + ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_sun_zenith": [ + d_lff, + "int32", + ("degrees", -18e8, 18e8, 5.0e-8, 90.0, -2147483648), + ], + "sounder_pixel_azimuth": [ + d_lff, + 
"int32", + ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_zenith": [ + d_lff, + "int32", + ("degrees", -18e8, 18e8, 2.5e-8, 45.0, -2147483648), + ], }, }, { - "key": "data/geolocation_information/sounder_pixel_azimuth", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, + "group": "data/statistical_retrieval", + "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "variables": { + "air_temperature": [d_lff, "float32", ("K", 100.0, 400.0, 3.4e38)], + "atmosphere_mass_content_of_water": [ + d_lff, + "float32", + ("kg.m-2", 0, 300, 3.4e38), + ], + "qi_air_temperature": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], + "surface_air_temperature": [d_lff, "float32", ("K", 100, 400, 3.4e38)], + "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], }, }, ] @@ -143,8 +114,6 @@ def add_rand_data(self, desc): # Create a lazy dask array with random int32 values dtype = desc.get("data_type", "int32") - rand_min = desc.get("rand_min", 0) - rand_max = desc.get("rand_max", 100) dims = desc["dims"] key = desc["key"] @@ -152,21 +121,30 @@ def add_rand_data(self, desc): # Define the chunk size for dask array chunks = [10] * len(dims) + attribs = desc["attribs"] if dtype == "int32": + rand_min = -2147483647 + rand_max = 2147483647 dask_array = da.random.randint( rand_min, rand_max, size=shape, chunks=chunks, dtype=np.int32 ) - elif dtype == "float64": + elif dtype == "float64" or dtype == "float32": dask_array = da.random.random(shape, chunks=chunks) + if dtype == "float32": + dask_array = dask_array.astype(np.float32) + + vmin = attribs["valid_min"] + vmax = attribs["valid_max"] + rand_min = vmin - (vmax - vmin) * 0.1 + rand_max = vmax + (vmax - vmin) * 0.1 + # Scale and shift to the desired range [min_val, max_val] dask_array = dask_array * (rand_max - rand_min) + rand_min else: raise ValueError(f"Unsupported data type: {dtype}") - attribs = desc.get("attribs", {}) - if "missing_value" in attribs: # Force setting a few elements to this missing value (but still lazily with # dask map_blocks function) @@ -217,8 +195,26 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): # Note: below we use the full range of int32 to generate the random # values, we expect the handler to "fix" out of range values replacing # them with NaNs. 
-        for desc in DATASET_DESCS:
-            self.add_rand_data(desc)
+        for grp_desc in DATA_DESC:
+            # get the group prefix:
+            prefix = grp_desc["group"]
+            anames = grp_desc["attrs"]
+            for vname, vdesc in grp_desc["variables"].items():
+                # For each variable we create a dataset descriptor:
+                attribs = {}
+                for idx, val in enumerate(vdesc[2]):
+                    # Ensure we do not inject "None" attribute values:
+                    if val is not None:
+                        attribs[anames[idx]] = val
+
+                desc = {
+                    "key": f"{prefix}/{vname}",
+                    "dims": vdesc[0],
+                    "data_type": vdesc[1],
+                    "attribs": attribs,
+                }
+
+                self.add_rand_data(desc)
 
         return self.content

From 7f648d82f9b74f34e79960ba976267770a33be3c Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 15:44:46 +0200
Subject: [PATCH 13/41] Adding support for dataset renaming

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml | 36 ++++++++++++++++------------
 satpy/readers/iasi_ng_l2_nc.py       | 19 +++++++++++++--
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 7422716baa..2a4193f58c 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -15,19 +15,25 @@ file_types:
       [
         "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
       ]
+
+    onboard_utc_epoch: "2020-01-01"
+
     datasets:
-      onboard_utc:
-        location: data/geolocation_information/onboard_utc
-        seconds_since_epoch: "2020-01-01"
-      latitude:
-        location: data/geolocation_information/sounder_pixel_latitude
-      longitude:
-        location: data/geolocation_information/sounder_pixel_longitude
-      azimuth:
-        location: data/geolocation_information/sounder_pixel_azimuth
-      zenith:
-        location: data/geolocation_information/sounder_pixel_zenith
-      sun_azimuth:
-        location: data/geolocation_information/sounder_pixel_sun_azimuth
-      sun_zenith:
-        location: data/geolocation_information/sounder_pixel_sun_zenith
+      - group: data/geolocation_information
+        variables:
+          - onboard_utc
+          - sounder_pixel_latitude:latitude
+          - sounder_pixel_longitude:longitude
+          - sounder_pixel_azimuth:azimuth
+          - sounder_pixel_zenith:zenith
+          - sounder_pixel_sun_azimuth:sun_azimuth
+          - sounder_pixel_sun_zenith:sun_zenith
+      - group: data/statistical_retrieval
+        variables:
+          - air_temperature
+          - atmosphere_mass_content_of_water
+          - qi_air_temperature
+          - qi_specific_humidity
+          - specific_humidity
+          - surface_air_temperature
+          - surface_specific_humidity

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 33825ae6d0..f3fdf9c50a 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -110,8 +110,23 @@ def register_available_datasets(self):
 
         # Otherwise, we need to perform the registration:
         self.dataset_infos = {}
-        for ds_name, desc in self.ds_desc.items():
-            self.register_dataset(ds_name, desc)
+        for grp_desc in self.ds_desc:
+            prefix = grp_desc["group"]
+            for vname in grp_desc["variables"]:
+                # Check if we have an alias for this variable:
+                ds_name = vname
+                if ":" in vname:
+                    vname, ds_name = vname.split(":")
+
+                desc = {"location": f"{prefix}/{vname}"}
+
+                if ds_name == "onboard_utc":
+                    # add the seconds_since_epoch:
+                    desc["seconds_since_epoch"] = self.filetype_info["onboard_utc_epoch"]
+
+                # print(f"Registering {ds_name} 
with desc: {desc}") + + self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): """Retrieve the dataset infos corresponding to one of the registered From 2eb0311be24e16998e1abb32d64ed2b9df5367d0 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 15:46:52 +0200 Subject: [PATCH 14/41] Replacing dataset alais symbol in yaml file --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 12 ++++++------ satpy/readers/iasi_ng_l2_nc.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 2a4193f58c..300d3c0967 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -22,12 +22,12 @@ file_types: - group: data/geolocation_information variables: - onboard_utc - - sounder_pixel_latitude:latitude - - sounder_pixel_longitude:longitude - - sounder_pixel_azimuth:azimuth - - sounder_pixel_zenith:zenith - - sounder_pixel_sun_azimuth:sun_azimuth - - sounder_pixel_sun_zenith:sun_zenith + - sounder_pixel_latitude => latitude + - sounder_pixel_longitude => longitude + - sounder_pixel_azimuth => azimuth + - sounder_pixel_zenith => zenith + - sounder_pixel_sun_azimuth => sun_azimuth + - sounder_pixel_sun_zenith => sun_zenith - group: data/statistical_retrieval variables: - air_temperature diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f3fdf9c50a..f64d4c5170 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -115,8 +115,8 @@ def register_available_datasets(self): for vname in grp_desc["variables"]: # Check if we have an alias for this variable: ds_name = vname - if ":" in vname: - vname, ds_name = vname.split(":") + if "=>" in vname: + vname, ds_name = (v.strip() for v in vname.split("=>")) desc = {"location": f"{prefix}/{vname}"} From 86dd68f2cd76c8d8e906b0d1d56a8d80b097f1ac Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 15:49:28 +0200 Subject: [PATCH 15/41] Updating unit test on available datasets --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 0c81d0df5a..c09245c903 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -321,13 +321,25 @@ def test_available_datasets(self, twv_scene): dnames = twv_scene.available_dataset_names() - assert "onboard_utc" in dnames - assert "latitude" in dnames - assert "longitude" in dnames - assert "azimuth" in dnames - assert "zenith" in dnames - assert "sun_azimuth" in dnames - assert "sun_zenith" in dnames + expected_names = [ + "onboard_utc", + "latitude", + "longitude", + "azimuth", + "zenith", + "sun_azimuth", + "sun_zenith", + "air_temperature", + "atmosphere_mass_content_of_water", + "qi_air_temperature", + "qi_specific_humidity", + "specific_humidity", + "surface_air_temperature", + "surface_specific_humidity", + ] + + for dname in expected_names: + assert dname in dnames def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" From 34e03edbfcd094ca4a973134cae8f92fa506fb3c Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 16:08:52 +0200 Subject: [PATCH 16/41] Adding diagnostic variables --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 6 +++++ satpy/readers/iasi_ng_l2_nc.py | 9 +++++-- 
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 26 +++++++++++++++++--
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 300d3c0967..db8e3e9fb0 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -19,6 +19,12 @@ file_types:
     onboard_utc_epoch: "2020-01-01"
 
     datasets:
+      - group: data/diagnostics
+        variables:
+          - fg_cost
+          - nbr_iterations
+          - rt_cost_x
+          - rt_cost_y
       - group: data/geolocation_information
         variables:
           - onboard_utc

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index f64d4c5170..1b93bea2d6 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -166,10 +166,16 @@ def apply_fill_value(self, data_array, ds_info):
 
         dtype = ds_info.get("data_type", "auto")
         convert = False
+        attribs = data_array.attrs
 
         if data_array.dtype == np.int32:
             if dtype == "auto":
-                dtype = "float64"
+                # Only convert to float if we have the scale factor or add_offset:
+                dtype = (
+                    "float64"
+                    if ("scale_factor" in attribs or "add_offset" in attribs)
+                    else "int32"
+                )
             convert = dtype != "int32"
         elif data_array.dtype == np.float64:
             if dtype == "auto":
@@ -189,7 +195,6 @@
             # We won't be able to use NaN in the other cases:
             return data_array
 
-        attribs = data_array.attrs
         nan_val = np.nan if dtype == "float64" else np.float32(np.nan)
 
         # Apply the min/max valid range:

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index c09245c903..96450366fe 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -34,6 +34,16 @@
 d_lff = ("n_lines", "n_for", "n_fov")
 
 DATA_DESC = [
+    {
+        "group": "data/diagnostics",
+        "attrs": ["missing_value"],
+        "variables": {
+            "fg_cost": [d_lff, "float32", (3.4e38,)],
+            "nbr_iterations": [d_lff, "int32", (2147483647,)],
+            "rt_cost_x": [d_lff, "float32", (3.4e38,)],
+            "rt_cost_y": [d_lff, "float32", (3.4e38,)],
+        },
+    },
     {
         "group": "data/geolocation_information",
         "attrs": [
@@ -135,8 +145,10 @@ def add_rand_data(self, desc):
             if dtype == "float32":
                 dask_array = dask_array.astype(np.float32)
 
-            vmin = attribs["valid_min"]
-            vmax = attribs["valid_max"]
+            # Max float32 value is 3.4028235e38, we use a smaller value
+            # by default here:
+            vmin = attribs.get("valid_min", -1e16)
+            vmax = attribs.get("valid_max", 1e16)
 
             rand_min = vmin - (vmax - vmin) * 0.1
             rand_max = vmax + (vmax - vmin) * 0.1
@@ -396,3 +408,13 @@ def test_onboard_utc_dataset(self, twv_scene):
 
         # Should have been converted to datetime:
         assert dset.dtype == np.dtype("datetime64[ns]")
+
+    def test_nbr_iterations_dataset(self, twv_scene):
+        """Test loading the nbr_iterations dataset"""
+
+        twv_scene.load(["nbr_iterations"])
+        dset = twv_scene["nbr_iterations"]
+
+        assert len(dset.dims) == 2
+        # Should still be in int32 type:
+        assert dset.dtype == np.int32

From b80f68cca05783f4c2fc94db32eb0052217b0030 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 16:12:35 +0200
Subject: [PATCH 17/41] Adding surface info datasets

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  6 ++++++
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index db8e3e9fb0..2ff74e8ffc 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ 
b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -43,3 +43,9 @@ file_types:
           - specific_humidity
           - surface_air_temperature
           - surface_specific_humidity
+      - group: data/surface_info
+        variables:
+          - height
+          - height_std
+          - ice_fraction
+          - land_fraction

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 96450366fe..5a11c1913b 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -109,6 +109,16 @@
             "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)],
         },
     },
+    {
+        "group": "data/surface_info",
+        "attrs": ["unit", "valid_min", "valid_max", "missing_value"],
+        "variables": {
+            "height": [d_lff, "float32", ("m", -418, 8848, 3.4e38)],
+            "height_std": [d_lff, "float32", ("m", 0, 999.0, 3.4e38)],
+            "ice_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+            "land_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+        },
+    },
 ]
 

From aa2988a7058b043ae9fa2ccb23a76630deb3f0a8 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 21:42:23 +0200
Subject: [PATCH 18/41] Adding support to broadcast onboard_utc variable

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           | 1 +
 satpy/readers/iasi_ng_l2_nc.py                 | 6 ++++++
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 7 ++++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 2ff74e8ffc..caf9749624 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -17,6 +17,7 @@ file_types:
       ]
 
     onboard_utc_epoch: "2020-01-01"
+    onboard_utc_broadcast: true
 
     datasets:
       - group: data/diagnostics

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 1b93bea2d6..5a6d921290 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -285,6 +285,12 @@ def apply_to_datetime(self, data_array, ds_info):
             attrs=data_array.attrs,
         )
 
+        # print(f"file_content keys: {self.file_content.keys()}")
+        lat_shape = self["data/geolocation_information/sounder_pixel_latitude"].shape
+
+        # Apply "repeat" with the last dimension size:
+        data_array = xr.concat([data_array] * lat_shape[2], dim=data_array.dims[-1])
+
         return data_array
 
     def get_transformed_dataset(self, ds_info):

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 5a11c1913b..b28cbd49c4 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -408,7 +408,7 @@ def test_longitude_dataset(self, twv_scene):
     def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset"""
 
-        twv_scene.load(["onboard_utc"])
+        twv_scene.load(["onboard_utc", "latitude"])
         dset = twv_scene["onboard_utc"]
 
         # Should be 2D now:
@@ -419,6 +419,11 @@ def test_onboard_utc_dataset(self, twv_scene):
         # Should have been converted to datetime:
         assert dset.dtype == np.dtype("datetime64[ns]")
 
+        # Should also have the same size as "latitude" for instance:
+        lat = twv_scene["latitude"]
+
+        assert lat.shape == dset.shape
+
     def test_nbr_iterations_dataset(self, twv_scene):

From 9c2f3ecd376d612f435674b5a67e8fe35683c45b Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 23:25:28 +0200
Subject: [PATCH 19/41] Fixing warning in unit test

---
 satpy/readers/iasi_ng_l2_nc.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 5a6d921290..c013a5d40b 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -278,9 +278,7 @@ def apply_to_datetime(self, data_array, ds_info): # Note: below could convert the resulting data to another type # with .astype("datetime64[us]") for instance data_array = xr.DataArray( - data=da.from_array( - pd.to_datetime(epoch) + data_array.astype("timedelta64[s]") - ), + data=pd.to_datetime(epoch) + data_array.astype("timedelta64[s]"), dims=data_array.dims, attrs=data_array.attrs, ) From b113750c5b729532732c1bc971877bfdde215b92 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 09:58:22 +0200 Subject: [PATCH 20/41] Optimized variables discovery process. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 36 +----- satpy/readers/iasi_ng_l2_nc.py | 119 +++++++++++++++--- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 44 ++++--- 3 files changed, 130 insertions(+), 69 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index caf9749624..59ef9a8227 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,37 +16,7 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] - onboard_utc_epoch: "2020-01-01" - onboard_utc_broadcast: true + ignored_patterns: + - /optimal_estimation/ # cf. eum respase2 task2 issue #4 - datasets: - - group: data/diagnostics - variables: - - fg_cost - - nbr_iterations - - rt_cost_x - - rt_cost_y - - group: data/geolocation_information - variables: - - onboard_utc - - sounder_pixel_latitude => latitude - - sounder_pixel_longitude => longitude - - sounder_pixel_azimuth => azimuth - - sounder_pixel_zenith => zenith - - sounder_pixel_sun_azimuth => sun_azimuth - - sounder_pixel_sun_zenith => sun_zenith - - group: data/statistical_retrieval - variables: - - air_temperature - - atmosphere_mass_content_of_water - - qi_air_temperature - - qi_specific_humidity - - specific_humidity - - surface_air_temperature - - surface_specific_humidity - - group: data/surface_info - variables: - - height - - height_std - - ice_fraction - - land_fraction + onboard_utc_broadcast: true diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c013a5d40b..950e16e422 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -25,8 +25,8 @@ """ import logging +import re -import dask.array as da import netCDF4 import numpy as np import pandas as pd @@ -49,11 +49,18 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): # List of datasets provided by this handler: self.dataset_infos = None - # Description of the available datasets: - self.ds_desc = self.filetype_info["datasets"] + # Description of the available variables: + self.variable_desc = {} - logger.info("Creating reader with infos: %s", filename_info) + # Description of the available dimensions: + self.dimensions_desc = {} + # Ignored variable patterns: + patterns = self.filetype_info.get("ignored_patterns", []) + self.ignored_patterns = [re.compile(pstr) for pstr in patterns] + + # dataset aliases: + self.dataset_aliases = self.filetype_info.get("dataset_aliases", {}) 
self.register_available_datasets() @property @@ -71,6 +78,16 @@ def sensor_names(self): """List of sensors represented in this file.""" return self.sensors + # Note: patching the collect_groups_info method below to + # also collect dimensions in sub groups. + def _collect_groups_info(self, base_name, obj): + for group_name, group_obj in obj.groups.items(): + full_group_name = base_name + group_name + self.file_content[full_group_name] = group_obj + self._collect_attrs(full_group_name, group_obj) + self.collect_metadata(full_group_name, group_obj) + self.collect_dimensions(full_group_name, group_obj) + def available_datasets(self, configured_datasets=None): """Determine automatically the datasets provided by this file. @@ -100,6 +117,72 @@ def register_dataset(self, ds_name, desc): self.dataset_infos[ds_name] = ds_infos + def parse_file_content(self): + """Parse the file_content to discover the available datasets and dimensions""" + + for key, val in self.file_content.items(): + # print(f"Found key: {key}") + + if "/dimension/" in key: + dim_name = key.split("/")[-1] + # print(f"Found dimension: {dim_name}={val}") + if ( + dim_name in self.dimensions_desc + and self.dimensions_desc[dim_name] != val + ): + # This might happen if we have the same dim name from different groups: + raise ValueError(f"Detected duplicated dim name: {dim_name}") + + self.dimensions_desc[dim_name] = val + continue + + if "/attr/" in key: + var_path, aname = key.split("/attr/") + # print(f"Found attrib for: {var_path}: {aname}") + + if var_path not in self.variable_desc: + # maybe this variable is ignored, or this is a group attr. + continue + + self.variable_desc[var_path]["attribs"][aname] = val + continue + + # if isinstance(val, netCDF4.Variable): + if f"{key}/shape" in self.file_content: + # print(f"Found variable: {key}") + + shape = self.file_content[f"{key}/shape"] + if np.prod(shape) <= 1: + # print(f"Ignoring scalar variable {key}") + continue + + # Check if this variable should be ignored: + if any(p.search(key) is not None for p in self.ignored_patterns): + # print(f"Ignoring variable {key}") + continue + + # Prepare a description for this variable: + prefix, var_name = key.rsplit("/", 1) + dims = self.file_content[f"{key}/dimensions"] + dtype = self.file_content[f"{key}/dtype"] + + desc = { + "location": key, + "prefix": prefix, + "var_name": var_name, + "shape": shape, + "dtype": f"{dtype}", + "dims": dims, + "attribs": {}, + } + + self.variable_desc[key] = desc + continue + + print(f"Found {len(self.variable_desc)} variables:") + for vpath, desc in self.variable_desc.items(): + print(f"{vpath}: {desc}") + def register_available_datasets(self): """Register the available dataset in the current product file""" @@ -110,23 +193,25 @@ def register_available_datasets(self): # Otherwise, we need to perform the registration: self.dataset_infos = {} - for grp_desc in self.ds_desc: - prefix = grp_desc["group"] - for vname in grp_desc["variables"]: - # Check if we have an alias for this variable: - ds_name = vname - if "=>" in vname: - vname, ds_name = (v.strip() for v in vname.split("=>")) + # Parse the file content: + self.parse_file_content() - desc = {"location": f"{prefix}/{vname}"} + for vpath, desc in self.variable_desc.items(): + # Check if we have an alias for this variable: + ds_name = desc["var_name"] + if vpath in self.dataset_aliases: + ds_name = self.dataset_aliases[vpath] - if ds_name == "onboard_utc": - # add the seconds_since_epoch: - desc["seconds_since_epoch"] = 
self.filetype_info["onboard_utc_epoch"] + unit = desc["attribs"].get("units", None) - # print(f"Registering {ds_name} with desc: {desc}") + if unit is not None and unit.startswith("seconds since "): + # request conversion to datetime: + desc["seconds_since_epoch"] = unit.replace("seconds since ", "") + print( + f"Converting {ds_name} in secs from epoch {desc['seconds_since_epoch']}" + ) - self.register_dataset(ds_name, desc) + self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): """Retrieve the dataset infos corresponding to one of the registered diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index b28cbd49c4..16dbaa0c23 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -47,7 +47,7 @@ { "group": "data/geolocation_information", "attrs": [ - "unit", + "units", "valid_min", "valid_max", "scale_factor", @@ -58,7 +58,7 @@ "onboard_utc": [ ("n_lines", "n_for"), "float64", - ("seconds", -1e9, 1e9, None, None, -9e9), + ("seconds since 2020-01-01 00:00:00.000", -1e9, 1e9, None, None, -9e9), ], "sounder_pixel_latitude": [ d_lff, @@ -94,7 +94,7 @@ }, { "group": "data/statistical_retrieval", - "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "attrs": ["units", "valid_min", "valid_max", "missing_value"], "variables": { "air_temperature": [d_lff, "float32", ("K", 100.0, 400.0, 3.4e38)], "atmosphere_mass_content_of_water": [ @@ -103,15 +103,15 @@ ("kg.m-2", 0, 300, 3.4e38), ], "qi_air_temperature": [d_lff, "float32", ("", 0, 25, 3.4e38)], - "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], - "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], + "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], "surface_air_temperature": [d_lff, "float32", ("K", 100, 400, 3.4e38)], - "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], + "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], }, }, { "group": "data/surface_info", - "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "attrs": ["units", "valid_min", "valid_max", "missing_value"], "variables": { "height": [d_lff, "float32", ("m", -418, 8848, 3.4e38)], "height_std": [d_lff, "float32", ("m", 0, 999.0, 3.4e38)], @@ -200,6 +200,12 @@ def set_missing_values(block): data_array.attrs.update(attribs) self.content[key] = data_array + self.content[key + "/shape"] = data_array.shape + self.content[key + "/dtype"] = dask_array.dtype + self.content[key + "/dimensions"] = data_array.dims + + for aname, val in attribs.items(): + self.content[key + "/attr/" + aname] = val def get_test_content(self, _filename, _filename_info, _filetype_info): """Get the content of the test data. 
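# Editor's note (illustration only, not part of the patch): the epoch handling
# introduced above parses CF-style "units" strings. A standalone sketch of the
# same conversion, assuming units of the form "seconds since <epoch>":

import numpy as np
import pandas as pd

def seconds_to_datetime64(seconds, units):
    """Convert raw seconds counts to datetime64 values."""
    epoch = units.replace("seconds since ", "")
    return pd.to_datetime(epoch) + seconds.astype("timedelta64[s]")

# Example: seconds_to_datetime64(np.float64(86400), "seconds since 2020-01-01 00:00:00.000")
# evaluates to Timestamp("2020-01-02 00:00:00").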
@@ -345,12 +351,12 @@ def test_available_datasets(self, twv_scene): expected_names = [ "onboard_utc", - "latitude", - "longitude", - "azimuth", - "zenith", - "sun_azimuth", - "sun_zenith", + "sounder_pixel_latitude", + "sounder_pixel_longitude", + "sounder_pixel_azimuth", + "sounder_pixel_zenith", + "sounder_pixel_sun_azimuth", + "sounder_pixel_sun_zenith", "air_temperature", "atmosphere_mass_content_of_water", "qi_air_temperature", @@ -366,8 +372,8 @@ def test_available_datasets(self, twv_scene): def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" - twv_scene.load(["latitude"]) - dset = twv_scene["latitude"] + twv_scene.load(["sounder_pixel_latitude"]) + dset = twv_scene["sounder_pixel_latitude"] # Should be 2D now: assert len(dset.dims) == 2 @@ -387,8 +393,8 @@ def test_latitude_dataset(self, twv_scene): def test_longitude_dataset(self, twv_scene): """Test loading the longitude dataset""" - twv_scene.load(["longitude"]) - dset = twv_scene["longitude"] + twv_scene.load(["sounder_pixel_longitude"]) + dset = twv_scene["sounder_pixel_longitude"] # Should be 2D now: assert len(dset.dims) == 2 @@ -408,7 +414,7 @@ def test_longitude_dataset(self, twv_scene): def test_onboard_utc_dataset(self, twv_scene): """Test loading the onboard_utc dataset""" - twv_scene.load(["onboard_utc", "latitude"]) + twv_scene.load(["onboard_utc", "sounder_pixel_latitude"]) dset = twv_scene["onboard_utc"] # Should be 2D now: @@ -420,7 +426,7 @@ def test_onboard_utc_dataset(self, twv_scene): assert dset.dtype == np.dtype("datetime64[ns]") # Should also have the same size as "lattitude" for instance: - lat = twv_scene["latitude"] + lat = twv_scene["sounder_pixel_latitude"] assert lat.shape == dset.shape From becb81c9f9d63822fe5cb2501cd63331ae371d22 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 10:32:18 +0200 Subject: [PATCH 21/41] Updated onboard_utc broadcast process. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 4 +- satpy/readers/iasi_ng_l2_nc.py | 72 ++++++++++--------- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 4 ++ 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 59ef9a8227..0c13795cdd 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -19,4 +19,6 @@ file_types: ignored_patterns: - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4 - onboard_utc_broadcast: true + # Define if we should broadcast the timestamps from + # dims (n_lines, n_for) to (n_lines, n_for, n_fov): + broadcast_timestamps: true diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 950e16e422..f25b51a966 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -61,6 +61,10 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): # dataset aliases: self.dataset_aliases = self.filetype_info.get("dataset_aliases", {}) + + # broadcasts timestamps flag: + self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False) + self.register_available_datasets() @property @@ -179,9 +183,9 @@ def parse_file_content(self): self.variable_desc[key] = desc continue - print(f"Found {len(self.variable_desc)} variables:") - for vpath, desc in self.variable_desc.items(): - print(f"{vpath}: {desc}") + # print(f"Found {len(self.variable_desc)} variables:") + # for vpath, desc in self.variable_desc.items(): + # print(f"{vpath}: {desc}") def register_available_datasets(self): """Register the available dataset in the current product file""" @@ -203,13 +207,13 @@ def register_available_datasets(self): ds_name = self.dataset_aliases[vpath] unit = desc["attribs"].get("units", None) - if unit is not None and unit.startswith("seconds since "): # request conversion to datetime: desc["seconds_since_epoch"] = unit.replace("seconds since ", "") - print( - f"Converting {ds_name} in secs from epoch {desc['seconds_since_epoch']}" - ) + + if self.broadcast_timestamps and desc["var_name"] == "onboard_utc": + # Broadcast on the "n_fov" dimension: + desc["broadcast_on_dim"] = "n_fov" self.register_dataset(ds_name, desc) @@ -245,11 +249,7 @@ def variable_path_exists(self, var_path): def apply_fill_value(self, data_array, ds_info): """Apply the rescaling transform on a given array.""" - # Check if we should apply: - if ds_info.get("apply_fill_value", True) is not True: - return data_array - - dtype = ds_info.get("data_type", "auto") + dtype = ds_info.get("target_data_type", "auto") convert = False attribs = data_array.attrs @@ -271,7 +271,7 @@ def apply_fill_value(self, data_array, ds_info): dtype = "float32" convert = dtype != "float32" else: - raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + raise ValueError(f"Unexpected raw array data type: {data_array.dtype}") if convert: data_array = data_array.astype(dtype) @@ -303,16 +303,11 @@ def apply_fill_value(self, data_array, ds_info): if missing_val is None: return data_array - return data_array.where(data_array != missing_val, nan_val) + return data_array.where(data_array != missing_val, other=nan_val) - def apply_rescaling(self, data_array, ds_info): + def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Here we should apply the rescaling except if it is explicitly - # requested not to rescale: - if ds_info.get("apply_rescaling", True) is not True: - return data_array - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: @@ -330,7 +325,7 @@ def apply_rescaling(self, data_array, ds_info): return data_array - def apply_reshaping(self, data_array, ds_info): + def apply_reshaping(self, data_array): """Apply the reshaping transform on a given IASI-NG data array Those arrays may come as 3D array, in which case we collapse the @@ -339,9 +334,6 @@ def apply_reshaping(self, data_array, 
ds_info): In the process, we also rename the first axis to "x" """ - if ds_info.get("apply_reshaping", True) is not True: - return data_array - if len(data_array.dims) > 2: data_array = data_array.stack(y=(data_array.dims[1:])) @@ -353,13 +345,11 @@ def apply_reshaping(self, data_array, ds_info): return data_array - def apply_to_datetime(self, data_array, ds_info): + def convert_to_datetime(self, data_array, ds_info): """Convert the data to datetime values.""" - if "seconds_since_epoch" not in ds_info: - return data_array - epoch = ds_info["seconds_since_epoch"] + # Note: below could convert the resulting data to another type # with .astype("datetime64[us]") for instance data_array = xr.DataArray( @@ -368,11 +358,18 @@ def apply_to_datetime(self, data_array, ds_info): attrs=data_array.attrs, ) - # print(f"file_content keys: {self.file_content.keys()}") - lat_shape = self["data/geolocation_information/sounder_pixel_latitude"].shape + return data_array + + def apply_broadcast(self, data_array, ds_info): + """Apply the broadcast of the data array""" + + dim_name = ds_info["broadcast_on_dim"] + if dim_name not in self.dimensions_desc: + raise ValueError(f"Invalid dimension name {dim_name}") + rep_count = self.dimensions_desc[dim_name] - # Apply "repeat" with the last dimension size: - data_array = xr.concat([data_array] * lat_shape[2], dim=data_array.dims[-1]) + # Apply "a repeat operation" with the last dimension size: + data_array = xr.concat([data_array] * rep_count, dim=data_array.dims[-1]) return data_array @@ -390,9 +387,14 @@ def get_transformed_dataset(self, ds_info): # Apply the transformations: arr = self.apply_fill_value(arr, ds_info) - arr = self.apply_rescaling(arr, ds_info) - arr = self.apply_reshaping(arr, ds_info) - arr = self.apply_to_datetime(arr, ds_info) + arr = self.apply_rescaling(arr) + arr = self.apply_reshaping(arr) + + if "seconds_since_epoch" in ds_info: + arr = self.convert_to_datetime(arr, ds_info) + + if "broadcast_on_dim" in ds_info: + arr = self.apply_broadcast(arr, ds_info) return arr diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 16dbaa0c23..63e34d0c3d 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -244,6 +244,10 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): self.add_rand_data(desc) + # We should also register the dimensions as we will need access to "n_fov" for instance: + for key, val in self.dims.items(): + self.content[f"data/dimension/{key}"] = val + return self.content From 3be91cb93183b744331a164915822b9065aeb248 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 11:43:00 +0200 Subject: [PATCH 22/41] Installed pre-commit hook --- satpy/readers/iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f25b51a966..11c3dcbe05 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -361,7 +361,7 @@ def convert_to_datetime(self, data_array, ds_info): return data_array def apply_broadcast(self, data_array, ds_info): - """Apply the broadcast of the data array""" + """Apply the broadcast of the data array.""" dim_name = ds_info["broadcast_on_dim"] if dim_name not in self.dimensions_desc: From 3cdaee742b7ee6d9c0e55924cbf5e12c53345382 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:20:58 +0200 Subject: [PATCH 23/41] Fixing 
pre-commit issues --- README | 2 +- doc/source/dev_guide/CONTRIBUTING.rst | 2 +- satpy/readers/iasi_ng_l2_nc.py | 74 +++++++----------- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 76 ++++++++----------- 4 files changed, 62 insertions(+), 92 deletions(-) diff --git a/README b/README index 92cacd2853..a1320b1b4a 120000 --- a/README +++ b/README @@ -1 +1 @@ -README.rst \ No newline at end of file +README.rst diff --git a/doc/source/dev_guide/CONTRIBUTING.rst b/doc/source/dev_guide/CONTRIBUTING.rst index ac9338fc25..f60965aeba 120000 --- a/doc/source/dev_guide/CONTRIBUTING.rst +++ b/doc/source/dev_guide/CONTRIBUTING.rst @@ -1 +1 @@ -../../../CONTRIBUTING.rst \ No newline at end of file +../../../CONTRIBUTING.rst diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 11c3dcbe05..32a5dea713 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -r"""IASI-NG L2 reader +r"""IASI-NG L2 reader implementation. This reader supports reading all the products from the IASI-NG L2 processing level: @@ -97,7 +97,6 @@ def available_datasets(self, configured_datasets=None): Uses a per product type dataset registration mechanism. """ - # pass along existing datasets for is_avail, ds_info in configured_datasets or []: yield is_avail, ds_info @@ -106,8 +105,7 @@ def available_datasets(self, configured_datasets=None): yield True, ds_info def register_dataset(self, ds_name, desc): - """Register a simple dataset given its name and a desc dict""" - + """Register a simple dataset given its name and a desc dict.""" if ds_name in self.dataset_infos: raise ValueError(f"Dataset for {ds_name} already registered.") @@ -122,8 +120,7 @@ def register_dataset(self, ds_name, desc): self.dataset_infos[ds_name] = ds_infos def parse_file_content(self): - """Parse the file_content to discover the available datasets and dimensions""" - + """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): # print(f"Found key: {key}") @@ -188,8 +185,7 @@ def parse_file_content(self): # print(f"{vpath}: {desc}") def register_available_datasets(self): - """Register the available dataset in the current product file""" - + """Register the available dataset in the current product file.""" if self.dataset_infos is not None: # Datasets are already registered. return @@ -218,16 +214,14 @@ def register_available_datasets(self): self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): - """Retrieve the dataset infos corresponding to one of the registered - datasets.""" + """Retrieve the dataset infos corresponding to one of the registered datasets.""" if ds_name not in self.dataset_infos: raise KeyError(f"No dataset registered for {ds_name}") return self.dataset_infos[ds_name] def variable_path_exists(self, var_path): - """Check if a given variable path is available in the underlying - netCDF file. + """Check if a given variable path is available in the underlying netCDF file. All we really need to do here is to access the file_content dictionary and check if we have a variable under that var_path key. 
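# Editor's illustration (not part of the patch): the convert_data_type() rewrite
# in the next hunk keeps float data as-is and promotes integers to float64 only
# when CF scaling attributes are present. A condensed sketch of that decision rule:

import numpy as np
import xarray as xr

def resolve_target_dtype(arr: xr.DataArray, dtype="auto"):
    """Pick the output dtype: keep floats, promote scaled integers to float64."""
    cur_dtype = np.dtype(arr.dtype).name
    if dtype != "auto":
        return dtype
    if cur_dtype in ("float32", "float64"):
        return cur_dtype
    to_float = "scale_factor" in arr.attrs or "add_offset" in arr.attrs
    return "float64" if to_float else cur_dtype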
@@ -246,41 +240,34 @@ def variable_path_exists(self, var_path): # Var path not in file_content: return False - def apply_fill_value(self, data_array, ds_info): - """Apply the rescaling transform on a given array.""" - - dtype = ds_info.get("target_data_type", "auto") - convert = False + def convert_data_type(self, data_array, dtype="auto"): + """Convert the data type if applicable.""" attribs = data_array.attrs - if data_array.dtype == np.int32: - if dtype == "auto": - # Only convert to float if we have the scale factor or add_offset: - dtype = ( - "float64" - if ("scale_factor" in attribs or "add_offset" in attribs) - else "int32" - ) - convert = dtype != "int32" - elif data_array.dtype == np.float64: - if dtype == "auto": - dtype = "float64" - convert = dtype != "float64" - elif data_array.dtype == np.float32: - if dtype == "auto": - dtype = "float32" - convert = dtype != "float32" - else: - raise ValueError(f"Unexpected raw array data type: {data_array.dtype}") - - if convert: + cur_dtype = np.dtype(data_array.dtype).name + + if dtype == "auto" and cur_dtype in ["float32", "float64"]: + dtype = cur_dtype + + to_float = "scale_factor" in attribs or "add_offset" in attribs + + if dtype == "auto": + dtype = "float64" if to_float else cur_dtype + + if cur_dtype != dtype: data_array = data_array.astype(dtype) + return data_array + + def apply_fill_value(self, data_array): + """Apply the rescaling transform on a given array.""" + dtype = np.dtype(data_array.dtype).name if dtype not in ["float32", "float64"]: # We won't be able to use NaN in the other cases: return data_array nan_val = np.nan if dtype == "float64" else np.float32(np.nan) + attribs = data_array.attrs # Apply the min/max valid range: if "valid_min" in attribs: @@ -307,7 +294,6 @@ def apply_fill_value(self, data_array, ds_info): def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: @@ -326,14 +312,13 @@ def apply_rescaling(self, data_array): return data_array def apply_reshaping(self, data_array): - """Apply the reshaping transform on a given IASI-NG data array + """Apply the reshaping transform on a given IASI-NG data array. Those arrays may come as 3D array, in which case we collapse the last 2 dimensions on a single axis (ie. 
the number of columns or "y") In the process, we also rename the first axis to "x" """ - if len(data_array.dims) > 2: data_array = data_array.stack(y=(data_array.dims[1:])) @@ -347,7 +332,6 @@ def apply_reshaping(self, data_array): def convert_to_datetime(self, data_array, ds_info): """Convert the data to datetime values.""" - epoch = ds_info["seconds_since_epoch"] # Note: below could convert the resulting data to another type @@ -362,7 +346,6 @@ def convert_to_datetime(self, data_array, ds_info): def apply_broadcast(self, data_array, ds_info): """Apply the broadcast of the data array.""" - dim_name = ds_info["broadcast_on_dim"] if dim_name not in self.dimensions_desc: raise ValueError(f"Invalid dimension name {dim_name}") @@ -375,7 +358,6 @@ def apply_broadcast(self, data_array, ds_info): def get_transformed_dataset(self, ds_info): """Retrieve a dataset with all transformations applied on it.""" - # Extract location: vname = ds_info["location"] @@ -386,7 +368,8 @@ def get_transformed_dataset(self, ds_info): arr = self[vname] # Apply the transformations: - arr = self.apply_fill_value(arr, ds_info) + arr = self.convert_data_type(arr) + arr = self.apply_fill_value(arr) arr = self.apply_rescaling(arr) arr = self.apply_reshaping(arr) @@ -400,7 +383,6 @@ def get_transformed_dataset(self, ds_info): def get_dataset(self, dataset_id, ds_info=None): """Get a dataset.""" - ds_name = dataset_id["name"] # In case this dataset name is not explicitly provided by this file diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 63e34d0c3d..84f7164f4f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -123,15 +123,12 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): - """Fake base class for IASI NG handler""" + """Fake base class for IASI NG handler.""" chunks = (10, 10, 10) def add_rand_data(self, desc): - """ - Build a random DataArray from a given description, and add it as content. 
-        """
-
+        """Build a random DataArray from a given description."""
         # Create a lazy dask array with random int32 values
         dtype = desc.get("data_type", "int32")
         dims = desc["dims"]
@@ -182,10 +179,11 @@ def set_missing_values(block):
             block_size = np.prod(block_shape)
             block_num_to_replace = int(block_size * missing_ratio)
 
+            # Create a Generator instance
+            rng = np.random.default_rng()
+
             # Generate unique random indices to set as missing values within this block
-            flat_indices = np.random.choice(
-                block_size, block_num_to_replace, replace=False
-            )
+            flat_indices = rng.choice(block_size, block_num_to_replace, replace=False)
             unraveled_indices = np.unravel_index(flat_indices, block_shape)
             block[unraveled_indices] = missing_value
 
@@ -256,8 +254,12 @@ class TestIASINGL2NCReader:
 
     reader_name = "iasi_ng_l2_nc"
 
+    file_prefix = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02"
+    file_suffix = "C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
+    twv_filename = f"{file_prefix}-TWV_{file_suffix}"
+
     def setup_method(self):
-        """Setup the reade config"""
+        """Set up the reader config."""
         from satpy._config import config_search_paths
 
         self.reader_configs = config_search_paths(
@@ -277,20 +279,16 @@ def fake_handler(self):
 
     @pytest.fixture()
     def twv_handler(self):
-        """Create a simple (and fake) default handler on a TWV product"""
-        filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
-        return self._create_file_handler(filename)
+        """Create a simple (and fake) default handler on a TWV product."""
+        return self._create_file_handler(self.twv_filename)
 
     @pytest.fixture()
     def twv_scene(self):
-        """Create a simple (and fake) satpy scene on a TWV product"""
-        filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
-        return Scene(filenames=[filename], reader=self.reader_name)
+        """Create a simple (and fake) satpy scene on a TWV product."""
+        return Scene(filenames=[self.twv_filename], reader=self.reader_name)
 
     def _create_file_handler(self, filename):
-        """Create an handler for the given file checking that it can
-        be parsed correctly"""
-
+        """Create a handler for the given file, checking that it can be parsed correctly."""
         reader = load_reader(self.reader_configs)
 
         # Test if the file is recognized by the reader
@@ -315,42 +313,36 @@ def _create_file_handler(self, filename):
         return handlers[0]
 
     def test_filename_matching(self):
-        """Test filename matching against some random name"""
-
+        """Test filename matching against some random name."""
         # Example filename
-        filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
+        prefix = "W_fr-meteo-sat,GRAL,MTI1-IASING-2"
+        suffix = (
+            "C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
+        )
+        filename = f"{prefix}-l2p_{suffix}"
 
         self._create_file_handler(filename)
 
     def test_real_filename_matching(self):
-        """Test that we will match an actual IASI NG L2 product file name"""
-
+        """Test that we will match an actual IASI NG L2 product file name."""
         # Below we test the TWV,CLD,GHG and SFC products:
-        filenames = {
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-CLD_C_EUMT_20170616120000_G_V_20070912094037_20070912094308_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-GHG_C_EUMT_20170616120000_G_V_20070912090651_20070912090922_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-SFC_C_EUMT_20170616120000_G_V_20070912100911_20070912101141_O_N____.nc",
-        }
+        ptypes = ["TWV", "CLD", "GHG", "SFC"]
+        filenames = [f"{self.file_prefix}-{ptype}_{self.file_suffix}" for ptype in ptypes]
 
         for filename in filenames:
             self._create_file_handler(filename)
 
     def test_sensing_times(self, twv_handler):
-        """Test that we read the sensing start/end times correctly
-        from filename"""
-
+        """Test that we read the sensing start/end times correctly from filename."""
         assert twv_handler.start_time == datetime(2007, 9, 12, 8, 43, 29)
         assert twv_handler.end_time == datetime(2007, 9, 12, 8, 46, 0)
 
     def test_sensor_names(self, twv_handler):
-        """Test that the handler reports iasi_ng as sensor"""
-
+        """Test that the handler reports iasi_ng as sensor."""
         assert twv_handler.sensor_names == {"iasi_ng"}
 
     def test_available_datasets(self, twv_scene):
-        """Test the list of available datasets in scene"""
-
+        """Test the list of available datasets in scene."""
         dnames = twv_scene.available_dataset_names()
 
         expected_names = [
@@ -374,8 +366,7 @@ def test_available_datasets(self, twv_scene):
         for dname in expected_names:
             assert dname in dnames
 
     def test_latitude_dataset(self, twv_scene):
-        """Test loading the latitude dataset"""
-
+        """Test loading the latitude dataset."""
         twv_scene.load(["sounder_pixel_latitude"])
         dset = twv_scene["sounder_pixel_latitude"]
 
@@ -395,8 +386,7 @@ def test_latitude_dataset(self, twv_scene):
     def test_longitude_dataset(self, twv_scene):
-        """Test loading the longitude dataset"""
-
+        """Test loading the longitude dataset."""
         twv_scene.load(["sounder_pixel_longitude"])
         dset = twv_scene["sounder_pixel_longitude"]
 
@@ -416,8 +406,7 @@ def test_longitude_dataset(self, twv_scene):
     def test_onboard_utc_dataset(self, twv_scene):
-        """Test loading the onboard_utc dataset"""
-
+        """Test loading the onboard_utc dataset."""
         twv_scene.load(["onboard_utc", "sounder_pixel_latitude"])
         dset = twv_scene["onboard_utc"]
 
@@ -435,8 +424,7 @@ def test_onboard_utc_dataset(self, twv_scene):
     def test_nbr_iterations_dataset(self, twv_scene):
-        """Test loading the nbr_iterations dataset"""
-
+        """Test loading the nbr_iterations dataset."""
         twv_scene.load(["nbr_iterations"])
         dset = twv_scene["nbr_iterations"]

From 17fc4308abc8b06c20d8a8fc01822698011899f9 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 9 Aug 2024 12:27:01 +0200
Subject: [PATCH 24/41] Refactoring parse_file_content to reduce complexity

---
 satpy/readers/iasi_ng_l2_nc.py | 100 ++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 46 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 32a5dea713..81890ed531 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -119,65 +119,73 @@ def register_dataset(self, ds_name, desc):
 
         self.dataset_infos[ds_name] = ds_infos
 
+    def process_dimension(self, key, value):
+        """Process a dimension entry from the file_content."""
+        dim_name = key.split("/")[-1]
+        # print(f"Found dimension: {dim_name}={val}")
+        if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
+            # This might happen if we have the same dim name from different groups:
+            raise ValueError(f"Detected duplicated dim name: {dim_name}")
+
+        self.dimensions_desc[dim_name] = value
+
+    def process_attribute(self, key, value):
+        """Process an 
attribute entry from the file_content.""" + var_path, aname = key.split("/attr/") + # print(f"Found attrib for: {var_path}: {aname}") + + if var_path not in self.variable_desc: + # maybe this variable is ignored, or this is a group attr. + return + + self.variable_desc[var_path]["attribs"][aname] = value + + def process_variable(self, key): + """Process a variable entry from the file_content.""" + shape = self.file_content[f"{key}/shape"] + # print(f"Found variable: {key}") + if np.prod(shape) <= 1: + # print(f"Ignoring scalar variable {key}") + return + + # Check if this variable should be ignored: + if any(p.search(key) is not None for p in self.ignored_patterns): + # print(f"Ignoring variable {key}") + return + + # Prepare a description for this variable: + prefix, var_name = key.rsplit("/", 1) + dims = self.file_content[f"{key}/dimensions"] + dtype = self.file_content[f"{key}/dtype"] + + desc = { + "location": key, + "prefix": prefix, + "var_name": var_name, + "shape": shape, + "dtype": f"{dtype}", + "dims": dims, + "attribs": {}, + } + + self.variable_desc[key] = desc + def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): # print(f"Found key: {key}") if "/dimension/" in key: - dim_name = key.split("/")[-1] - # print(f"Found dimension: {dim_name}={val}") - if ( - dim_name in self.dimensions_desc - and self.dimensions_desc[dim_name] != val - ): - # This might happen if we have the same dim name from different groups: - raise ValueError(f"Detected duplicated dim name: {dim_name}") - - self.dimensions_desc[dim_name] = val + self.process_dimension(key, val) continue if "/attr/" in key: - var_path, aname = key.split("/attr/") - # print(f"Found attrib for: {var_path}: {aname}") - - if var_path not in self.variable_desc: - # maybe this variable is ignored, or this is a group attr. 
- continue - - self.variable_desc[var_path]["attribs"][aname] = val + self.process_attribute(key, val) continue # if isinstance(val, netCDF4.Variable): if f"{key}/shape" in self.file_content: - # print(f"Found variable: {key}") - - shape = self.file_content[f"{key}/shape"] - if np.prod(shape) <= 1: - # print(f"Ignoring scalar variable {key}") - continue - - # Check if this variable should be ignored: - if any(p.search(key) is not None for p in self.ignored_patterns): - # print(f"Ignoring variable {key}") - continue - - # Prepare a description for this variable: - prefix, var_name = key.rsplit("/", 1) - dims = self.file_content[f"{key}/dimensions"] - dtype = self.file_content[f"{key}/dtype"] - - desc = { - "location": key, - "prefix": prefix, - "var_name": var_name, - "shape": shape, - "dtype": f"{dtype}", - "dims": dims, - "attribs": {}, - } - - self.variable_desc[key] = desc + self.process_variable(key) continue # print(f"Found {len(self.variable_desc)} variables:") From b906f4cdbc18ee350887e03a593872396404d320 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:34:21 +0200 Subject: [PATCH 25/41] Refactoring add_rand_data to reduce complexity --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 77 +++++++++++-------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 84f7164f4f..399ca728ae 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -127,18 +127,41 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): chunks = (10, 10, 10) - def add_rand_data(self, desc): - """Build a random DataArray from a given description.""" - # Create a lazy dask array with random int32 values - dtype = desc.get("data_type", "int32") - dims = desc["dims"] - key = desc["key"] + def inject_missing_value(self, dask_array, attribs): + """Inject missing value in data array.""" + # Force setting a few elements to this missing value (but still lazily with + # dask map_blocks function) + missing_value = attribs["missing_value"] - shape = [self.dims[k] for k in dims] + # Ratio of elements to replace with missing value: + missing_ratio = 0.05 - # Define the chunk size for dask array + def set_missing_values(block): + # Generate random indices to set as missing values within this block + block_shape = block.shape + + block_size = np.prod(block_shape) + block_num_to_replace = int(block_size * missing_ratio) + + # Create a Generator instance + rng = np.random.default_rng() + + # Generate unique random indices to set as missing values within this block + flat_indices = rng.choice(block_size, block_num_to_replace, replace=False) + unraveled_indices = np.unravel_index(flat_indices, block_shape) + block[unraveled_indices] = missing_value + + return block + + # Apply the function lazily to each block + dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + + return dask_array + + def generate_dask_array(self, dims, dtype, attribs): + """Generate some random dask array of the given dtype.""" + shape = [self.dims[k] for k in dims] chunks = [10] * len(dims) - attribs = desc["attribs"] if dtype == "int32": rand_min = -2147483647 @@ -164,33 +187,21 @@ def add_rand_data(self, desc): else: raise ValueError(f"Unsupported data type: {dtype}") - if "missing_value" in attribs: - # Force setting a few elements to this missing value (but still lazily with - # dask map_blocks function) - missing_value = 
attribs["missing_value"] - - # Ratio of elements to replace with missing value: - missing_ratio = 0.05 - - def set_missing_values(block): - # Generate random indices to set as missing values within this block - block_shape = block.shape + return dask_array - block_size = np.prod(block_shape) - block_num_to_replace = int(block_size * missing_ratio) - - # Create a Generator instance - rng = np.random.default_rng() - - # Generate unique random indices to set as missing values within this block - flat_indices = rng.choice(block_size, block_num_to_replace, replace=False) - unraveled_indices = np.unravel_index(flat_indices, block_shape) - block[unraveled_indices] = missing_value + def add_rand_data(self, desc): + """Build a random DataArray from a given description.""" + # Create a lazy dask array with random int32 values + dtype = desc.get("data_type", "int32") + dims = desc["dims"] + attribs = desc["attribs"] + key = desc["key"] - return block + dask_array = self.generate_dask_array(dims, dtype, attribs) + # Define the chunk size for dask array - # Apply the function lazily to each block - dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + if "missing_value" in attribs: + dask_array = self.inject_missing_value(dask_array, attribs) # Wrap the dask array with xarray.DataArray data_array = xr.DataArray(dask_array, dims=dims) From f660cb1979af1c4551f3dda1752dd3722ec4db4a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:39:47 +0200 Subject: [PATCH 26/41] Removing lat/lon unit test duplication --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 52 +++++++------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 399ca728ae..9c2eda3c6f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -376,45 +376,31 @@ def test_available_datasets(self, twv_scene): for dname in expected_names: assert dname in dnames - def test_latitude_dataset(self, twv_scene): - """Test loading the latitude dataset.""" - twv_scene.load(["sounder_pixel_latitude"]) - dset = twv_scene["sounder_pixel_latitude"] - - # Should be 2D now: - assert len(dset.dims) == 2 - assert dset.dims[0] == "x" - assert dset.dims[1] == "y" - - # Should have been converted to float64: - assert dset.dtype == np.float64 - - # All valid values should be in range [-90.0,90.0] - vmin = np.nanmin(dset) - vmax = np.nanmax(dset) - - assert vmin >= -90.0 - assert vmax <= 90.0 - - def test_longitude_dataset(self, twv_scene): - """Test loading the longitude dataset.""" - twv_scene.load(["sounder_pixel_longitude"]) - dset = twv_scene["sounder_pixel_longitude"] + def test_latlon_datasets(self, twv_scene): + """Test loading the latitude/longitude dataset.""" + twv_scene.load(["sounder_pixel_latitude", "sounder_pixel_longitude"]) + lat = twv_scene["sounder_pixel_latitude"] + lon = twv_scene["sounder_pixel_longitude"] # Should be 2D now: - assert len(dset.dims) == 2 - assert dset.dims[0] == "x" - assert dset.dims[1] == "y" + assert len(lat.dims) == 2 + assert len(lon.dims) == 2 + assert lat.dims[0] == "x" + assert lat.dims[1] == "y" + assert lon.dims[0] == "x" + assert lon.dims[1] == "y" # Should have been converted to float64: - assert dset.dtype == np.float64 + assert lat.dtype == np.float64 + assert lon.dtype == np.float64 - # All valid values should be in range [-90.0,90.0] - vmin = np.nanmin(dset) - vmax = np.nanmax(dset) + # All 
+        assert np.nanmin(lat) >= -90.0
+        assert np.nanmax(lat) <= 90.0
 
-        assert vmin >= -180.0
-        assert vmax <= 180.0
+        # All valid lon values should be in range [-180.0,180.0]
+        assert np.nanmin(lon) >= -180.0
+        assert np.nanmax(lon) <= 180.0
 
     def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset."""

From efc88656a5d7d9aaf5070dd5cd2808a9a7d2839e Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 9 Aug 2024 12:43:37 +0200
Subject: [PATCH 27/41] Reduced get_test_content complexity

---
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 9c2eda3c6f..02f1b8eaae 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -216,6 +216,15 @@ def add_rand_data(self, desc):
         for aname, val in attribs.items():
             self.content[key + "/attr/" + aname] = val
 
+    def get_valid_attribs(self, anames, alist):
+        """Retrieve only the valid attributes from a list."""
+        attribs = {}
+        for idx, val in enumerate(alist):
+            # Ensure we do not inject "None" attribute values:
+            if val is not None:
+                attribs[anames[idx]] = val
+        return attribs
+
     def get_test_content(self, _filename, _filename_info, _filetype_info):
         """Get the content of the test data.
 
@@ -238,11 +247,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
             anames = grp_desc["attrs"]
             for vname, vdesc in grp_desc["variables"].items():
                 # For each variable we create a dataset descriptor:
-                attribs = {}
-                for idx, val in enumerate(vdesc[2]):
-                    # AEnsure we do not inject "None" attribute values:
-                    if val is not None:
-                        attribs[anames[idx]] = val
+                attribs = self.get_valid_attribs(anames, vdesc[2])
 
                 desc = {
                     "key": f"{prefix}/{vname}",

From c237c108fe2088a63f6ad40be2a4761cf50e2b0d Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Tue, 27 Aug 2024 08:56:36 +0200
Subject: [PATCH 28/41] Added support for O3_ and CO_ products

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  2 +-
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 0c13795cdd..1d637db0d4 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -13,7 +13,7 @@ file_types:
     file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler
     file_patterns:
       [
-        "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
+        "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
       ]
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 02f1b8eaae..3368b144b4 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -341,12 +341,16 @@ def test_filename_matching(self): def test_real_filename_matching(self): """Test that we will match an actual IASI NG L2 product file name.""" - # Below we test the TWV,CLD,GHG and SFC products: - ptypes = ["TWV", "CLD", "GHG", "SFC"] - filenames = [f"{self.file_prefix}-{ptype}_{self.file_suffix}" for ptype in ptypes] + # Below we test the TWV,CLD,GHG,SFC,O3_ and CO_ products: + ptypes = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"] - for filename in filenames: - self._create_file_handler(filename) + for ptype in ptypes: + filename = f"{self.file_prefix}-{ptype}_{self.file_suffix}" + handler = self._create_file_handler(filename) + + assert handler.filename_info["oflag"] == "C" + assert handler.filename_info["originator"] == "EUMT" + assert handler.filename_info["product_type"] == ptype def test_sensing_times(self, twv_handler): """Test that we read the sensing start/end times correctly from filename.""" From 90f7016c4f4368cd94f9208024e0035cb2908bb8 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:02:58 +0200 Subject: [PATCH 29/41] Adding my name to authors.md --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 461ce3ca1e..d1e6763134 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -66,6 +66,7 @@ The following people have made contributions to this project: - [Lars Ørum Rasmussen (loerum)](https://github.com/loerum) - [Martin Raspaud (mraspaud)](https://github.com/mraspaud) - [William Roberts (wroberts4)](https://github.com/wroberts4) +- [Emmanuel Roche (roche-emmanuel)](https://github.com/roche-emmanuel) - [Benjamin Rösner (BENR0)](https://github.com/BENR0) - [Pascale Roquet (roquetp)](https://github.com/roquetp) - [Kristian Rune Larsen](https://github.com/) From 87cf5eafc16f31662574d6b92785e2b24b0471d9 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:27:18 +0200 Subject: [PATCH 30/41] Minor change trying to fix the readthedocs build process --- satpy/readers/iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 81890ed531..c0635dbb09 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -r"""IASI-NG L2 reader implementation. +"""IASI-NG L2 reader implementation. 
This reader supports reading all the products from the IASI-NG L2 processing level: From 430fcd2d094760ac713bbcc7292200c0f289e7f4 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:27:59 +0200 Subject: [PATCH 31/41] Updated IASI ng L2 reader description --- satpy/readers/iasi_ng_l2_nc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c0635dbb09..f2d47a1630 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -22,6 +22,8 @@ * IASI-L2-CLD * IASI-L2-GHG * IASI-L2-SFC + * IASI-L2-O3_ + * IASI-L2-CO_ """ import logging From e69de92f64fdfb57f50cc3e342a11dd1c5125e0c Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 10:01:35 +0200 Subject: [PATCH 32/41] Fixing readthedocs build process --- satpy/readers/iasi_ng_l2_nc.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f2d47a1630..880725d341 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -18,12 +18,13 @@ This reader supports reading all the products from the IASI-NG L2 processing level: - * IASI-L2-TWV - * IASI-L2-CLD - * IASI-L2-GHG - * IASI-L2-SFC - * IASI-L2-O3_ - * IASI-L2-CO_ + +* IASI-L2-TWV +* IASI-L2-CLD +* IASI-L2-GHG +* IASI-L2-SFC +* IASI-L2-O3 +* IASI-L2-CO """ import logging From 93c509fba801abdef40e43ec70fb56d2bdaf355a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 21:16:45 +0200 Subject: [PATCH 33/41] Renaming contributing.rst file correctly --- ...TING.rst~contributing_and_rtd_build_fixes => CONTRIBUTING.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/source/dev_guide/{CONTRIBUTING.rst~contributing_and_rtd_build_fixes => CONTRIBUTING.rst} (100%) diff --git a/doc/source/dev_guide/CONTRIBUTING.rst~contributing_and_rtd_build_fixes b/doc/source/dev_guide/CONTRIBUTING.rst similarity index 100% rename from doc/source/dev_guide/CONTRIBUTING.rst~contributing_and_rtd_build_fixes rename to doc/source/dev_guide/CONTRIBUTING.rst From 1c8741db503a2c601419b357da41760c2a09a46e Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Mon, 30 Sep 2024 13:17:00 +0200 Subject: [PATCH 34/41] Updated reader to enable optimal_estimations and use dataset name aliases. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 7 +++++-- satpy/readers/iasi_ng_l2_nc.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 1d637db0d4..ea27c2328b 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,8 +16,11 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] - ignored_patterns: - - /optimal_estimation/ # cf. eum respase2 task2 issue #4 + # ignored_patterns: + # - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4
+
+  dataset_aliases:
+    "/optimal_estimation/(.+)$": "${VAR_NAME}_oem"
 
   # Define if we should broadcast the timestamps from
   # dims (n_lines, n_for) to (n_lines, n_for, n_fov):
diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 880725d341..75d749be0f 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -65,6 +65,11 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs):
         # dataset aliases:
         self.dataset_aliases = self.filetype_info.get("dataset_aliases", {})
 
+        # Transform the aliases into regex patterns:
+        self.dataset_aliases = {
+            re.compile(key): val for key, val in self.dataset_aliases.items()
+        }
+
         # broadcasts timestamps flag:
         self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False)
 
@@ -210,6 +215,18 @@ def register_available_datasets(self):
         for vpath, desc in self.variable_desc.items():
             # Check if we have an alias for this variable:
             ds_name = desc["var_name"]
+
+            # Check if this variable path matches an alias pattern:
+            for pat, sub in self.dataset_aliases.items():
+                # Search for the pattern in the input string
+                match = pat.search(vpath)
+                if match:
+                    # Extract the captured group(s)
+                    var_name = match.group(1)
+                    ds_name = sub.replace("${VAR_NAME}", var_name)
+                    # logger.info("=> matched vpath %s: ds_name: %s", vpath, ds_name)
+                    break
+
             if vpath in self.dataset_aliases:
                 ds_name = self.dataset_aliases[vpath]
 

From e542e607f912d1a7ad6fc00778f7b8d27db85001 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Mon, 30 Sep 2024 13:28:43 +0200
Subject: [PATCH 35/41] Fixing pytest.fixture syntax

---
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 3368b144b4..97d738f08f 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -293,12 +293,12 @@ def fake_handler(self):
             patch_ctx.is_local = True
             yield patch_ctx
 
-    @pytest.fixture()
+    @pytest.fixture
     def twv_handler(self):
         """Create a simple (and fake) default handler on a TWV product."""
         return self._create_file_handler(self.twv_filename)
 
-    @pytest.fixture()
+    @pytest.fixture
     def twv_scene(self):
         """Create a simple (and fake) satpy scene on a TWV product."""
         return Scene(filenames=[self.twv_filename], reader=self.reader_name)

From 6021c92267d5a4a3c4f635925fc89e414f8d75b7 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Wed, 2 Oct 2024 14:41:25 +0200
Subject: [PATCH 36/41] Adding collection of unit tests on IASI-NG L2 reader.
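
Among other things, these tests pin down the new nanosecond-based datetime
conversion used by the reader. A minimal numpy-only sketch of the conversion
idea (illustrative values; the reader itself goes through pandas/xarray in
convert_to_datetime, see the diff below):

    import numpy as np

    epoch = np.datetime64("2000-01-01T00:00:00")
    onboard_utc_seconds = np.array([0.0, 86400.0])
    # Seconds -> integer nanoseconds, then offset from the reference epoch:
    times = epoch + (onboard_utc_seconds * 1e9).astype("timedelta64[ns]")
    # times -> 2000-01-01T00:00:00 and 2000-01-02T00:00:00
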
---
 satpy/readers/iasi_ng_l2_nc.py               |  10 +-
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 164 ++++++++++++++++++
 2 files changed, 169 insertions(+), 5 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 75d749be0f..65423550de 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -115,7 +115,7 @@ def available_datasets(self, configured_datasets=None):
     def register_dataset(self, ds_name, desc):
         """Register a simple dataset given its name and a desc dict."""
         if ds_name in self.dataset_infos:
-            raise ValueError(f"Dataset for {ds_name} already registered.")
+            raise KeyError(f"Dataset for {ds_name} already registered.")
 
         ds_infos = {
             "name": ds_name,
@@ -133,7 +133,7 @@ def process_dimension(self, key, value):
         # print(f"Found dimension: {dim_name}={val}")
         if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
             # This might happen if we have the same dim name from different groups:
-            raise ValueError(f"Detected duplicated dim name: {dim_name}")
+            raise KeyError(f"Detected duplicated dim name: {dim_name}")
 
         self.dimensions_desc[dim_name] = value
 
@@ -362,10 +362,10 @@ def convert_to_datetime(self, data_array, ds_info):
         """Convert the data to datetime values."""
         epoch = ds_info["seconds_since_epoch"]
 
-        # Note: below could convert the resulting data to another type
-        # with .astype("datetime64[us]") for instance
+        # Note: converting the time values to ns precision to avoid warnings
+        # from pandas+numpy:
         data_array = xr.DataArray(
-            data=pd.to_datetime(epoch) + data_array.astype("timedelta64[s]"),
+            data=pd.to_datetime(epoch) + (data_array * 1e9).astype("timedelta64[ns]"),
             dims=data_array.dims,
             attrs=data_array.attrs,
         )
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 97d738f08f..ec668095d2 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -437,3 +437,167 @@ def test_nbr_iterations_dataset(self, twv_scene):
         assert len(dset.dims) == 2
         # Should still be in int32 type:
         assert dset.dtype == np.int32
+
+    def test_register_dataset(self, twv_handler):
+        """Test the register_dataset method."""
+        twv_handler.dataset_infos = {}
+        twv_handler.register_dataset("test_dataset", {"attr": "value"})
+        assert "test_dataset" in twv_handler.dataset_infos
+        assert twv_handler.dataset_infos["test_dataset"]["attr"] == "value"
+
+        # Should refuse to register the same dataset again afterwards:
+        with pytest.raises(KeyError):
+            twv_handler.register_dataset("test_dataset", {"attr": "new_value"})
+
+    def test_process_dimension(self, twv_handler):
+        """Test the process_dimension method."""
+        twv_handler.dimensions_desc = {}
+        twv_handler.process_dimension("group/dimension/test_dim", 10)
+        assert twv_handler.dimensions_desc["test_dim"] == 10
+
+        # Should refuse to process the same dim again:
+        with pytest.raises(KeyError):
+            twv_handler.process_dimension("group/dimension/test_dim", 20)
+
+    def test_process_attribute(self, twv_handler):
+        """Test the process_attribute method."""
+        twv_handler.variable_desc = {"test_var": {"attribs": {}}}
+        twv_handler.process_attribute("test_var/attr/test_attr", "value")
+        assert twv_handler.variable_desc["test_var"]["attribs"]["test_attr"] == "value"
+
+    def test_process_variable(self, twv_handler):
+        """Test the process_variable method."""
+        # Note: we only support variables inside a group,
+        # because this is always the case in the IASI-NG file format.
+ twv_handler.file_content = { + "group/test_var/shape": (10, 10), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [] + twv_handler.process_variable("group/test_var") + assert "group/test_var" in twv_handler.variable_desc + + def test_parse_file_content(self, twv_handler): + """Test the parse_file_content method.""" + twv_handler.file_content = { + "dim/dimension/test_dim": 10, + "var/attr/test_attr": "value", + "grp/test_var": "my_data", + "grp/test_var/shape": (10, 10), + "grp/test_var/dimensions": ("x", "y"), + "grp/test_var/dtype": "float32", + } + + twv_handler.parse_file_content() + assert "test_dim" in twv_handler.dimensions_desc + assert "grp/test_var" in twv_handler.variable_desc + + def test_register_available_datasets(self, twv_handler): + """Test the register_available_datasets method.""" + twv_handler.dataset_infos = None + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {} + twv_handler.register_available_datasets() + assert "test_var" in twv_handler.dataset_infos + + def test_get_dataset_infos(self, twv_handler): + """Test the get_dataset_infos method.""" + twv_handler.dataset_infos = {"test_dataset": {"attr": "value"}} + assert twv_handler.get_dataset_infos("test_dataset") == {"attr": "value"} + + with pytest.raises(KeyError): + twv_handler.get_dataset_infos("non_existent_dataset") + + def test_variable_path_exists(self, twv_handler): + """Test the variable_path_exists method.""" + twv_handler.file_content = {"test_var": mock.MagicMock()} + assert twv_handler.variable_path_exists("test_var") + assert not twv_handler.variable_path_exists("/attr/test_var") + assert not twv_handler.variable_path_exists("test_var/dtype") + + def test_convert_data_type(self, twv_handler): + """Test the convert_data_type method.""" + data = xr.DataArray(np.array([1, 2, 3], dtype=np.float32)) + result = twv_handler.convert_data_type(data, dtype="float64") + assert result.dtype == np.float64 + + # Should stay as f32 if dtype is auto: + data.attrs["scale_factor"] = 2.0 + result = twv_handler.convert_data_type(data, dtype="auto") + assert result.dtype == np.float32 + + def test_apply_fill_value(self, twv_handler): + """Test the apply_fill_value method.""" + data = xr.DataArray( + np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + attrs={"valid_min": 2.0, "valid_max": 4.0}, + ) + result = twv_handler.apply_fill_value(data) + assert np.isnan(result[0]) + assert np.isnan(result[4]) + + def test_apply_rescaling(self, twv_handler): + """Test the apply_rescaling method.""" + data = xr.DataArray( + np.array([1, 2, 3]), attrs={"scale_factor": 2, "add_offset": 1} + ) + result = twv_handler.apply_rescaling(data) + np.testing.assert_array_equal(result, [3, 5, 7]) + + def test_apply_reshaping(self, twv_handler): + """Test the apply_reshaping method.""" + data = xr.DataArray(np.ones((2, 3, 4)), dims=("a", "b", "c")) + result = twv_handler.apply_reshaping(data) + assert result.dims == ("x", "y") + assert result.shape == (2, 12) + + def test_convert_to_datetime(self, twv_handler): + """Test the convert_to_datetime method.""" + data = xr.DataArray(np.array([0, 86400]), dims=("time",)) + ds_info = {"seconds_since_epoch": "2000-01-01 00:00:00"} + result = twv_handler.convert_to_datetime(data, ds_info) + assert result[0].values == np.datetime64("2000-01-01T00:00:00") + assert result[1].values == np.datetime64("2000-01-02T00:00:00") + + def test_apply_broadcast(self, 
twv_handler):
+        """Test the apply_broadcast method."""
+        data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+        twv_handler.dimensions_desc = {"n_fov": 2}
+        ds_info = {"broadcast_on_dim": "n_fov"}
+        result = twv_handler.apply_broadcast(data, ds_info)
+        assert result.shape == (6,)
+
+    def test_get_transformed_dataset(self, twv_handler):
+        """Test the get_transformed_dataset method."""
+        ds_info = {
+            "location": "test_var",
+            "seconds_since_epoch": "2000-01-01 00:00:00",
+            "broadcast_on_dim": "n_fov",
+        }
+        twv_handler.variable_path_exists = mock.MagicMock(return_value=True)
+        twv_handler.file_content["test_var"] = xr.DataArray(
+            np.array([[0, 86400]]), dims=("x", "y")
+        )
+
+        twv_handler.dimensions_desc = {"n_fov": 2}
+
+        result = twv_handler.get_transformed_dataset(ds_info)
+        assert result.shape == (1, 4)
+        assert result.dtype == "datetime64[ns]"
+
+    def test_get_dataset(self, twv_handler):
+        """Test the get_dataset method."""
+        twv_handler.dataset_infos = {
+            "test_dataset": {"name": "test_dataset", "location": "test_var"}
+        }
+        twv_handler.get_transformed_dataset = mock.MagicMock(
+            return_value=xr.DataArray([1, 2, 3])
+        )
+
+        result = twv_handler.get_dataset({"name": "test_dataset"})
+        assert result.equals(xr.DataArray([1, 2, 3]))
+
+        assert twv_handler.get_dataset({"name": "non_existent_dataset"}) is None

From 837e844398a6245e284e7cbbdbf4c3cffd01d150 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Wed, 2 Oct 2024 14:49:42 +0200
Subject: [PATCH 37/41] Cleaning code, removing debug statements.

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  3 +++
 satpy/readers/iasi_ng_l2_nc.py                 | 18 ++++--------------
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py |  4 ----
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index ea27c2328b..d95345eb6d 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -16,6 +16,9 @@ file_types:
       "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
     ]
 
+  # Keeping the optimal_estimation folder for the moment due to feature freeze,
+  # but it might be necessary to re-introduce this in the near future.
+  # So keeping this reference entry here for now.
   # ignored_patterns:
   #   - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4 diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 65423550de..e2f5631a9c 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -27,7 +27,6 @@ * IASI-L2-CO """ -import logging import re import netCDF4 @@ -37,8 +36,6 @@ from .netcdf_utils import NetCDF4FsspecFileHandler -logger = logging.getLogger(__name__) - class IASINGL2NCFileHandler(NetCDF4FsspecFileHandler): """Reader for IASI-NG L2 products in NetCDF format.""" @@ -130,7 +127,7 @@ def register_dataset(self, ds_name, desc): def process_dimension(self, key, value): """Process a dimension entry from the file_content.""" dim_name = key.split("/")[-1] - # print(f"Found dimension: {dim_name}={val}") + if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value: # This might happen if we have the same dim name from different groups: raise KeyError(f"Detected duplicated dim name: {dim_name}") @@ -140,7 +137,6 @@ def process_dimension(self, key, value): def process_attribute(self, key, value): """Process a attribute entry from the file_content.""" var_path, aname = key.split("/attr/") - # print(f"Found attrib for: {var_path}: {aname}") if var_path not in self.variable_desc: # maybe this variable is ignored, or this is a group attr. @@ -151,14 +147,14 @@ def process_attribute(self, key, value): def process_variable(self, key): """Process a variable entry from the file_content.""" shape = self.file_content[f"{key}/shape"] - # print(f"Found variable: {key}") + if np.prod(shape) <= 1: - # print(f"Ignoring scalar variable {key}") + # Ignoring scalar variable. return # Check if this variable should be ignored: if any(p.search(key) is not None for p in self.ignored_patterns): - # print(f"Ignoring variable {key}") + # Ignoring variable on user request. return # Prepare a description for this variable: @@ -181,7 +177,6 @@ def process_variable(self, key): def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): - # print(f"Found key: {key}") if "/dimension/" in key: self.process_dimension(key, val) @@ -196,10 +191,6 @@ def parse_file_content(self): self.process_variable(key) continue - # print(f"Found {len(self.variable_desc)} variables:") - # for vpath, desc in self.variable_desc.items(): - # print(f"{vpath}: {desc}") - def register_available_datasets(self): """Register the available dataset in the current product file.""" if self.dataset_infos is not None: @@ -224,7 +215,6 @@ def register_available_datasets(self): # Extract the captured group(s) var_name = match.group(1) ds_name = sub.replace("${VAR_NAME}", var_name) - # logger.info("=> matched vpath %s: ds_name: %s", vpath, ds_name) break if vpath in self.dataset_aliases: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index ec668095d2..f839aa301f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,7 +14,6 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" -import logging import os from datetime import datetime from unittest import mock @@ -29,8 +28,6 @@ from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler -logger = logging.getLogger(__name__) - d_lff = ("n_lines", "n_for", "n_fov") DATA_DESC = [ @@ -317,7 +314,6 @@ def _create_file_handler(self, filename): # We should have our handler now: assert len(reader.file_handlers) == 1 - # logger.info("File handlers are: %s", reader.file_handlers) assert self.reader_name in reader.file_handlers handlers = reader.file_handlers[self.reader_name] From 82f62e52b9ecb95285496ea95886719ca8b266ac Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 2 Oct 2024 15:34:39 +0200 Subject: [PATCH 38/41] Adding more unit tests. --- satpy/readers/iasi_ng_l2_nc.py | 3 -- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 46 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index e2f5631a9c..697eaa0f9a 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -217,9 +217,6 @@ def register_available_datasets(self): ds_name = sub.replace("${VAR_NAME}", var_name) break - if vpath in self.dataset_aliases: - ds_name = self.dataset_aliases[vpath] - unit = desc["attribs"].get("units", None) if unit is not None and unit.startswith("seconds since "): # request conversion to datetime: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index f839aa301f..53f08862e5 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -15,6 +15,7 @@ """Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" import os +import re from datetime import datetime from unittest import mock @@ -474,6 +475,32 @@ def test_process_variable(self, twv_handler): twv_handler.process_variable("group/test_var") assert "group/test_var" in twv_handler.variable_desc + def test_ignore_scalar_variable(self, twv_handler): + """Test ignoring of scalar variable.""" + # Note: we only support variables inside a group, + # because this is always the case in the IASI-NG file format. + twv_handler.file_content = { + "group/test_var/shape": (1, 1), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [] + twv_handler.process_variable("group/test_var") + assert "group/test_var" not in twv_handler.variable_desc + + def test_ignore_pattern_variable(self, twv_handler): + """Test ignoring of pattern in variable.""" + # Note: we only support variables inside a group, + # because this is always the case in the IASI-NG file format. 
+ twv_handler.file_content = { + "group/test_var/shape": (10, 10), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [re.compile(r"test_")] + twv_handler.process_variable("group/test_var") + assert "group/test_var" not in twv_handler.variable_desc + def test_parse_file_content(self, twv_handler): """Test the parse_file_content method.""" twv_handler.file_content = { @@ -499,6 +526,25 @@ def test_register_available_datasets(self, twv_handler): twv_handler.register_available_datasets() assert "test_var" in twv_handler.dataset_infos + def test_ignored_register_available_datasets(self, twv_handler): + """Test ignoring register_available_datasets method if done already.""" + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {} + twv_handler.register_available_datasets() + assert "test_var" not in twv_handler.dataset_infos + + def test_register_available_datasets_alias(self, twv_handler): + """Test the register_available_datasets method with alias.""" + twv_handler.dataset_infos = None + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {re.compile(r"var/(.+)$"): "${VAR_NAME}_oem"} + twv_handler.register_available_datasets() + assert "test_var_oem" in twv_handler.dataset_infos + def test_get_dataset_infos(self, twv_handler): """Test the get_dataset_infos method.""" twv_handler.dataset_infos = {"test_dataset": {"attr": "value"}} From c1303f351874ca12379a7aef687b5833702df54f Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 4 Oct 2024 07:23:45 +0200 Subject: [PATCH 39/41] Added a few additional unit tests for coverage. 
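
The valid_range handling exercised by the new test_apply_fill_value_range
test amounts to masking out-of-range values with NaN. A simplified sketch of
that logic (not the exact reader code, which also handles the valid_min,
valid_max and missing_value attributes):

    import numpy as np
    import xarray as xr

    data = xr.DataArray(
        np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
        attrs={"valid_range": [2.0, 4.0]},
    )
    vmin, vmax = data.attrs["valid_range"]
    # Keep values inside [vmin, vmax], replace everything else with NaN:
    data = data.where(data >= vmin, other=np.nan)
    data = data.where(data <= vmax, other=np.nan)
    # data -> [nan, 2.0, 3.0, 4.0, nan]
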
---
 satpy/readers/iasi_ng_l2_nc.py                 |  4 +--
 .../tests/reader_tests/test_iasi_ng_l2_nc.py   | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 697eaa0f9a..a41b526b50 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -363,7 +363,7 @@ def apply_broadcast(self, data_array, ds_info):
         """Apply the broadcast of the data array."""
         dim_name = ds_info["broadcast_on_dim"]
         if dim_name not in self.dimensions_desc:
-            raise ValueError(f"Invalid dimension name {dim_name}")
+            raise KeyError(f"Invalid dimension name {dim_name}")
 
         rep_count = self.dimensions_desc[dim_name]
 
@@ -377,7 +377,7 @@ def get_transformed_dataset(self, ds_info):
         vname = ds_info["location"]
 
         if not self.variable_path_exists(vname):
-            raise ValueError(f"Invalid variable path: {vname}")
+            raise KeyError(f"Invalid variable path: {vname}")
 
         # Read the raw variable data from file (this is an xr.DataArray):
         arr = self[vname]
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 53f08862e5..f502f08212 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -559,6 +559,7 @@ def test_variable_path_exists(self, twv_handler):
         assert twv_handler.variable_path_exists("test_var")
         assert not twv_handler.variable_path_exists("/attr/test_var")
         assert not twv_handler.variable_path_exists("test_var/dtype")
+        assert not twv_handler.variable_path_exists("/grp/a_non_existing_var")
 
     def test_convert_data_type(self, twv_handler):
         """Test the convert_data_type method."""
@@ -581,6 +582,17 @@ def test_apply_fill_value(self, twv_handler):
         assert np.isnan(result[0])
         assert np.isnan(result[4])
 
+    def test_apply_fill_value_range(self, twv_handler):
+        """Test the apply_fill_value method with range."""
+        data = xr.DataArray(
+            np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+            attrs={"valid_range": [2.0, 4.0]},
+        )
+        result = twv_handler.apply_fill_value(data)
+        assert np.isnan(result[0])
+        assert np.isfinite(result[1])
+        assert np.isnan(result[4])
+
     def test_apply_rescaling(self, twv_handler):
         """Test the apply_rescaling method."""
         data = xr.DataArray(
@@ -612,6 +624,15 @@ def test_apply_broadcast(self, twv_handler):
         result = twv_handler.apply_broadcast(data, ds_info)
         assert result.shape == (6,)
 
+    def test_apply_broadcast_failure(self, twv_handler):
+        """Test that the apply_broadcast method fails on a missing dim."""
+        data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+        twv_handler.dimensions_desc = {}
+        ds_info = {"broadcast_on_dim": "n_fov"}
+
+        with pytest.raises(KeyError):
+            twv_handler.apply_broadcast(data, ds_info)
+
     def test_get_transformed_dataset(self, twv_handler):
         """Test the get_transformed_dataset method."""
@@ -630,6 +651,17 @@ def test_get_transformed_dataset(self, twv_handler):
         assert result.shape == (1, 4)
         assert result.dtype == "datetime64[ns]"
 
+    def test_get_transformed_dataset_failure(self, twv_handler):
+        """Test that the get_transformed_dataset method fails on an invalid path."""
+        ds_info = {
+            "location": "test_var",
+            "seconds_since_epoch": "2000-01-01 00:00:00",
+            "broadcast_on_dim": "n_fov",
+        }
+
+        with pytest.raises(KeyError):
+            twv_handler.get_transformed_dataset(ds_info)
+
     def test_get_dataset(self, twv_handler):
         """Test the get_dataset method."""
         twv_handler.dataset_infos = {

From 2dc7811f4379926b9f01e093b5b1a5090940a208 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 5 Dec 2024 09:35:21 +0100
Subject: [PATCH 40/41] Reduced comments in iasi_ng_l2_nc.py file

---
 satpy/readers/iasi_ng_l2_nc.py | 134 +++++++++++++++------------------
 1 file changed, 61 insertions(+), 73 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index a41b526b50..5d6184c2bf 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -46,28 +46,16 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs):
 
         self.sensors = {"iasi_ng"}
 
-        # List of datasets provided by this handler:
         self.dataset_infos = None
-
-        # Description of the available variables:
         self.variable_desc = {}
-
-        # Description of the available dimensions:
         self.dimensions_desc = {}
 
-        # Ignored variable patterns:
         patterns = self.filetype_info.get("ignored_patterns", [])
         self.ignored_patterns = [re.compile(pstr) for pstr in patterns]
 
-        # dataset aliases:
-        self.dataset_aliases = self.filetype_info.get("dataset_aliases", {})
-
-        # Transform the aliases into regex patterns:
-        self.dataset_aliases = {
-            re.compile(key): val for key, val in self.dataset_aliases.items()
-        }
+        aliases = self.filetype_info.get("dataset_aliases", {})
+        self.dataset_aliases = {re.compile(key): val for key, val in aliases.items()}
 
-        # broadcasts timestamps flag:
         self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False)
 
         self.register_available_datasets()
@@ -100,9 +88,9 @@ def _collect_groups_info(self, base_name, obj):
     def available_datasets(self, configured_datasets=None):
         """Determine automatically the datasets provided by this file.
 
-        Uses a per product type dataset registration mechanism.
+        First yields any element from the provided configured_datasets,
+        then continues with the internally provided datasets.
         """
-        # pass along existing datasets
        for is_avail, ds_info in configured_datasets or []:
            yield is_avail, ds_info
 
@@ -124,45 +112,49 @@ def register_dataset(self, ds_name, desc):
 
         self.dataset_infos[ds_name] = ds_infos
 
+    def same_dim_names_for_different_groups(self, dim_name, value):
+        """Check if we already have this dim_name registered from another group."""
+        return (
+            dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value
+        )
+
     def process_dimension(self, key, value):
         """Process a dimension entry from the file_content."""
         dim_name = key.split("/")[-1]
 
-        if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
-            # This might happen if we have the same dim name from different groups:
+        if self.same_dim_names_for_different_groups(dim_name, value):
             raise KeyError(f"Detected duplicated dim name: {dim_name}")
 
         self.dimensions_desc[dim_name] = value
 
+    def has_variable_desc(self, var_path):
+        """Check if a given variable path is available."""
+        return var_path in self.variable_desc
+
     def process_attribute(self, key, value):
         """Process a attribute entry from the file_content."""
         var_path, aname = key.split("/attr/")
 
-        if var_path not in self.variable_desc:
-            # maybe this variable is ignored, or this is a group attr.
+        if not self.has_variable_desc(var_path):
             return
 
         self.variable_desc[var_path]["attribs"][aname] = value
 
-    def process_variable(self, key):
-        """Process a variable entry from the file_content."""
-        shape = self.file_content[f"{key}/shape"]
+    def has_at_most_one_element(self, shape):
+        """Check if a shape corresponds to an array with at most 1 element."""
+        return np.prod(shape) <= 1
 
-        if np.prod(shape) <= 1:
-            # Ignoring scalar variable.
- return + def is_variable_ignored(self, var_name): + """Check if a variable should be ignored.""" + return any(p.search(var_name) is not None for p in self.ignored_patterns) - # Check if this variable should be ignored: - if any(p.search(key) is not None for p in self.ignored_patterns): - # Ignoring variable on user request. - return - - # Prepare a description for this variable: + def prepare_variable_description(self, key, shape): + """Prepare a description for a given variable.""" prefix, var_name = key.rsplit("/", 1) dims = self.file_content[f"{key}/dimensions"] dtype = self.file_content[f"{key}/dtype"] - desc = { + return { "location": key, "prefix": prefix, "var_name": var_name, @@ -172,7 +164,17 @@ def process_variable(self, key): "attribs": {}, } - self.variable_desc[key] = desc + def process_variable(self, key): + """Process a variable entry from the file_content.""" + shape = self.file_content[f"{key}/shape"] + + if self.has_at_most_one_element(shape): + return + + if self.is_variable_ignored(key): + return + + self.variable_desc[key] = self.prepare_variable_description(key, shape) def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" @@ -186,44 +188,38 @@ def parse_file_content(self): self.process_attribute(key, val) continue - # if isinstance(val, netCDF4.Variable): if f"{key}/shape" in self.file_content: self.process_variable(key) continue + def check_variable_alias(self, vpath, ds_name): + """Check if a variable path matches an alias pattern.""" + for pat, sub in self.dataset_aliases.items(): + match = pat.search(vpath) + if match: + var_name = match.group(1) + return sub.replace("${VAR_NAME}", var_name) + + return ds_name + def register_available_datasets(self): """Register the available dataset in the current product file.""" if self.dataset_infos is not None: - # Datasets are already registered. return - # Otherwise, we need to perform the registration: self.dataset_infos = {} - # Parse the file content: self.parse_file_content() for vpath, desc in self.variable_desc.items(): - # Check if we have an alias for this variable: ds_name = desc["var_name"] - - # Check if this variable path matches an alias pattern: - for pat, sub in self.dataset_aliases.items(): - # Search for the pattern in the input string - match = pat.search(vpath) - if match: - # Extract the captured group(s) - var_name = match.group(1) - ds_name = sub.replace("${VAR_NAME}", var_name) - break + ds_name = self.check_variable_alias(vpath, ds_name) unit = desc["attribs"].get("units", None) if unit is not None and unit.startswith("seconds since "): - # request conversion to datetime: desc["seconds_since_epoch"] = unit.replace("seconds since ", "") if self.broadcast_timestamps and desc["var_name"] == "onboard_utc": - # Broadcast on the "n_fov" dimension: desc["broadcast_on_dim"] = "n_fov" self.register_dataset(ds_name, desc) @@ -235,24 +231,30 @@ def get_dataset_infos(self, ds_name): return self.dataset_infos[ds_name] + def is_attribute_path(self, var_path): + """Check if a given path is a root attribute path.""" + return var_path.startswith("/attr") + + def is_property_path(self, var_path): + """Check if a given path is a sub-property path.""" + return var_path.endswith(("/dtype", "/shape", "/dimensions")) + + def is_netcdf_group(self, obj): + """Check if a given object is a netCDF group.""" + return isinstance(obj, netCDF4.Group) + def variable_path_exists(self, var_path): """Check if a given variable path is available in the underlying netCDF file. 
All we really need to do here is to access the file_content dictionary and check if we have a variable under that var_path key. """ - # but we ignore attributes: or sub properties: - if var_path.startswith("/attr") or var_path.endswith( - ("/dtype", "/shape", "/dimensions") - ): + if self.is_attribute_path(var_path) or self.is_property_path(var_path): return False - # Check if the path is found: if var_path in self.file_content: - # This is only a valid variable if it is not a netcdf group: - return not isinstance(self.file_content[var_path], netCDF4.Group) + return not self.is_netcdf_group(self.file_content[var_path]) - # Var path not in file_content: return False def convert_data_type(self, data_array, dtype="auto"): @@ -278,13 +280,11 @@ def apply_fill_value(self, data_array): """Apply the rescaling transform on a given array.""" dtype = np.dtype(data_array.dtype).name if dtype not in ["float32", "float64"]: - # We won't be able to use NaN in the other cases: return data_array nan_val = np.nan if dtype == "float64" else np.float32(np.nan) attribs = data_array.attrs - # Apply the min/max valid range: if "valid_min" in attribs: vmin = attribs["valid_min"] data_array = data_array.where(data_array >= vmin, other=nan_val) @@ -298,7 +298,6 @@ def apply_fill_value(self, data_array): data_array = data_array.where(data_array >= vrange[0], other=nan_val) data_array = data_array.where(data_array <= vrange[1], other=nan_val) - # Check the missing value: missing_val = attribs.get("missing_value", None) missing_val = attribs.get("_FillValue", missing_val) @@ -309,7 +308,6 @@ def apply_fill_value(self, data_array): def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: scale_factor = attribs.setdefault("scale_factor", 1) @@ -317,7 +315,6 @@ def apply_rescaling(self, data_array): data_array = (data_array * scale_factor) + add_offset - # rescale the valid range accordingly for key in ["valid_range", "valid_min", "valid_max"]: if key in attribs: attribs[key] = attribs[key] * scale_factor + add_offset @@ -366,23 +363,19 @@ def apply_broadcast(self, data_array, ds_info): raise KeyError(f"Invalid dimension name {dim_name}") rep_count = self.dimensions_desc[dim_name] - # Apply "a repeat operation" with the last dimension size: data_array = xr.concat([data_array] * rep_count, dim=data_array.dims[-1]) return data_array def get_transformed_dataset(self, ds_info): """Retrieve a dataset with all transformations applied on it.""" - # Extract location: vname = ds_info["location"] if not self.variable_path_exists(vname): raise KeyError(f"Invalid variable path: {vname}") - # Read the raw variable data from file (this is an xr.DataArray): arr = self[vname] - # Apply the transformations: arr = self.convert_data_type(arr) arr = self.apply_fill_value(arr) arr = self.apply_rescaling(arr) @@ -400,19 +393,14 @@ def get_dataset(self, dataset_id, ds_info=None): """Get a dataset.""" ds_name = dataset_id["name"] - # In case this dataset name is not explicitly provided by this file - # handler then we should simply return None. 
if ds_name not in self.dataset_infos:
             return None
 
-        # Retrieve default infos if missing:
         if ds_info is None:
             ds_info = self.get_dataset_infos(ds_name)
             ds_name = ds_info["name"]
 
-        # Retrieve the transformed data array:
         data_array = self.get_transformed_dataset(ds_info)
 
-        # Return the resulting array:
         return data_array

From 5e67f7a25d20c0b71398f51d4a96e609d0bd05dc Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 6 Dec 2024 08:52:33 +0100
Subject: [PATCH 41/41] Removing all excessive code comments from
 test_iasi_ng_l2_nc.py

---
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 57 ++++---------------
 1 file changed, 10 insertions(+), 47 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index f502f08212..23d199ebf6 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -126,32 +126,31 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler):
     chunks = (10, 10, 10)
 
     def inject_missing_value(self, dask_array, attribs):
-        """Inject missing value in data array."""
-        # Force setting a few elements to this missing value (but still lazily with
-        # dask map_blocks function)
+        """Inject missing value in data array.
+
+        This method is used to randomly set a few elements of the
+        input array to the configured missing value read from the attribs
+        dict. This operation is done lazily using the dask map_blocks
+        function.
+        """
         missing_value = attribs["missing_value"]
 
-        # Ratio of elements to replace with missing value:
         missing_ratio = 0.05
 
         def set_missing_values(block):
-            # Generate random indices to set as missing values within this block
            block_shape = block.shape
            block_size = np.prod(block_shape)
            block_num_to_replace = int(block_size * missing_ratio)
 
-            # Create a Generator instance
            rng = np.random.default_rng()
 
-            # Generate unique random indices to set as missing values within this block
            flat_indices = rng.choice(block_size, block_num_to_replace, replace=False)
            unraveled_indices = np.unravel_index(flat_indices, block_shape)
            block[unraveled_indices] = missing_value
 
            return block
 
-        # Apply the function lazily to each block
        dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype)
 
        return dask_array
@@ -173,14 +172,11 @@ def generate_dask_array(self, dims, dtype, attribs):
         if dtype == "float32":
             dask_array = dask_array.astype(np.float32)
 
-            # Max flaot32 value is 3.4028235e38, we use a smaller value
-            # by default here:
             vmin = attribs.get("valid_min", -1e16)
             vmax = attribs.get("valid_max", 1e16)
             rand_min = vmin - (vmax - vmin) * 0.1
             rand_max = vmax + (vmax - vmin) * 0.1
 
-            # Scale and shift to the desired range [min_val, max_val]
             dask_array = dask_array * (rand_max - rand_min) + rand_min
         else:
             raise ValueError(f"Unsupported data type: {dtype}")
@@ -189,21 +185,17 @@ def generate_dask_array(self, dims, dtype, attribs):
 
     def add_rand_data(self, desc):
         """Build a random DataArray from a given description."""
-        # Create a lazy dask array with random int32 values
         dtype = desc.get("data_type", "int32")
         dims = desc["dims"]
         attribs = desc["attribs"]
         key = desc["key"]
 
         dask_array = self.generate_dask_array(dims, dtype, attribs)
-        # Define the chunk size for dask array
 
         if "missing_value" in attribs:
             dask_array = self.inject_missing_value(dask_array, attribs)
 
-        # Wrap the dask array with xarray.DataArray
         data_array = xr.DataArray(dask_array, dims=dims)
 
         data_array.attrs.update(attribs)
         self.content[key] = data_array
@@ -218,9 +210,9 @@ def get_valid_attribs(self, anames, alist):
         """Retrieve only the valid attributes from a list."""
         attribs = {}
         for idx, val in enumerate(alist):
-            # Ensure we do not inject "None" attribute values:
             if val is not None:
                 attribs[anames[idx]] = val
+
         return attribs
 
     def get_test_content(self, _filename, _filename_info, _filetype_info):
@@ -236,15 +228,10 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
 
         self.content = {}
 
-        # Note: below we use the full range of int32 to generate the random
-        # values, we expect the handler to "fix" out of range values replacing
-        # them with NaNs.
         for grp_desc in DATA_DESC:
-            # get the group prefix:
             prefix = grp_desc["group"]
             anames = grp_desc["attrs"]
             for vname, vdesc in grp_desc["variables"].items():
-                # For each variable we create a dataset descriptor:
                 attribs = self.get_valid_attribs(anames, vdesc[2])
 
                 desc = {
@@ -256,7 +243,6 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
 
                 self.add_rand_data(desc)
 
-        # We should also register the dimensions as we will need access to "n_fov" for instance:
         for key, val in self.dims.items():
             self.content[f"data/dimension/{key}"] = val
 
@@ -305,21 +291,17 @@ def _create_file_handler(self, filename):
         """Create an handler for the given file checking that it can be parsed correctly."""
         reader = load_reader(self.reader_configs)
 
-        # Test if the file is recognized by the reader
         files = reader.select_files_from_pathnames([filename])
         assert len(files) == 1, "File should be recognized by the reader"
 
-        # Create the file handler:
         reader.create_filehandlers(files)
 
-        # We should have our handler now:
         assert len(reader.file_handlers) == 1
 
         assert self.reader_name in reader.file_handlers
 
         handlers = reader.file_handlers[self.reader_name]
 
-        # We should have a single handler for a single file:
         assert len(handlers) == 1
         assert isinstance(handlers[0], IASINGL2NCFileHandler)
 
@@ -327,7 +309,6 @@ def _create_file_handler(self, filename):
 
     def test_filename_matching(self):
         """Test filename matching against some random name."""
-        # Example filename
         prefix = "W_fr-meteo-sat,GRAL,MTI1-IASING-2"
         suffix = (
             "C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
@@ -338,10 +319,9 @@ def test_filename_matching(self):
 
     def test_real_filename_matching(self):
         """Test that we will match an actual IASI NG L2 product file name."""
-        # Below we test the TWV,CLD,GHG,SFC,O3_ and CO_ products:
-        ptypes = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"]
+        supported_types = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"]
 
-        for ptype in ptypes:
+        for ptype in supported_types:
             filename = f"{self.file_prefix}-{ptype}_{self.file_suffix}"
             handler = self._create_file_handler(filename)
 
@@ -388,7 +368,6 @@ def test_latlon_datasets(self, twv_scene):
         """Test loading the latitude/longitude dataset."""
         twv_scene.load(["sounder_pixel_latitude", "sounder_pixel_longitude"])
         lat = twv_scene["sounder_pixel_latitude"]
         lon = twv_scene["sounder_pixel_longitude"]
 
-        # Should be 2D now:
         assert len(lat.dims) == 2
         assert len(lon.dims) == 2
         assert lat.dims[0] == "x"
         assert lat.dims[1] == "y"
         assert lon.dims[0] == "x"
         assert lon.dims[1] == "y"
 
-        # Should have been converted to float64:
         assert lat.dtype == np.float64
         assert lon.dtype == np.float64
 
-        # All valid lat values should be in range [-90.0,90.0]
         assert np.nanmin(lat) >= -90.0
         assert np.nanmax(lat) <= 90.0
 
-        # All valid lon values should be in range [-180.0,180.0]
         assert np.nanmin(lon) >= -180.0
         assert np.nanmax(lon) <= 180.0
 
@@ -413,15 +389,12 @@ def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset."""
         twv_scene.load(["onboard_utc", "sounder_pixel_latitude"])
         dset = 
twv_scene["onboard_utc"] - # Should be 2D now: assert len(dset.dims) == 2 assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to datetime: assert dset.dtype == np.dtype("datetime64[ns]") - # Should also have the same size as "lattitude" for instance: lat = twv_scene["sounder_pixel_latitude"] assert lat.shape == dset.shape @@ -432,7 +405,6 @@ def test_nbr_iterations_dataset(self, twv_scene): dset = twv_scene["nbr_iterations"] assert len(dset.dims) == 2 - # Should still be in int32 type: assert dset.dtype == np.int32 def test_register_dataset(self, twv_handler): @@ -442,7 +414,6 @@ def test_register_dataset(self, twv_handler): assert "test_dataset" in twv_handler.dataset_infos assert twv_handler.dataset_infos["test_dataset"]["attr"] == "value" - # Should refuse to register the same dataset again afterwards: with pytest.raises(KeyError): twv_handler.register_dataset("test_dataset", {"attr": "new_value"}) @@ -452,7 +423,6 @@ def test_process_dimension(self, twv_handler): twv_handler.process_dimension("group/dimension/test_dim", 10) assert twv_handler.dimensions_desc["test_dim"] == 10 - # Should refuse to process the same dim again: with pytest.raises(KeyError): twv_handler.process_dimension("group/dimension/test_dim", 20) @@ -464,8 +434,6 @@ def test_process_attribute(self, twv_handler): def test_process_variable(self, twv_handler): """Test the process_variable method.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (10, 10), "group/test_var/dimensions": ("x", "y"), @@ -477,8 +445,6 @@ def test_process_variable(self, twv_handler): def test_ignore_scalar_variable(self, twv_handler): """Test ignoring of scalar variable.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (1, 1), "group/test_var/dimensions": ("x", "y"), @@ -490,8 +456,6 @@ def test_ignore_scalar_variable(self, twv_handler): def test_ignore_pattern_variable(self, twv_handler): """Test ignoring of pattern in variable.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (10, 10), "group/test_var/dimensions": ("x", "y"), @@ -567,7 +531,6 @@ def test_convert_data_type(self, twv_handler): result = twv_handler.convert_data_type(data, dtype="float64") assert result.dtype == np.float64 - # Should stay as f32 if dtype is auto: data.attrs["scale_factor"] = 2.0 result = twv_handler.convert_data_type(data, dtype="auto") assert result.dtype == np.float32
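
For reference, the dataset alias mechanism that several of the tests above
exercise can be sketched as follows (a standalone illustration: the variable
path here is made up, while the pattern/replacement pair mirrors the entry
from the reader YAML):

    import re

    # Map variables found under an optimal_estimation group to "<name>_oem":
    dataset_aliases = {re.compile(r"/optimal_estimation/(.+)$"): "${VAR_NAME}_oem"}

    vpath = "data/some_group/optimal_estimation/air_temperature"  # hypothetical path
    ds_name = vpath.rsplit("/", 1)[-1]
    for pat, sub in dataset_aliases.items():
        match = pat.search(vpath)
        if match:
            ds_name = sub.replace("${VAR_NAME}", match.group(1))
            break
    # ds_name -> "air_temperature_oem"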