From 0de793db7f9372888ff9850faad451925d657115 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 10:33:52 +0200 Subject: [PATCH 01/41] Adding minimal test class on iasi ng l2 reader --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py new file mode 100644 index 0000000000..e47f4db72b --- /dev/null +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -0,0 +1,24 @@ +# Copyright (c) 2022 Satpy developers +# +# satpy is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# satpy is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with satpy. If not, see . + +"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" + + +class TestIasiNGL2: + """Main test class for the IASI NG L2 reader.""" + + def test_sanity(self): + """Minimal temporary sanity check unit test.""" + assert 1 == 0 From bdb2cc8766e8af37688669aa5af4449e157b968f Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 11:18:44 +0200 Subject: [PATCH 02/41] Fixing sanity unit test --- satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index e47f4db72b..3979e42283 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -21,4 +21,4 @@ class TestIasiNGL2: def test_sanity(self): """Minimal temporary sanity check unit test.""" - assert 1 == 0 + assert 1 == 1 From 83736952ddac308b676383c6da9a364add622f29 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 17:43:04 +0200 Subject: [PATCH 03/41] Added initial version of file pattern and unit test --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 17 ++++ satpy/readers/iasi_ng_l2_nc.py | 70 +++++++++++++++++ .../tests/reader_tests/test_iasi_ng_l2_nc.py | 78 ++++++++++++++++++- 3 files changed, 161 insertions(+), 4 deletions(-) create mode 100644 satpy/etc/readers/iasi_ng_l2_nc.yaml create mode 100644 satpy/readers/iasi_ng_l2_nc.py diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml new file mode 100644 index 0000000000..d239ba7f82 --- /dev/null +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -0,0 +1,17 @@ +reader: + name: iasi_ng_l2_nc + short_name: IASI-NG L2 NC Reader + long_name: IASI-NG Level-2 NetCDF Reader + description: Reader for IASI-NG Level-2 NetCDF files + sensors: [iasi] + reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader + status: Alpha + supports_fsspec: false + +file_types: + iasi_ng_l2_nc: + file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler + file_patterns: + [ + 
"{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}-{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + ] diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py new file mode 100644 index 0000000000..33d82ed852 --- /dev/null +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -0,0 +1,70 @@ +# Copyright (c) 2017-2023 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see . + +r"""IASI-NG L2 reader + +This reader supports reading all the products from the IASI-NG L2 processing +level: + * IASI-L2-TWV + * IASI-L2-CLD + * IASI-L2-GHG + * IASI-L2-SFC +""" + +import logging +import logging +import numpy as np +import xarray as xr + + +from .netcdf_utils import NetCDF4FsspecFileHandler + +logger = logging.getLogger(__name__) + + +class IASINGL2NCFileHandler(NetCDF4FsspecFileHandler): + """Reader for IASI-NG L2 products in NetCDF format.""" + + def __init__(self, filename, filename_info, filetype_info, **kwargs): + """Initialize object.""" + super().__init__(filename, filename_info, filetype_info, **kwargs) + + logger.info("Creating reader with infos: %s", filename_info) + + # def get_dataset(self, data_id, ds_info): + # """Obtain dataset.""" + # ds = self[data_id["name"]] + # if "scan_lines" in ds.dims: + # ds = ds.rename(scan_lines="y") + # if "pixels" in ds.dims: + # ds = ds.rename(pixels="x") + # if "_FillValue" in ds.attrs and ds.dtype.kind == "f": + # with xr.set_options(keep_attrs=True): + # # have to inverse the logic due to https://github.com/pydata/xarray/issues/7581 + # return xr.where(ds != ds.attrs["_FillValue"], ds, np.nan) + # return ds + + # def available_datasets(self, configured_datasets=None): + # """Get available datasets based on what's in the file. + + # Returns all datasets in the root group. + # """ + # yield from super().available_datasets(configured_datasets) + # common = {"file_type": "iasi_l2_cdr_nc", "resolution": 12000} + # for key in self.file_content: + # if "/" in key: # not a dataset + # continue + # yield (True, {"name": key} | common | self[key].attrs) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 3979e42283..822593314b 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,11 +14,81 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" +import pytest +import logging +import os +from unittest import mock +from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler +from satpy.readers import load_reader -class TestIasiNGL2: +from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler + +logger = logging.getLogger(__name__) + + +class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): + """Fake base class for IASI NG handler""" + + def get_test_content(self, _filename, _filename_info, _filetype_info): + """Get the content of the test data. + + Here we generate the default content we want to provide depending + on the provided filename infos. + """ + + dset = {} + return dset + + +class TestIASINGL2NCReader: """Main test class for the IASI NG L2 reader.""" - def test_sanity(self): - """Minimal temporary sanity check unit test.""" - assert 1 == 1 + yaml_file = "iasi_ng_l2_nc.yaml" + + def setup_method(self): + """Setup the reade config""" + from satpy._config import config_search_paths + + self.reader_configs = config_search_paths( + os.path.join("readers", self.yaml_file) + ) + + @pytest.fixture(autouse=True, scope="class") + def fake_handler(self): + """Wrap NetCDF4 FileHandler with our own fake handler.""" + patch_ctx = mock.patch.object( + IASINGL2NCFileHandler, "__bases__", (FakeIASINGFileHandlerBase,) + ) + + with patch_ctx: + patch_ctx.is_local = True + yield patch_ctx + + def test_filename_matching(self): + # Example filename + filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425-20220101115728_____W______.nc" + + reader = load_reader(self.reader_configs) + + # Test if the file is recognized by the reader + files = reader.select_files_from_pathnames([filename]) + + assert len(files) == 1, "File should be recognized by the reader" + + # Create the file handler: + reader.create_filehandlers(files) + + # We should have our handler now: + assert len(reader.file_handlers) == 1 + + # logger.info("File handlers are: %s", reader.file_handlers) + + assert "iasi_ng_l2_nc" in reader.file_handlers + + handlers = reader.file_handlers["iasi_ng_l2_nc"] + + # We should have a single handler for a single file: + assert len(handlers) == 1 + + assert isinstance(handlers[0], IASINGL2NCFileHandler) From 21674841f1192823676cf088d96bd1a0122d840a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 18:13:41 +0200 Subject: [PATCH 04/41] Fixing minor error in file pattern --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 2 +- satpy/readers/iasi_ng_l2_nc.py | 2 +- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 41 ++++++++++++++----- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index d239ba7f82..3b48654c59 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -13,5 +13,5 @@ file_types: file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler file_patterns: [ - "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}-{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + 
"{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 33d82ed852..de362d4b84 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -42,7 +42,7 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): """Initialize object.""" super().__init__(filename, filename_info, filetype_info, **kwargs) - logger.info("Creating reader with infos: %s", filename_info) + # logger.info("Creating reader with infos: %s", filename_info) # def get_dataset(self, data_id, ds_info): # """Obtain dataset.""" diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 822593314b..a686ef0443 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -44,14 +44,14 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): class TestIASINGL2NCReader: """Main test class for the IASI NG L2 reader.""" - yaml_file = "iasi_ng_l2_nc.yaml" + reader_name = "iasi_ng_l2_nc" def setup_method(self): """Setup the reade config""" from satpy._config import config_search_paths self.reader_configs = config_search_paths( - os.path.join("readers", self.yaml_file) + os.path.join("readers", self.reader_name + ".yaml") ) @pytest.fixture(autouse=True, scope="class") @@ -65,15 +65,14 @@ def fake_handler(self): patch_ctx.is_local = True yield patch_ctx - def test_filename_matching(self): - # Example filename - filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425-20220101115728_____W______.nc" + def _create_file_handler(self, filename): + """Create an handler for the given file checking that it can + be parsed correctly""" reader = load_reader(self.reader_configs) # Test if the file is recognized by the reader files = reader.select_files_from_pathnames([filename]) - assert len(files) == 1, "File should be recognized by the reader" # Create the file handler: @@ -83,12 +82,34 @@ def test_filename_matching(self): assert len(reader.file_handlers) == 1 # logger.info("File handlers are: %s", reader.file_handlers) + assert self.reader_name in reader.file_handlers - assert "iasi_ng_l2_nc" in reader.file_handlers - - handlers = reader.file_handlers["iasi_ng_l2_nc"] + handlers = reader.file_handlers[self.reader_name] # We should have a single handler for a single file: assert len(handlers) == 1 - assert isinstance(handlers[0], IASINGL2NCFileHandler) + + return handlers[0] + + def test_filename_matching(self): + """Test filename matching against some random name""" + + # Example filename + filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc" + + self._create_file_handler(filename) + + def test_real_filename_matching(self): + """Test that we will match an actual IASI NG L2 product file name""" + + # Below we test the TWV,CLD,GHG and SFC products: + filenames = { + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc", + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-CLD_C_EUMT_20170616120000_G_V_20070912094037_20070912094308_O_N____.nc", + 
"W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-GHG_C_EUMT_20170616120000_G_V_20070912090651_20070912090922_O_N____.nc", + "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-SFC_C_EUMT_20170616120000_G_V_20070912100911_20070912101141_O_N____.nc", + } + + for filename in filenames: + self._create_file_handler(filename) From 53c7e52259d285648885aa2f01a6cabc5da24de6 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 7 Aug 2024 18:36:27 +0200 Subject: [PATCH 05/41] Adding simple properties in reader --- satpy/readers/iasi_ng_l2_nc.py | 35 ++++++++++++++++++- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 20 ++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index de362d4b84..9342db632b 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -42,7 +42,40 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): """Initialize object.""" super().__init__(filename, filename_info, filetype_info, **kwargs) - # logger.info("Creating reader with infos: %s", filename_info) + self.sensors = {"iasi_ng"} + + # List of datasets provided by this handler: + self.dataset_infos = [] + + logger.info("Creating reader with infos: %s", filename_info) + + @property + def start_time(self): + """Get the start time.""" + return self.filename_info["sensing_start_time"] + + @property + def end_time(self): + """Get the end time.""" + return self.filename_info["sensing_end_time"] + + @property + def sensor_names(self): + """List of sensors represented in this file.""" + return self.sensors + + def available_datasets(self, configured_datasets=None): + """Determine automatically the datasets provided by this file. + + Uses a per product type dataset registration mechanism. 
+ """ + + # pass along existing datasets + for is_avail, ds_info in configured_datasets or []: + yield is_avail, ds_info + + for ds_info in self.dataset_infos: + yield True, ds_info # def get_dataset(self, data_id, ds_info): # """Obtain dataset.""" diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index a686ef0443..25c7a0fb27 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -19,7 +19,7 @@ import os from unittest import mock from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler - +from datetime import datetime from satpy.readers import load_reader from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler @@ -65,6 +65,12 @@ def fake_handler(self): patch_ctx.is_local = True yield patch_ctx + @pytest.fixture() + def twv_handler(self): + """Create a simple (and fake) default handler on a TWV product""" + filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" + return self._create_file_handler(filename) + def _create_file_handler(self, filename): """Create an handler for the given file checking that it can be parsed correctly""" @@ -113,3 +119,15 @@ def test_real_filename_matching(self): for filename in filenames: self._create_file_handler(filename) + + def test_sensing_times(self, twv_handler): + """Test that we read the sensing start/end times correctly + from filename""" + + assert twv_handler.start_time == datetime(2007, 9, 12, 8, 43, 29) + assert twv_handler.end_time == datetime(2007, 9, 12, 8, 46, 0) + + def test_sensor_names(self, twv_handler): + """Test that the handler reports iasi_ng as sensor""" + + assert twv_handler.sensor_names == {"iasi_ng"} From 252808a9c4da4fa8c80d91fc7bc30066341b74be Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 08:39:46 +0200 Subject: [PATCH 06/41] Preparing initial support to read datasets in iasi_ng handler --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 7 +- satpy/readers/iasi_ng_l2_nc.py | 214 +++++++++++++++--- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 9 +- 3 files changed, 196 insertions(+), 34 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 3b48654c59..0dcb3ee2d7 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -13,5 +13,10 @@ file_types: file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler file_patterns: [ - "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", + "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] + datasets: + latitude: + location: data/geolocation_information/sounder_pixel_latitude + longitude: + location: data/geolocation_information/sounder_pixel_longitude diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 9342db632b..c20c6289f2 100644 --- 
a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -25,10 +25,9 @@ """ import logging -import logging -import numpy as np -import xarray as xr +import netCDF4 +import numpy as np from .netcdf_utils import NetCDF4FsspecFileHandler @@ -45,10 +44,15 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): self.sensors = {"iasi_ng"} # List of datasets provided by this handler: - self.dataset_infos = [] + self.dataset_infos = None + + # Description of the available datasets: + self.ds_desc = self.filetype_info["datasets"] logger.info("Creating reader with infos: %s", filename_info) + self.register_available_datasets() + @property def start_time(self): """Get the start time.""" @@ -74,30 +78,182 @@ def available_datasets(self, configured_datasets=None): for is_avail, ds_info in configured_datasets or []: yield is_avail, ds_info - for ds_info in self.dataset_infos: + for _, ds_info in self.dataset_infos.items(): yield True, ds_info - # def get_dataset(self, data_id, ds_info): - # """Obtain dataset.""" - # ds = self[data_id["name"]] - # if "scan_lines" in ds.dims: - # ds = ds.rename(scan_lines="y") - # if "pixels" in ds.dims: - # ds = ds.rename(pixels="x") - # if "_FillValue" in ds.attrs and ds.dtype.kind == "f": - # with xr.set_options(keep_attrs=True): - # # have to inverse the logic due to https://github.com/pydata/xarray/issues/7581 - # return xr.where(ds != ds.attrs["_FillValue"], ds, np.nan) - # return ds - - # def available_datasets(self, configured_datasets=None): - # """Get available datasets based on what's in the file. - - # Returns all datasets in the root group. - # """ - # yield from super().available_datasets(configured_datasets) - # common = {"file_type": "iasi_l2_cdr_nc", "resolution": 12000} - # for key in self.file_content: - # if "/" in key: # not a dataset - # continue - # yield (True, {"name": key} | common | self[key].attrs) + def register_dataset(self, ds_name, desc): + """Register a simple dataset given its name and a desc dict""" + + if ds_name in self.dataset_infos: + raise ValueError(f"Dataset for {ds_name} already registered.") + + ds_infos = { + "name": ds_name, + "sensor": "iasi_ng", + "file_type": self.filetype_info["file_type"], + } + + ds_infos.update(desc) + + self.dataset_infos[ds_name] = ds_infos + + def register_available_datasets(self): + """Register the available dataset in the current product file""" + + if self.dataset_infos is not None: + # Datasets are already registered. + return + + # Otherwise, we need to perform the registration: + self.dataset_infos = {} + + for ds_name, desc in self.ds_desc.items(): + self.register_dataset(ds_name, desc) + + def get_dataset_infos(self, ds_name): + """Retrieve the dataset infos corresponding to one of the registered + datasets.""" + if ds_name not in self.dataset_infos: + raise KeyError(f"No dataset registered for {ds_name}") + + return self.dataset_infos[ds_name] + + def variable_path_exists(self, var_path): + """Check if a given variable path is available in the underlying + netCDF file. + + All we really need to do here is to access the file_content dictionary + and check if we have a variable under that var_path key. 
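+
+        Note that the file_content dictionary provided by the base netCDF
+        handler also contains attribute and sub-property entries, so those
+        are explicitly filtered out below.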
+        """
+        # But we ignore attribute entries or sub-properties:
+        if var_path.startswith("/attr") or var_path.endswith(
+            ("/dtype", "/shape", "/dimensions")
+        ):
+            return False
+
+        # Check if the path is found:
+        if var_path in self.file_content:
+            # This is only a valid variable if it is not a netcdf group:
+            return not isinstance(self.file_content[var_path], netCDF4.Group)
+
+        # Var path not in file_content:
+        return False
+
+    def apply_fill_value(self, data_array, ds_info):
+        """Apply the fill-value masking on a given array."""
+
+        # Check if we should apply:
+        if ds_info.get("apply_fill_value", True) is not True:
+            return data_array
+
+        attribs = data_array.attrs
+
+        # Apply the min/max valid range:
+        if "valid_min" in attribs:
+            vmin = attribs["valid_min"]
+            data_array = data_array.where(data_array < vmin, np.float32(np.nan))
+
+        if "valid_max" in attribs:
+            vmax = attribs["valid_max"]
+            data_array = data_array.where(data_array > vmax, np.float32(np.nan))
+
+        if "valid_range" in attribs:
+            vrange = attribs["valid_range"]
+            data_array = data_array.where(data_array < vrange[0], np.float32(np.nan))
+            data_array = data_array.where(data_array > vrange[1], np.float32(np.nan))
+
+        # Check the missing value:
+        missing_val = attribs.get("missing_value", None)
+        missing_val = attribs.get("_FillValue", missing_val)
+
+        if missing_val is None:
+            return data_array
+
+        return data_array.where(data_array != missing_val, np.float32(np.nan))
+
+    def apply_rescaling(self, data_array, ds_info):
+        """Apply the rescaling transform on a given array."""
+
+        # Here we should apply the rescaling except if it is explicitly
+        # requested not to rescale:
+        if ds_info.get("apply_rescaling", True) is not True:
+            return data_array
+
+        # Check if we have the scaling elements:
+        attribs = data_array.attrs
+        if "scale_factor" in attribs or "add_offset" in attribs:
+            scale_factor = attribs.setdefault("scale_factor", 1)
+            add_offset = attribs.setdefault("add_offset", 0)
+
+            data_array = (data_array * scale_factor) + add_offset
+
+            # rescale the valid range accordingly
+            if "valid_range" in attribs:
+                attribs["valid_range"] = (
+                    attribs["valid_range"] * scale_factor + add_offset
+                )
+
+        data_array.attrs.update(attribs)
+
+        return data_array
+
+    def apply_reshaping(self, data_array, ds_info):
+        """Apply the reshaping transform on a given IASI-NG data array.
+
+        Those arrays may come as 3D arrays, in which case we collapse the
+        last 2 dimensions on a single axis (i.e.
the number of columns or "y") + + In the process, we also rename the first axis to "x" + """ + + if ds_info.get("apply_reshaping", True) is not True: + return data_array + + if data_array.dims[0] != "x": + data_array = data_array.rename({data_array.dims[0]: "x"}) + + if len(data_array.dims) > 2: + return data_array.stack(y=(data_array.dims[1:])) + + return data_array + + def get_transformed_dataset(self, ds_info): + """Retrieve a dataset with all transformations applied on it.""" + + # Extract location: + vname = ds_info["location"] + + if not self.variable_path_exists(vname): + raise ValueError(f"Invalid variable path: {vname}") + + # Read the raw variable data from file (this is an xr.DataArray): + arr = self[vname] + + # Apply the transformations: + arr = self.apply_fill_value(arr, ds_info) + arr = self.apply_rescaling(arr, ds_info) + arr = self.apply_reshaping(arr, ds_info) + + return arr + + def get_dataset(self, dataset_id, ds_info=None): + """Get a dataset.""" + + ds_name = dataset_id["name"] + + # In case this dataset name is not explicitly provided by this file + # handler then we should simply return None. + if ds_name not in self.dataset_infos: + return None + + # Retrieve default infos if missing: + if ds_info is None: + ds_info = self.get_dataset_infos(ds_name) + + ds_name = ds_info["name"] + + # Retrieve the transformed data array: + data_array = self.get_transformed_dataset(ds_info) + + # Return the resulting array: + return data_array diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 25c7a0fb27..131a455676 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,14 +14,15 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" -import pytest import logging import os -from unittest import mock -from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from datetime import datetime -from satpy.readers import load_reader +from unittest import mock +import pytest + +from satpy.readers import load_reader +from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler logger = logging.getLogger(__name__) From 3e7ec22142cfd3812249f38eaa11b4ab4685dc81 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 10:19:43 +0200 Subject: [PATCH 07/41] Fixing value filling process and updated unit tests --- satpy/readers/iasi_ng_l2_nc.py | 24 ++-- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 133 +++++++++++++++++- 2 files changed, 146 insertions(+), 11 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c20c6289f2..7f2a72e1e2 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -146,21 +146,28 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array + if data_array.dtype == np.int32: + data_array = data_array.astype(np.float32) + else: + raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + attribs = data_array.attrs + nan_val = np.float32(np.nan) + # Apply the min/max valid range: if "valid_min" in attribs: vmin = attribs["valid_min"] - data_array = data_array.where(data_array < vmin, np.float32(np.nan)) + data_array = data_array.where(data_array >= vmin, other=nan_val) if "valid_max" in attribs: vmax = attribs["valid_max"] - data_array = data_array.where(data_array > vmax, np.float32(np.nan)) + data_array = data_array.where(data_array <= vmax, other=nan_val) if "valid_range" in attribs: vrange = attribs["valid_range"] - data_array = data_array.where(data_array < vrange[0], np.float32(np.nan)) - data_array = data_array.where(data_array > vrange[1], np.float32(np.nan)) + data_array = data_array.where(data_array >= vrange[0], other=nan_val) + data_array = data_array.where(data_array <= vrange[1], other=nan_val) # Check the missing value: missing_val = attribs.get("missing_value", None) @@ -169,7 +176,7 @@ def apply_fill_value(self, data_array, ds_info): if missing_val is None: return data_array - return data_array.where(data_array != missing_val, np.float32(np.nan)) + return data_array.where(data_array != missing_val, nan_val) def apply_rescaling(self, data_array, ds_info): """Apply the rescaling transform on a given array.""" @@ -188,10 +195,9 @@ def apply_rescaling(self, data_array, ds_info): data_array = (data_array * scale_factor) + add_offset # rescale the valid range accordingly - if "valid_range" in attribs: - attribs["valid_range"] = ( - attribs["valid_range"] * scale_factor + add_offset - ) + for key in ["valid_range", "valid_min", "valid_max"]: + if key in attribs: + attribs[key] = attribs[key] * scale_factor + add_offset data_array.attrs.update(attribs) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 131a455676..e920c497f1 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -19,8 +19,12 @@ from datetime import datetime from unittest import mock +import dask.array as da +import numpy as np import pytest +import xarray as xr +from satpy 
import Scene from satpy.readers import load_reader from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler @@ -31,15 +35,84 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" + chunks = (10, 10, 10) # Define the chunk size for dask array + + def add_rand_data(self, desc): + """ + Build a random DataArray from a given description, and add it as content. + """ + + # Create a lazy dask array with random int32 values + dtype = desc.get("data_type", "int32") + rand_min = desc.get("rand_min", 0) + rand_max = desc.get("rand_max", 100) + dims = desc["dims"] + key = desc["key"] + + shape = tuple(dims.values()) + + if dtype == "int32": + dask_array = da.random.randint( + rand_min, rand_max, size=shape, chunks=self.chunks, dtype="int32" + ) + else: + raise ValueError(f"Unsupported data type: {dtype}") + + # Wrap the dask array with xarray.DataArray + data_array = xr.DataArray(dask_array, dims=list(dims.keys())) + + data_array.attrs.update(desc.get("attribs", {})) + + self.content[key] = data_array + def get_test_content(self, _filename, _filename_info, _filetype_info): """Get the content of the test data. Here we generate the default content we want to provide depending on the provided filename infos. """ + n_lines = 10 + n_for = 14 + n_fov = 16 + def_dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} + + self.content = {} + + # Note: below we use the full range of int32 to generate the random + # values, we expect the handler to "fix" out of range values replacing + # them with NaNs. + self.add_rand_data( + { + "key": "data/geolocation_information/sounder_pixel_latitude", + "dims": def_dims, + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 0.0, + }, + } + ) + self.add_rand_data( + { + "key": "data/geolocation_information/sounder_pixel_longitude", + "dims": def_dims, + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + }, + } + ) - dset = {} - return dset + return self.content class TestIASINGL2NCReader: @@ -72,6 +145,12 @@ def twv_handler(self): filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" return self._create_file_handler(filename) + @pytest.fixture() + def twv_scene(self): + """Create a simple (and fake) satpy scene on a TWV product""" + filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc" + return Scene(filenames=[filename], reader=self.reader_name) + def _create_file_handler(self, filename): """Create an handler for the given file checking that it can be parsed correctly""" @@ -132,3 +211,53 @@ def test_sensor_names(self, twv_handler): """Test that the handler reports iasi_ng as sensor""" assert twv_handler.sensor_names == {"iasi_ng"} + + def test_available_datasets(self, twv_scene): + """Test the list of available datasets in scene""" + + dnames = twv_scene.available_dataset_names() + + assert "latitude" in dnames + assert "longitude" in dnames + + def test_latitude_dataset(self, twv_scene): + """Test loading the latitude dataset""" + + twv_scene.load(["latitude"]) + dset = twv_scene["latitude"] + + # Should be 2D now: + 
assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to float32: + assert dset.dtype == np.float32 + + # All valid values should be in range [-90.0,90.0] + vmin = np.nanmin(dset) + vmax = np.nanmax(dset) + + assert vmin >= -90.0 + assert vmax <= 90.0 + + def test_longitude_dataset(self, twv_scene): + """Test loading the longitude dataset""" + + twv_scene.load(["longitude"]) + dset = twv_scene["longitude"] + + # Should be 2D now: + assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to float32: + assert dset.dtype == np.float32 + + # All valid values should be in range [-90.0,90.0] + vmin = np.nanmin(dset) + vmax = np.nanmax(dset) + + assert vmin >= -180.0 + assert vmax <= 180.0 From e4ea66dbb0a2cfd3eb8367fe9994e4b12886a9d4 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:18:09 +0200 Subject: [PATCH 08/41] Adding support for missing value handling in unit test --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index e920c497f1..cfc6aedcd7 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -58,10 +58,39 @@ def add_rand_data(self, desc): else: raise ValueError(f"Unsupported data type: {dtype}") + attribs = desc.get("attribs", {}) + + if "missing_value" in attribs: + # Force setting a few elements to this missing value (but still lazily with + # dask map_blocks function) + missing_value = attribs["missing_value"] + + # Ratio of elements to replace with missing value: + missing_ratio = 0.05 + + def set_missing_values(block): + # Generate random indices to set as missing values within this block + block_shape = block.shape + + block_size = np.prod(block_shape) + block_num_to_replace = int(block_size * missing_ratio) + + # Generate unique random indices to set as missing values within this block + flat_indices = np.random.choice( + block_size, block_num_to_replace, replace=False + ) + unraveled_indices = np.unravel_index(flat_indices, block_shape) + block[unraveled_indices] = missing_value + + return block + + # Apply the function lazily to each block + dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + # Wrap the dask array with xarray.DataArray data_array = xr.DataArray(dask_array, dims=list(dims.keys())) - data_array.attrs.update(desc.get("attribs", {})) + data_array.attrs.update(attribs) self.content[key] = data_array @@ -93,6 +122,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): "valid_max": 1800000000, "scale_factor": 5.0e-8, "add_offset": 0.0, + "missing_value": -2147483648, }, } ) @@ -108,6 +138,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): "valid_max": 1843200000, "scale_factor": 9.765625e-8, "add_offset": 0.0, + "missing_value": -2147483648, }, } ) From e094de9a4e59d7ffe6d6c26974e7ca2043c86efc Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:40:06 +0200 Subject: [PATCH 09/41] Using float 64 for lat/lon datasets --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 2 ++ satpy/readers/iasi_ng_l2_nc.py | 11 ++++++++--- satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 8 ++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml 
b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 0dcb3ee2d7..7c1dbc3f8a 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -18,5 +18,7 @@ file_types: datasets: latitude: location: data/geolocation_information/sounder_pixel_latitude + data_type: float64 longitude: location: data/geolocation_information/sounder_pixel_longitude + data_type: float64 diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 7f2a72e1e2..d2efd35712 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -146,14 +146,19 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array + dtype = ds_info.get("data_type", "float32") + if data_array.dtype == np.int32: - data_array = data_array.astype(np.float32) + data_array = data_array.astype(dtype) else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") - attribs = data_array.attrs + if dtype not in ["float32", "float64"]: + # We won't be able to use NaN in the other cases: + return data_array - nan_val = np.float32(np.nan) + attribs = data_array.attrs + nan_val = np.nan if dtype == "float64" else np.float32(np.nan) # Apply the min/max valid range: if "valid_min" in attribs: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index cfc6aedcd7..cbd68506f3 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -262,8 +262,8 @@ def test_latitude_dataset(self, twv_scene): assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to float32: - assert dset.dtype == np.float32 + # Should have been converted to float64: + assert dset.dtype == np.float64 # All valid values should be in range [-90.0,90.0] vmin = np.nanmin(dset) @@ -283,8 +283,8 @@ def test_longitude_dataset(self, twv_scene): assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to float32: - assert dset.dtype == np.float32 + # Should have been converted to float64: + assert dset.dtype == np.float64 # All valid values should be in range [-90.0,90.0] vmin = np.nanmin(dset) From 51727f8e8ef0873aea391a0c3c107193274f5269 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 11:51:56 +0200 Subject: [PATCH 10/41] Adding dedicated dataset descs array --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index cbd68506f3..5ace5a0b37 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -31,6 +31,37 @@ logger = logging.getLogger(__name__) +DATASET_DESCS = [ + { + "key": "data/geolocation_information/sounder_pixel_latitude", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_longitude", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + 
"missing_value": -2147483648, + }, + }, +] + class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" @@ -49,7 +80,7 @@ def add_rand_data(self, desc): dims = desc["dims"] key = desc["key"] - shape = tuple(dims.values()) + shape = [self.dims[k] for k in dims] if dtype == "int32": dask_array = da.random.randint( @@ -88,7 +119,7 @@ def set_missing_values(block): dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) # Wrap the dask array with xarray.DataArray - data_array = xr.DataArray(dask_array, dims=list(dims.keys())) + data_array = xr.DataArray(dask_array, dims=dims) data_array.attrs.update(attribs) @@ -103,45 +134,15 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): n_lines = 10 n_for = 14 n_fov = 16 - def_dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} + self.dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov} self.content = {} # Note: below we use the full range of int32 to generate the random # values, we expect the handler to "fix" out of range values replacing # them with NaNs. - self.add_rand_data( - { - "key": "data/geolocation_information/sounder_pixel_latitude", - "dims": def_dims, - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - } - ) - self.add_rand_data( - { - "key": "data/geolocation_information/sounder_pixel_longitude", - "dims": def_dims, - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - } - ) + for desc in DATASET_DESCS: + self.add_rand_data(desc) return self.content From 2e57be929761a19a2d1c617c035b02a4924cf729 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 13:43:45 +0200 Subject: [PATCH 11/41] Adding support for datetime conversion --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 13 ++- satpy/readers/iasi_ng_l2_nc.py | 35 ++++++- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 99 ++++++++++++++++++- 3 files changed, 140 insertions(+), 7 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 7c1dbc3f8a..7422716baa 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,9 +16,18 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] datasets: + onboard_utc: + location: data/geolocation_information/onboard_utc + seconds_since_epoch: "2020-01-01" latitude: location: data/geolocation_information/sounder_pixel_latitude - data_type: float64 longitude: location: data/geolocation_information/sounder_pixel_longitude - data_type: float64 + azimuth: + location: data/geolocation_information/sounder_pixel_azimuth + zenith: + location: data/geolocation_information/sounder_pixel_zenith + sun_azimuth: + location: data/geolocation_information/sounder_pixel_sun_azimuth + sun_zenith: + location: data/geolocation_information/sounder_pixel_sun_zenith diff --git a/satpy/readers/iasi_ng_l2_nc.py 
b/satpy/readers/iasi_ng_l2_nc.py index d2efd35712..fd2319d6dc 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -26,8 +26,11 @@ import logging +import dask.array as da import netCDF4 import numpy as np +import pandas as pd +import xarray as xr from .netcdf_utils import NetCDF4FsspecFileHandler @@ -146,10 +149,13 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array - dtype = ds_info.get("data_type", "float32") + dtype = ds_info.get("data_type", "float64") if data_array.dtype == np.int32: data_array = data_array.astype(dtype) + elif data_array.dtype == np.float64: + if dtype != "float64": + data_array = data_array.astype(dtype) else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") @@ -220,11 +226,33 @@ def apply_reshaping(self, data_array, ds_info): if ds_info.get("apply_reshaping", True) is not True: return data_array + if len(data_array.dims) > 2: + data_array = data_array.stack(y=(data_array.dims[1:])) + if data_array.dims[0] != "x": data_array = data_array.rename({data_array.dims[0]: "x"}) - if len(data_array.dims) > 2: - return data_array.stack(y=(data_array.dims[1:])) + if data_array.dims[1] != "y": + data_array = data_array.rename({data_array.dims[1]: "y"}) + + return data_array + + def apply_to_datetime(self, data_array, ds_info): + """Convert the data to datetime values.""" + + if "seconds_since_epoch" not in ds_info: + return data_array + + epoch = ds_info["seconds_since_epoch"] + # Note: below could convert the resulting data to another type + # with .astype("datetime64[us]") for instance + data_array = xr.DataArray( + data=da.from_array( + pd.to_datetime(epoch) + data_array.astype("timedelta64[s]") + ), + dims=data_array.dims, + attrs=data_array.attrs, + ) return data_array @@ -244,6 +272,7 @@ def get_transformed_dataset(self, ds_info): arr = self.apply_fill_value(arr, ds_info) arr = self.apply_rescaling(arr, ds_info) arr = self.apply_reshaping(arr, ds_info) + arr = self.apply_to_datetime(arr, ds_info) return arr diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 5ace5a0b37..981f71f481 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -32,6 +32,18 @@ logger = logging.getLogger(__name__) DATASET_DESCS = [ + { + "key": "data/geolocation_information/onboard_utc", + "dims": ("n_lines", "n_for"), + "data_type": "float64", + "rand_min": -1100000000.0, + "rand_max": 1100000000.0, + "attribs": { + "valid_min": -1000000000.0, + "valid_max": 1000000000.0, + "missing_value": -9000000000.0, + }, + }, { "key": "data/geolocation_information/sounder_pixel_latitude", "dims": ("n_lines", "n_for", "n_fov"), @@ -60,13 +72,69 @@ "missing_value": -2147483648, }, }, + { + "key": "data/geolocation_information/sounder_pixel_sun_azimuth", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_sun_zenith", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 5.0e-8, + "add_offset": 90.0, + "missing_value": -2147483648, 
+ }, + }, + { + "key": "data/geolocation_information/sounder_pixel_zenith", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1800000000, + "valid_max": 1800000000, + "scale_factor": 2.5e-8, + "add_offset": 45.0, + "missing_value": -2147483648, + }, + }, + { + "key": "data/geolocation_information/sounder_pixel_azimuth", + "dims": ("n_lines", "n_for", "n_fov"), + "data_type": "int32", + "rand_min": -2147483647, + "rand_max": 2147483647, + "attribs": { + "valid_min": -1843200000, + "valid_max": 1843200000, + "scale_factor": 9.765625e-8, + "add_offset": 0.0, + "missing_value": -2147483648, + }, + }, ] class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): """Fake base class for IASI NG handler""" - chunks = (10, 10, 10) # Define the chunk size for dask array + chunks = (10, 10, 10) def add_rand_data(self, desc): """ @@ -82,10 +150,18 @@ def add_rand_data(self, desc): shape = [self.dims[k] for k in dims] + # Define the chunk size for dask array + chunks = [10] * len(dims) + if dtype == "int32": dask_array = da.random.randint( - rand_min, rand_max, size=shape, chunks=self.chunks, dtype="int32" + rand_min, rand_max, size=shape, chunks=chunks, dtype=np.int32 ) + elif dtype == "float64": + dask_array = da.random.random(shape, chunks=chunks) + + # Scale and shift to the desired range [min_val, max_val] + dask_array = dask_array * (rand_max - rand_min) + rand_min else: raise ValueError(f"Unsupported data type: {dtype}") @@ -249,8 +325,13 @@ def test_available_datasets(self, twv_scene): dnames = twv_scene.available_dataset_names() + assert "onboard_utc" in dnames assert "latitude" in dnames assert "longitude" in dnames + assert "azimuth" in dnames + assert "zenith" in dnames + assert "sun_azimuth" in dnames + assert "sun_zenith" in dnames def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" @@ -293,3 +374,17 @@ def test_longitude_dataset(self, twv_scene): assert vmin >= -180.0 assert vmax <= 180.0 + + def test_onboard_utc_dataset(self, twv_scene): + """Test loading the onboard_utc dataset""" + + twv_scene.load(["onboard_utc"]) + dset = twv_scene["onboard_utc"] + + # Should be 2D now: + assert len(dset.dims) == 2 + assert dset.dims[0] == "x" + assert dset.dims[1] == "y" + + # Should have been converted to datetime: + assert dset.dtype == np.dtype("datetime64[ns]") From d98c9bf6e8f4a2151335805ae4f85b70ebe834ed Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 14:57:15 +0200 Subject: [PATCH 12/41] Optimized data description in unit test --- satpy/readers/iasi_ng_l2_nc.py | 19 +- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 192 +++++++++--------- 2 files changed, 109 insertions(+), 102 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index fd2319d6dc..33825ae6d0 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -149,16 +149,27 @@ def apply_fill_value(self, data_array, ds_info): if ds_info.get("apply_fill_value", True) is not True: return data_array - dtype = ds_info.get("data_type", "float64") + dtype = ds_info.get("data_type", "auto") + convert = False if data_array.dtype == np.int32: - data_array = data_array.astype(dtype) + if dtype == "auto": + dtype = "float64" + convert = dtype != "int32" elif data_array.dtype == np.float64: - if dtype != "float64": - data_array = data_array.astype(dtype) + if dtype == "auto": + dtype = "float64" + convert = dtype != "float64" + elif 
data_array.dtype == np.float32: + if dtype == "auto": + dtype = "float32" + convert = dtype != "float32" else: raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + if convert: + data_array = data_array.astype(dtype) + if dtype not in ["float32", "float64"]: # We won't be able to use NaN in the other cases: return data_array diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 981f71f481..0c81d0df5a 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -31,101 +31,72 @@ logger = logging.getLogger(__name__) -DATASET_DESCS = [ - { - "key": "data/geolocation_information/onboard_utc", - "dims": ("n_lines", "n_for"), - "data_type": "float64", - "rand_min": -1100000000.0, - "rand_max": 1100000000.0, - "attribs": { - "valid_min": -1000000000.0, - "valid_max": 1000000000.0, - "missing_value": -9000000000.0, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_latitude", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_longitude", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_sun_azimuth", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, - }, - }, - { - "key": "data/geolocation_information/sounder_pixel_sun_zenith", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 5.0e-8, - "add_offset": 90.0, - "missing_value": -2147483648, - }, - }, +d_lff = ("n_lines", "n_for", "n_fov") + +DATA_DESC = [ { - "key": "data/geolocation_information/sounder_pixel_zenith", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1800000000, - "valid_max": 1800000000, - "scale_factor": 2.5e-8, - "add_offset": 45.0, - "missing_value": -2147483648, + "group": "data/geolocation_information", + "attrs": [ + "unit", + "valid_min", + "valid_max", + "scale_factor", + "add_offset", + "missing_value", + ], + "variables": { + "onboard_utc": [ + ("n_lines", "n_for"), + "float64", + ("seconds", -1e9, 1e9, None, None, -9e9), + ], + "sounder_pixel_latitude": [ + d_lff, + "int32", + ("degrees_north", -18e8, 18e8, 5.0e-8, 0.0, -2147483648), + ], + "sounder_pixel_longitude": [ + d_lff, + "int32", + ("degrees_east", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_sun_azimuth": [ + d_lff, + "int32", + ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_sun_zenith": [ + d_lff, + "int32", + ("degrees", -18e8, 18e8, 5.0e-8, 90.0, -2147483648), + ], + "sounder_pixel_azimuth": [ + d_lff, + 
"int32", + ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648), + ], + "sounder_pixel_zenith": [ + d_lff, + "int32", + ("degrees", -18e8, 18e8, 2.5e-8, 45.0, -2147483648), + ], }, }, { - "key": "data/geolocation_information/sounder_pixel_azimuth", - "dims": ("n_lines", "n_for", "n_fov"), - "data_type": "int32", - "rand_min": -2147483647, - "rand_max": 2147483647, - "attribs": { - "valid_min": -1843200000, - "valid_max": 1843200000, - "scale_factor": 9.765625e-8, - "add_offset": 0.0, - "missing_value": -2147483648, + "group": "data/statistical_retrieval", + "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "variables": { + "air_temperature": [d_lff, "float32", ("K", 100.0, 400.0, 3.4e38)], + "atmosphere_mass_content_of_water": [ + d_lff, + "float32", + ("kg.m-2", 0, 300, 3.4e38), + ], + "qi_air_temperature": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], + "surface_air_temperature": [d_lff, "float32", ("K", 100, 400, 3.4e38)], + "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], }, }, ] @@ -143,8 +114,6 @@ def add_rand_data(self, desc): # Create a lazy dask array with random int32 values dtype = desc.get("data_type", "int32") - rand_min = desc.get("rand_min", 0) - rand_max = desc.get("rand_max", 100) dims = desc["dims"] key = desc["key"] @@ -152,21 +121,30 @@ def add_rand_data(self, desc): # Define the chunk size for dask array chunks = [10] * len(dims) + attribs = desc["attribs"] if dtype == "int32": + rand_min = -2147483647 + rand_max = 2147483647 dask_array = da.random.randint( rand_min, rand_max, size=shape, chunks=chunks, dtype=np.int32 ) - elif dtype == "float64": + elif dtype == "float64" or dtype == "float32": dask_array = da.random.random(shape, chunks=chunks) + if dtype == "float32": + dask_array = dask_array.astype(np.float32) + + vmin = attribs["valid_min"] + vmax = attribs["valid_max"] + rand_min = vmin - (vmax - vmin) * 0.1 + rand_max = vmax + (vmax - vmin) * 0.1 + # Scale and shift to the desired range [min_val, max_val] dask_array = dask_array * (rand_max - rand_min) + rand_min else: raise ValueError(f"Unsupported data type: {dtype}") - attribs = desc.get("attribs", {}) - if "missing_value" in attribs: # Force setting a few elements to this missing value (but still lazily with # dask map_blocks function) @@ -217,8 +195,26 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): # Note: below we use the full range of int32 to generate the random # values, we expect the handler to "fix" out of range values replacing # them with NaNs. 
-        for desc in DATASET_DESCS:
-            self.add_rand_data(desc)
+        for grp_desc in DATA_DESC:
+            # get the group prefix:
+            prefix = grp_desc["group"]
+            anames = grp_desc["attrs"]
+            for vname, vdesc in grp_desc["variables"].items():
+                # For each variable we create a dataset descriptor:
+                attribs = {}
+                for idx, val in enumerate(vdesc[2]):
+                    # Ensure we do not inject "None" attribute values:
+                    if val is not None:
+                        attribs[anames[idx]] = val
+
+                desc = {
+                    "key": f"{prefix}/{vname}",
+                    "dims": vdesc[0],
+                    "data_type": vdesc[1],
+                    "attribs": attribs,
+                }
+
+                self.add_rand_data(desc)
 
         return self.content

From 7f648d82f9b74f34e79960ba976267770a33be3c Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 15:44:46 +0200
Subject: [PATCH 13/41] Adding support for dataset renaming

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml | 36 ++++++++++++++++------------
 satpy/readers/iasi_ng_l2_nc.py       | 19 +++++++++++++--
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 7422716baa..2a4193f58c 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -15,19 +15,25 @@ file_types:
       [
         "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
       ]
+
+    onboard_utc_epoch: "2020-01-01"
+
     datasets:
-      onboard_utc:
-        location: data/geolocation_information/onboard_utc
-        seconds_since_epoch: "2020-01-01"
-      latitude:
-        location: data/geolocation_information/sounder_pixel_latitude
-      longitude:
-        location: data/geolocation_information/sounder_pixel_longitude
-      azimuth:
-        location: data/geolocation_information/sounder_pixel_azimuth
-      zenith:
-        location: data/geolocation_information/sounder_pixel_zenith
-      sun_azimuth:
-        location: data/geolocation_information/sounder_pixel_sun_azimuth
-      sun_zenith:
-        location: data/geolocation_information/sounder_pixel_sun_zenith
+      - group: data/geolocation_information
+        variables:
+          - onboard_utc
+          - sounder_pixel_latitude:latitude
+          - sounder_pixel_longitude:longitude
+          - sounder_pixel_azimuth:azimuth
+          - sounder_pixel_zenith:zenith
+          - sounder_pixel_sun_azimuth:sun_azimuth
+          - sounder_pixel_sun_zenith:sun_zenith
+      - group: data/statistical_retrieval
+        variables:
+          - air_temperature
+          - atmosphere_mass_content_of_water
+          - qi_air_temperature
+          - qi_specific_humidity
+          - specific_humidity
+          - surface_air_temperature
+          - surface_specific_humidity

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 33825ae6d0..f3fdf9c50a 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -110,8 +110,23 @@ def register_available_datasets(self):
 
         # Otherwise, we need to perform the registration:
         self.dataset_infos = {}
-        for ds_name, desc in self.ds_desc.items():
-            self.register_dataset(ds_name, desc)
+        for grp_desc in self.ds_desc:
+            prefix = grp_desc["group"]
+            for vname in grp_desc["variables"]:
+                # Check if we have an alias for this variable:
+                ds_name = vname
+                if ":" in vname:
+                    vname, ds_name = vname.split(":")
+
+                desc = {"location": f"{prefix}/{vname}"}
+
+                if ds_name == "onboard_utc":
+                    # add the seconds_since_epoch:
+                    desc["seconds_since_epoch"] = self.filetype_info["onboard_utc_epoch"]
+
+                # print(f"Registering {ds_name} 
with desc: {desc}") + + self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): """Retrieve the dataset infos corresponding to one of the registered From 2eb0311be24e16998e1abb32d64ed2b9df5367d0 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 15:46:52 +0200 Subject: [PATCH 14/41] Replacing dataset alais symbol in yaml file --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 12 ++++++------ satpy/readers/iasi_ng_l2_nc.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 2a4193f58c..300d3c0967 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -22,12 +22,12 @@ file_types: - group: data/geolocation_information variables: - onboard_utc - - sounder_pixel_latitude:latitude - - sounder_pixel_longitude:longitude - - sounder_pixel_azimuth:azimuth - - sounder_pixel_zenith:zenith - - sounder_pixel_sun_azimuth:sun_azimuth - - sounder_pixel_sun_zenith:sun_zenith + - sounder_pixel_latitude => latitude + - sounder_pixel_longitude => longitude + - sounder_pixel_azimuth => azimuth + - sounder_pixel_zenith => zenith + - sounder_pixel_sun_azimuth => sun_azimuth + - sounder_pixel_sun_zenith => sun_zenith - group: data/statistical_retrieval variables: - air_temperature diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f3fdf9c50a..f64d4c5170 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -115,8 +115,8 @@ def register_available_datasets(self): for vname in grp_desc["variables"]: # Check if we have an alias for this variable: ds_name = vname - if ":" in vname: - vname, ds_name = vname.split(":") + if "=>" in vname: + vname, ds_name = (v.strip() for v in vname.split("=>")) desc = {"location": f"{prefix}/{vname}"} From 86dd68f2cd76c8d8e906b0d1d56a8d80b097f1ac Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 15:49:28 +0200 Subject: [PATCH 15/41] Updating unit test on available datasets --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 0c81d0df5a..c09245c903 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -321,13 +321,25 @@ def test_available_datasets(self, twv_scene): dnames = twv_scene.available_dataset_names() - assert "onboard_utc" in dnames - assert "latitude" in dnames - assert "longitude" in dnames - assert "azimuth" in dnames - assert "zenith" in dnames - assert "sun_azimuth" in dnames - assert "sun_zenith" in dnames + expected_names = [ + "onboard_utc", + "latitude", + "longitude", + "azimuth", + "zenith", + "sun_azimuth", + "sun_zenith", + "air_temperature", + "atmosphere_mass_content_of_water", + "qi_air_temperature", + "qi_specific_humidity", + "specific_humidity", + "surface_air_temperature", + "surface_specific_humidity", + ] + + for dname in expected_names: + assert dname in dnames def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" From 34e03edbfcd094ca4a973134cae8f92fa506fb3c Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Thu, 8 Aug 2024 16:08:52 +0200 Subject: [PATCH 16/41] Adding diagnostic variables --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 6 +++++ satpy/readers/iasi_ng_l2_nc.py | 9 +++++-- 
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 26 +++++++++++++++++--
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 300d3c0967..db8e3e9fb0 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -19,6 +19,12 @@ file_types:
     onboard_utc_epoch: "2020-01-01"
 
     datasets:
+      - group: data/diagnostics
+        variables:
+          - fg_cost
+          - nbr_iterations
+          - rt_cost_x
+          - rt_cost_y
       - group: data/geolocation_information
         variables:
           - onboard_utc

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index f64d4c5170..1b93bea2d6 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -166,10 +166,16 @@ def apply_fill_value(self, data_array, ds_info):
 
         dtype = ds_info.get("data_type", "auto")
         convert = False
+        attribs = data_array.attrs
 
         if data_array.dtype == np.int32:
             if dtype == "auto":
-                dtype = "float64"
+                # Only convert to float if we have the scale factor or add_offset:
+                dtype = (
+                    "float64"
+                    if ("scale_factor" in attribs or "add_offset" in attribs)
+                    else "int32"
+                )
             convert = dtype != "int32"
         elif data_array.dtype == np.float64:
             if dtype == "auto":
@@ -189,7 +195,6 @@
             # We won't be able to use NaN in the other cases:
             return data_array
 
-        attribs = data_array.attrs
         nan_val = np.nan if dtype == "float64" else np.float32(np.nan)
 
         # Apply the min/max valid range:

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index c09245c903..96450366fe 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -34,6 +34,16 @@
 d_lff = ("n_lines", "n_for", "n_fov")
 
 DATA_DESC = [
+    {
+        "group": "data/diagnostics",
+        "attrs": ["missing_value"],
+        "variables": {
+            "fg_cost": [d_lff, "float32", (3.4e38,)],
+            "nbr_iterations": [d_lff, "int32", (2147483647,)],
+            "rt_cost_x": [d_lff, "float32", (3.4e38,)],
+            "rt_cost_y": [d_lff, "float32", (3.4e38,)],
+        },
+    },
     {
         "group": "data/geolocation_information",
         "attrs": [
@@ -135,8 +145,10 @@ def add_rand_data(self, desc):
             if dtype == "float32":
                 dask_array = dask_array.astype(np.float32)
 
-            vmin = attribs["valid_min"]
-            vmax = attribs["valid_max"]
+            # Max float32 value is 3.4028235e38, we use a smaller value
+            # by default here:
+            vmin = attribs.get("valid_min", -1e16)
+            vmax = attribs.get("valid_max", 1e16)
 
             rand_min = vmin - (vmax - vmin) * 0.1
             rand_max = vmax + (vmax - vmin) * 0.1
@@ -396,3 +408,13 @@ def test_onboard_utc_dataset(self, twv_scene):
 
         # Should have been converted to datetime:
         assert dset.dtype == np.dtype("datetime64[ns]")
+
+    def test_nbr_iterations_dataset(self, twv_scene):
+        """Test loading the nbr_iterations dataset"""
+
+        twv_scene.load(["nbr_iterations"])
+        dset = twv_scene["nbr_iterations"]
+
+        assert len(dset.dims) == 2
+        # Should still be in int32 type:
+        assert dset.dtype == np.int32

From b80f68cca05783f4c2fc94db32eb0052217b0030 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 16:12:35 +0200
Subject: [PATCH 17/41] Adding surface info datasets

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  6 ++++++
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index db8e3e9fb0..2ff74e8ffc 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ 
b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -43,3 +43,9 @@ file_types:
           - specific_humidity
           - surface_air_temperature
           - surface_specific_humidity
+      - group: data/surface_info
+        variables:
+          - height
+          - height_std
+          - ice_fraction
+          - land_fraction

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 96450366fe..5a11c1913b 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -109,6 +109,16 @@
             "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)],
         },
     },
+    {
+        "group": "data/surface_info",
+        "attrs": ["unit", "valid_min", "valid_max", "missing_value"],
+        "variables": {
+            "height": [d_lff, "float32", ("m", -418, 8848, 3.4e38)],
+            "height_std": [d_lff, "float32", ("m", 0, 999.0, 3.4e38)],
+            "ice_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+            "land_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+        },
+    },
 ]
 

From aa2988a7058b043ae9fa2ccb23a76630deb3f0a8 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 21:42:23 +0200
Subject: [PATCH 18/41] Adding support to broadcast onboard_utc variable

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           | 1 +
 satpy/readers/iasi_ng_l2_nc.py                 | 6 ++++++
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 7 ++++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 2ff74e8ffc..caf9749624 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -17,6 +17,7 @@ file_types:
       ]
 
     onboard_utc_epoch: "2020-01-01"
+    onboard_utc_broadcast: true
 
     datasets:
       - group: data/diagnostics

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 1b93bea2d6..5a6d921290 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -285,6 +285,12 @@ def apply_to_datetime(self, data_array, ds_info):
             attrs=data_array.attrs,
         )
 
+        # print(f"file_content keys: {self.file_content.keys()}")
+        lat_shape = self["data/geolocation_information/sounder_pixel_latitude"].shape
+
+        # Apply "repeat" with the last dimension size:
+        data_array = xr.concat([data_array] * lat_shape[2], dim=data_array.dims[-1])
+
         return data_array
 
     def get_transformed_dataset(self, ds_info):

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 5a11c1913b..b28cbd49c4 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -408,7 +408,7 @@ def test_longitude_dataset(self, twv_scene):
     def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset"""
 
-        twv_scene.load(["onboard_utc"])
+        twv_scene.load(["onboard_utc", "latitude"])
         dset = twv_scene["onboard_utc"]
 
         # Should be 2D now:
@@ -419,6 +419,11 @@ def test_onboard_utc_dataset(self, twv_scene):
         # Should have been converted to datetime:
         assert dset.dtype == np.dtype("datetime64[ns]")
 
+        # Should also have the same size as "latitude" for instance:
+        lat = twv_scene["latitude"]
+
+        assert lat.shape == dset.shape
+
     def test_nbr_iterations_dataset(self, twv_scene):

From 9c2f3ecd376d612f435674b5a67e8fe35683c45b Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 8 Aug 2024 23:25:28 +0200
Subject: [PATCH 19/41] Fixing warning in unit test

---
 satpy/readers/iasi_ng_l2_nc.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 5a6d921290..c013a5d40b 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -278,9 +278,7 @@ def apply_to_datetime(self, data_array, ds_info): # Note: below could convert the resulting data to another type # with .astype("datetime64[us]") for instance data_array = xr.DataArray( - data=da.from_array( - pd.to_datetime(epoch) + data_array.astype("timedelta64[s]") - ), + data=pd.to_datetime(epoch) + data_array.astype("timedelta64[s]"), dims=data_array.dims, attrs=data_array.attrs, ) From b113750c5b729532732c1bc971877bfdde215b92 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 09:58:22 +0200 Subject: [PATCH 20/41] Optimized variables discovery process. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 36 +----- satpy/readers/iasi_ng_l2_nc.py | 119 +++++++++++++++--- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 44 ++++--- 3 files changed, 130 insertions(+), 69 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index caf9749624..59ef9a8227 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,37 +16,7 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] - onboard_utc_epoch: "2020-01-01" - onboard_utc_broadcast: true + ignored_patterns: + - /optimal_estimation/ # cf. eum respase2 task2 issue #4 - datasets: - - group: data/diagnostics - variables: - - fg_cost - - nbr_iterations - - rt_cost_x - - rt_cost_y - - group: data/geolocation_information - variables: - - onboard_utc - - sounder_pixel_latitude => latitude - - sounder_pixel_longitude => longitude - - sounder_pixel_azimuth => azimuth - - sounder_pixel_zenith => zenith - - sounder_pixel_sun_azimuth => sun_azimuth - - sounder_pixel_sun_zenith => sun_zenith - - group: data/statistical_retrieval - variables: - - air_temperature - - atmosphere_mass_content_of_water - - qi_air_temperature - - qi_specific_humidity - - specific_humidity - - surface_air_temperature - - surface_specific_humidity - - group: data/surface_info - variables: - - height - - height_std - - ice_fraction - - land_fraction + onboard_utc_broadcast: true diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c013a5d40b..950e16e422 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -25,8 +25,8 @@ """ import logging +import re -import dask.array as da import netCDF4 import numpy as np import pandas as pd @@ -49,11 +49,18 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): # List of datasets provided by this handler: self.dataset_infos = None - # Description of the available datasets: - self.ds_desc = self.filetype_info["datasets"] + # Description of the available variables: + self.variable_desc = {} - logger.info("Creating reader with infos: %s", filename_info) + # Description of the available dimensions: + self.dimensions_desc = {} + # Ignored variable patterns: + patterns = self.filetype_info.get("ignored_patterns", []) + self.ignored_patterns = [re.compile(pstr) for pstr in patterns] + + # dataset aliases: + self.dataset_aliases = self.filetype_info.get("dataset_aliases", {}) 
self.register_available_datasets() @property @@ -71,6 +78,16 @@ def sensor_names(self): """List of sensors represented in this file.""" return self.sensors + # Note: patching the collect_groups_info method below to + # also collect dimensions in sub groups. + def _collect_groups_info(self, base_name, obj): + for group_name, group_obj in obj.groups.items(): + full_group_name = base_name + group_name + self.file_content[full_group_name] = group_obj + self._collect_attrs(full_group_name, group_obj) + self.collect_metadata(full_group_name, group_obj) + self.collect_dimensions(full_group_name, group_obj) + def available_datasets(self, configured_datasets=None): """Determine automatically the datasets provided by this file. @@ -100,6 +117,72 @@ def register_dataset(self, ds_name, desc): self.dataset_infos[ds_name] = ds_infos + def parse_file_content(self): + """Parse the file_content to discover the available datasets and dimensions""" + + for key, val in self.file_content.items(): + # print(f"Found key: {key}") + + if "/dimension/" in key: + dim_name = key.split("/")[-1] + # print(f"Found dimension: {dim_name}={val}") + if ( + dim_name in self.dimensions_desc + and self.dimensions_desc[dim_name] != val + ): + # This might happen if we have the same dim name from different groups: + raise ValueError(f"Detected duplicated dim name: {dim_name}") + + self.dimensions_desc[dim_name] = val + continue + + if "/attr/" in key: + var_path, aname = key.split("/attr/") + # print(f"Found attrib for: {var_path}: {aname}") + + if var_path not in self.variable_desc: + # maybe this variable is ignored, or this is a group attr. + continue + + self.variable_desc[var_path]["attribs"][aname] = val + continue + + # if isinstance(val, netCDF4.Variable): + if f"{key}/shape" in self.file_content: + # print(f"Found variable: {key}") + + shape = self.file_content[f"{key}/shape"] + if np.prod(shape) <= 1: + # print(f"Ignoring scalar variable {key}") + continue + + # Check if this variable should be ignored: + if any(p.search(key) is not None for p in self.ignored_patterns): + # print(f"Ignoring variable {key}") + continue + + # Prepare a description for this variable: + prefix, var_name = key.rsplit("/", 1) + dims = self.file_content[f"{key}/dimensions"] + dtype = self.file_content[f"{key}/dtype"] + + desc = { + "location": key, + "prefix": prefix, + "var_name": var_name, + "shape": shape, + "dtype": f"{dtype}", + "dims": dims, + "attribs": {}, + } + + self.variable_desc[key] = desc + continue + + print(f"Found {len(self.variable_desc)} variables:") + for vpath, desc in self.variable_desc.items(): + print(f"{vpath}: {desc}") + def register_available_datasets(self): """Register the available dataset in the current product file""" @@ -110,23 +193,25 @@ def register_available_datasets(self): # Otherwise, we need to perform the registration: self.dataset_infos = {} - for grp_desc in self.ds_desc: - prefix = grp_desc["group"] - for vname in grp_desc["variables"]: - # Check if we have an alias for this variable: - ds_name = vname - if "=>" in vname: - vname, ds_name = (v.strip() for v in vname.split("=>")) + # Parse the file content: + self.parse_file_content() - desc = {"location": f"{prefix}/{vname}"} + for vpath, desc in self.variable_desc.items(): + # Check if we have an alias for this variable: + ds_name = desc["var_name"] + if vpath in self.dataset_aliases: + ds_name = self.dataset_aliases[vpath] - if ds_name == "onboard_utc": - # add the seconds_since_epoch: - desc["seconds_since_epoch"] = 
self.filetype_info["onboard_utc_epoch"] + unit = desc["attribs"].get("units", None) - # print(f"Registering {ds_name} with desc: {desc}") + if unit is not None and unit.startswith("seconds since "): + # request conversion to datetime: + desc["seconds_since_epoch"] = unit.replace("seconds since ", "") + print( + f"Converting {ds_name} in secs from epoch {desc['seconds_since_epoch']}" + ) - self.register_dataset(ds_name, desc) + self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): """Retrieve the dataset infos corresponding to one of the registered diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index b28cbd49c4..16dbaa0c23 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -47,7 +47,7 @@ { "group": "data/geolocation_information", "attrs": [ - "unit", + "units", "valid_min", "valid_max", "scale_factor", @@ -58,7 +58,7 @@ "onboard_utc": [ ("n_lines", "n_for"), "float64", - ("seconds", -1e9, 1e9, None, None, -9e9), + ("seconds since 2020-01-01 00:00:00.000", -1e9, 1e9, None, None, -9e9), ], "sounder_pixel_latitude": [ d_lff, @@ -94,7 +94,7 @@ }, { "group": "data/statistical_retrieval", - "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "attrs": ["units", "valid_min", "valid_max", "missing_value"], "variables": { "air_temperature": [d_lff, "float32", ("K", 100.0, 400.0, 3.4e38)], "atmosphere_mass_content_of_water": [ @@ -103,15 +103,15 @@ ("kg.m-2", 0, 300, 3.4e38), ], "qi_air_temperature": [d_lff, "float32", ("", 0, 25, 3.4e38)], - "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], - "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], + "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)], + "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)], "surface_air_temperature": [d_lff, "float32", ("K", 100, 400, 3.4e38)], - "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], + "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)], }, }, { "group": "data/surface_info", - "attrs": ["unit", "valid_min", "valid_max", "missing_value"], + "attrs": ["units", "valid_min", "valid_max", "missing_value"], "variables": { "height": [d_lff, "float32", ("m", -418, 8848, 3.4e38)], "height_std": [d_lff, "float32", ("m", 0, 999.0, 3.4e38)], @@ -200,6 +200,12 @@ def set_missing_values(block): data_array.attrs.update(attribs) self.content[key] = data_array + self.content[key + "/shape"] = data_array.shape + self.content[key + "/dtype"] = dask_array.dtype + self.content[key + "/dimensions"] = data_array.dims + + for aname, val in attribs.items(): + self.content[key + "/attr/" + aname] = val def get_test_content(self, _filename, _filename_info, _filetype_info): """Get the content of the test data. 
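# Editor's note (illustration only, not part of the patch): the epoch handling
# introduced above parses CF-style "units" strings. A standalone sketch of the
# same conversion, assuming units of the form "seconds since <epoch>":

import numpy as np
import pandas as pd

def seconds_to_datetime64(seconds, units):
    """Convert raw seconds counts to datetime64 values."""
    epoch = units.replace("seconds since ", "")
    return pd.to_datetime(epoch) + seconds.astype("timedelta64[s]")

# Example: seconds_to_datetime64(np.float64(86400), "seconds since 2020-01-01 00:00:00.000")
# evaluates to Timestamp("2020-01-02 00:00:00").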
@@ -345,12 +351,12 @@ def test_available_datasets(self, twv_scene): expected_names = [ "onboard_utc", - "latitude", - "longitude", - "azimuth", - "zenith", - "sun_azimuth", - "sun_zenith", + "sounder_pixel_latitude", + "sounder_pixel_longitude", + "sounder_pixel_azimuth", + "sounder_pixel_zenith", + "sounder_pixel_sun_azimuth", + "sounder_pixel_sun_zenith", "air_temperature", "atmosphere_mass_content_of_water", "qi_air_temperature", @@ -366,8 +372,8 @@ def test_available_datasets(self, twv_scene): def test_latitude_dataset(self, twv_scene): """Test loading the latitude dataset""" - twv_scene.load(["latitude"]) - dset = twv_scene["latitude"] + twv_scene.load(["sounder_pixel_latitude"]) + dset = twv_scene["sounder_pixel_latitude"] # Should be 2D now: assert len(dset.dims) == 2 @@ -387,8 +393,8 @@ def test_latitude_dataset(self, twv_scene): def test_longitude_dataset(self, twv_scene): """Test loading the longitude dataset""" - twv_scene.load(["longitude"]) - dset = twv_scene["longitude"] + twv_scene.load(["sounder_pixel_longitude"]) + dset = twv_scene["sounder_pixel_longitude"] # Should be 2D now: assert len(dset.dims) == 2 @@ -408,7 +414,7 @@ def test_longitude_dataset(self, twv_scene): def test_onboard_utc_dataset(self, twv_scene): """Test loading the onboard_utc dataset""" - twv_scene.load(["onboard_utc", "latitude"]) + twv_scene.load(["onboard_utc", "sounder_pixel_latitude"]) dset = twv_scene["onboard_utc"] # Should be 2D now: @@ -420,7 +426,7 @@ def test_onboard_utc_dataset(self, twv_scene): assert dset.dtype == np.dtype("datetime64[ns]") # Should also have the same size as "lattitude" for instance: - lat = twv_scene["latitude"] + lat = twv_scene["sounder_pixel_latitude"] assert lat.shape == dset.shape From becb81c9f9d63822fe5cb2501cd63331ae371d22 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 10:32:18 +0200 Subject: [PATCH 21/41] Updated onboard_utc broadcast process. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 4 +- satpy/readers/iasi_ng_l2_nc.py | 72 ++++++++++--------- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 4 ++ 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 59ef9a8227..0c13795cdd 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -19,4 +19,6 @@ file_types: ignored_patterns: - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4 - onboard_utc_broadcast: true + # Define if we should broadcast the timestamps from + # dims (n_lines, n_for) to (n_lines, n_for, n_fov): + broadcast_timestamps: true diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 950e16e422..f25b51a966 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -61,6 +61,10 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs): # dataset aliases: self.dataset_aliases = self.filetype_info.get("dataset_aliases", {}) + + # broadcasts timestamps flag: + self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False) + self.register_available_datasets() @property @@ -179,9 +183,9 @@ def parse_file_content(self): self.variable_desc[key] = desc continue - print(f"Found {len(self.variable_desc)} variables:") - for vpath, desc in self.variable_desc.items(): - print(f"{vpath}: {desc}") + # print(f"Found {len(self.variable_desc)} variables:") + # for vpath, desc in self.variable_desc.items(): + # print(f"{vpath}: {desc}") def register_available_datasets(self): """Register the available dataset in the current product file""" @@ -203,13 +207,13 @@ def register_available_datasets(self): ds_name = self.dataset_aliases[vpath] unit = desc["attribs"].get("units", None) - if unit is not None and unit.startswith("seconds since "): # request conversion to datetime: desc["seconds_since_epoch"] = unit.replace("seconds since ", "") - print( - f"Converting {ds_name} in secs from epoch {desc['seconds_since_epoch']}" - ) + + if self.broadcast_timestamps and desc["var_name"] == "onboard_utc": + # Broadcast on the "n_fov" dimension: + desc["broadcast_on_dim"] = "n_fov" self.register_dataset(ds_name, desc) @@ -245,11 +249,7 @@ def variable_path_exists(self, var_path): def apply_fill_value(self, data_array, ds_info): """Apply the rescaling transform on a given array.""" - # Check if we should apply: - if ds_info.get("apply_fill_value", True) is not True: - return data_array - - dtype = ds_info.get("data_type", "auto") + dtype = ds_info.get("target_data_type", "auto") convert = False attribs = data_array.attrs @@ -271,7 +271,7 @@ def apply_fill_value(self, data_array, ds_info): dtype = "float32" convert = dtype != "float32" else: - raise ValueError(f"Unexpected raw dataarray data type: {data_array.dtype}") + raise ValueError(f"Unexpected raw array data type: {data_array.dtype}") if convert: data_array = data_array.astype(dtype) @@ -303,16 +303,11 @@ def apply_fill_value(self, data_array, ds_info): if missing_val is None: return data_array - return data_array.where(data_array != missing_val, nan_val) + return data_array.where(data_array != missing_val, other=nan_val) - def apply_rescaling(self, data_array, ds_info): + def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Here we should apply the rescaling except if it is explicitly - # requested not to rescale: - if ds_info.get("apply_rescaling", True) is not True: - return data_array - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: @@ -330,7 +325,7 @@ def apply_rescaling(self, data_array, ds_info): return data_array - def apply_reshaping(self, data_array, ds_info): + def apply_reshaping(self, data_array): """Apply the reshaping transform on a given IASI-NG data array Those arrays may come as 3D array, in which case we collapse the @@ -339,9 +334,6 @@ def apply_reshaping(self, data_array, 
ds_info): In the process, we also rename the first axis to "x" """ - if ds_info.get("apply_reshaping", True) is not True: - return data_array - if len(data_array.dims) > 2: data_array = data_array.stack(y=(data_array.dims[1:])) @@ -353,13 +345,11 @@ def apply_reshaping(self, data_array, ds_info): return data_array - def apply_to_datetime(self, data_array, ds_info): + def convert_to_datetime(self, data_array, ds_info): """Convert the data to datetime values.""" - if "seconds_since_epoch" not in ds_info: - return data_array - epoch = ds_info["seconds_since_epoch"] + # Note: below could convert the resulting data to another type # with .astype("datetime64[us]") for instance data_array = xr.DataArray( @@ -368,11 +358,18 @@ def apply_to_datetime(self, data_array, ds_info): attrs=data_array.attrs, ) - # print(f"file_content keys: {self.file_content.keys()}") - lat_shape = self["data/geolocation_information/sounder_pixel_latitude"].shape + return data_array + + def apply_broadcast(self, data_array, ds_info): + """Apply the broadcast of the data array""" + + dim_name = ds_info["broadcast_on_dim"] + if dim_name not in self.dimensions_desc: + raise ValueError(f"Invalid dimension name {dim_name}") + rep_count = self.dimensions_desc[dim_name] - # Apply "repeat" with the last dimension size: - data_array = xr.concat([data_array] * lat_shape[2], dim=data_array.dims[-1]) + # Apply "a repeat operation" with the last dimension size: + data_array = xr.concat([data_array] * rep_count, dim=data_array.dims[-1]) return data_array @@ -390,9 +387,14 @@ def get_transformed_dataset(self, ds_info): # Apply the transformations: arr = self.apply_fill_value(arr, ds_info) - arr = self.apply_rescaling(arr, ds_info) - arr = self.apply_reshaping(arr, ds_info) - arr = self.apply_to_datetime(arr, ds_info) + arr = self.apply_rescaling(arr) + arr = self.apply_reshaping(arr) + + if "seconds_since_epoch" in ds_info: + arr = self.convert_to_datetime(arr, ds_info) + + if "broadcast_on_dim" in ds_info: + arr = self.apply_broadcast(arr, ds_info) return arr diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 16dbaa0c23..63e34d0c3d 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -244,6 +244,10 @@ def get_test_content(self, _filename, _filename_info, _filetype_info): self.add_rand_data(desc) + # We should also register the dimensions as we will need access to "n_fov" for instance: + for key, val in self.dims.items(): + self.content[f"data/dimension/{key}"] = val + return self.content From 3be91cb93183b744331a164915822b9065aeb248 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 11:43:00 +0200 Subject: [PATCH 22/41] Installed pre-commit hook --- satpy/readers/iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f25b51a966..11c3dcbe05 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -361,7 +361,7 @@ def convert_to_datetime(self, data_array, ds_info): return data_array def apply_broadcast(self, data_array, ds_info): - """Apply the broadcast of the data array""" + """Apply the broadcast of the data array.""" dim_name = ds_info["broadcast_on_dim"] if dim_name not in self.dimensions_desc: From 3cdaee742b7ee6d9c0e55924cbf5e12c53345382 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:20:58 +0200 Subject: [PATCH 23/41] Fixing 
pre-commit issues --- README | 2 +- doc/source/dev_guide/CONTRIBUTING.rst | 2 +- satpy/readers/iasi_ng_l2_nc.py | 74 +++++++----------- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 76 ++++++++----------- 4 files changed, 62 insertions(+), 92 deletions(-) diff --git a/README b/README index 92cacd2853..a1320b1b4a 120000 --- a/README +++ b/README @@ -1 +1 @@ -README.rst \ No newline at end of file +README.rst diff --git a/doc/source/dev_guide/CONTRIBUTING.rst b/doc/source/dev_guide/CONTRIBUTING.rst index ac9338fc25..f60965aeba 120000 --- a/doc/source/dev_guide/CONTRIBUTING.rst +++ b/doc/source/dev_guide/CONTRIBUTING.rst @@ -1 +1 @@ -../../../CONTRIBUTING.rst \ No newline at end of file +../../../CONTRIBUTING.rst diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 11c3dcbe05..32a5dea713 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -r"""IASI-NG L2 reader +r"""IASI-NG L2 reader implementation. This reader supports reading all the products from the IASI-NG L2 processing level: @@ -97,7 +97,6 @@ def available_datasets(self, configured_datasets=None): Uses a per product type dataset registration mechanism. """ - # pass along existing datasets for is_avail, ds_info in configured_datasets or []: yield is_avail, ds_info @@ -106,8 +105,7 @@ def available_datasets(self, configured_datasets=None): yield True, ds_info def register_dataset(self, ds_name, desc): - """Register a simple dataset given its name and a desc dict""" - + """Register a simple dataset given its name and a desc dict.""" if ds_name in self.dataset_infos: raise ValueError(f"Dataset for {ds_name} already registered.") @@ -122,8 +120,7 @@ def register_dataset(self, ds_name, desc): self.dataset_infos[ds_name] = ds_infos def parse_file_content(self): - """Parse the file_content to discover the available datasets and dimensions""" - + """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): # print(f"Found key: {key}") @@ -188,8 +185,7 @@ def parse_file_content(self): # print(f"{vpath}: {desc}") def register_available_datasets(self): - """Register the available dataset in the current product file""" - + """Register the available dataset in the current product file.""" if self.dataset_infos is not None: # Datasets are already registered. return @@ -218,16 +214,14 @@ def register_available_datasets(self): self.register_dataset(ds_name, desc) def get_dataset_infos(self, ds_name): - """Retrieve the dataset infos corresponding to one of the registered - datasets.""" + """Retrieve the dataset infos corresponding to one of the registered datasets.""" if ds_name not in self.dataset_infos: raise KeyError(f"No dataset registered for {ds_name}") return self.dataset_infos[ds_name] def variable_path_exists(self, var_path): - """Check if a given variable path is available in the underlying - netCDF file. + """Check if a given variable path is available in the underlying netCDF file. All we really need to do here is to access the file_content dictionary and check if we have a variable under that var_path key. 
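# Editor's illustration (not part of the patch): the convert_data_type() rewrite
# in the next hunk keeps float data as-is and promotes integers to float64 only
# when CF scaling attributes are present. A condensed sketch of that decision rule:

import numpy as np
import xarray as xr

def resolve_target_dtype(arr: xr.DataArray, dtype="auto"):
    """Pick the output dtype: keep floats, promote scaled integers to float64."""
    cur_dtype = np.dtype(arr.dtype).name
    if dtype != "auto":
        return dtype
    if cur_dtype in ("float32", "float64"):
        return cur_dtype
    to_float = "scale_factor" in arr.attrs or "add_offset" in arr.attrs
    return "float64" if to_float else cur_dtype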
@@ -246,41 +240,34 @@ def variable_path_exists(self, var_path): # Var path not in file_content: return False - def apply_fill_value(self, data_array, ds_info): - """Apply the rescaling transform on a given array.""" - - dtype = ds_info.get("target_data_type", "auto") - convert = False + def convert_data_type(self, data_array, dtype="auto"): + """Convert the data type if applicable.""" attribs = data_array.attrs - if data_array.dtype == np.int32: - if dtype == "auto": - # Only convert to float if we have the scale factor or add_offset: - dtype = ( - "float64" - if ("scale_factor" in attribs or "add_offset" in attribs) - else "int32" - ) - convert = dtype != "int32" - elif data_array.dtype == np.float64: - if dtype == "auto": - dtype = "float64" - convert = dtype != "float64" - elif data_array.dtype == np.float32: - if dtype == "auto": - dtype = "float32" - convert = dtype != "float32" - else: - raise ValueError(f"Unexpected raw array data type: {data_array.dtype}") - - if convert: + cur_dtype = np.dtype(data_array.dtype).name + + if dtype == "auto" and cur_dtype in ["float32", "float64"]: + dtype = cur_dtype + + to_float = "scale_factor" in attribs or "add_offset" in attribs + + if dtype == "auto": + dtype = "float64" if to_float else cur_dtype + + if cur_dtype != dtype: data_array = data_array.astype(dtype) + return data_array + + def apply_fill_value(self, data_array): + """Apply the rescaling transform on a given array.""" + dtype = np.dtype(data_array.dtype).name if dtype not in ["float32", "float64"]: # We won't be able to use NaN in the other cases: return data_array nan_val = np.nan if dtype == "float64" else np.float32(np.nan) + attribs = data_array.attrs # Apply the min/max valid range: if "valid_min" in attribs: @@ -307,7 +294,6 @@ def apply_fill_value(self, data_array, ds_info): def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: @@ -326,14 +312,13 @@ def apply_rescaling(self, data_array): return data_array def apply_reshaping(self, data_array): - """Apply the reshaping transform on a given IASI-NG data array + """Apply the reshaping transform on a given IASI-NG data array. Those arrays may come as 3D array, in which case we collapse the last 2 dimensions on a single axis (ie. 
the number of columns or "y") In the process, we also rename the first axis to "x" """ - if len(data_array.dims) > 2: data_array = data_array.stack(y=(data_array.dims[1:])) @@ -347,7 +332,6 @@ def apply_reshaping(self, data_array): def convert_to_datetime(self, data_array, ds_info): """Convert the data to datetime values.""" - epoch = ds_info["seconds_since_epoch"] # Note: below could convert the resulting data to another type @@ -362,7 +346,6 @@ def convert_to_datetime(self, data_array, ds_info): def apply_broadcast(self, data_array, ds_info): """Apply the broadcast of the data array.""" - dim_name = ds_info["broadcast_on_dim"] if dim_name not in self.dimensions_desc: raise ValueError(f"Invalid dimension name {dim_name}") @@ -375,7 +358,6 @@ def apply_broadcast(self, data_array, ds_info): def get_transformed_dataset(self, ds_info): """Retrieve a dataset with all transformations applied on it.""" - # Extract location: vname = ds_info["location"] @@ -386,7 +368,8 @@ def get_transformed_dataset(self, ds_info): arr = self[vname] # Apply the transformations: - arr = self.apply_fill_value(arr, ds_info) + arr = self.convert_data_type(arr) + arr = self.apply_fill_value(arr) arr = self.apply_rescaling(arr) arr = self.apply_reshaping(arr) @@ -400,7 +383,6 @@ def get_transformed_dataset(self, ds_info): def get_dataset(self, dataset_id, ds_info=None): """Get a dataset.""" - ds_name = dataset_id["name"] # In case this dataset name is not explicitly provided by this file diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 63e34d0c3d..84f7164f4f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -123,15 +123,12 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): - """Fake base class for IASI NG handler""" + """Fake base class for IASI NG handler.""" chunks = (10, 10, 10) def add_rand_data(self, desc): - """ - Build a random DataArray from a given description, and add it as content. 
-        """
-
+        """Build a random DataArray from a given description."""
         # Create a lazy dask array with random int32 values
         dtype = desc.get("data_type", "int32")
         dims = desc["dims"]
@@ -182,10 +179,11 @@ def set_missing_values(block):
             block_size = np.prod(block_shape)
             block_num_to_replace = int(block_size * missing_ratio)
 
+            # Create a Generator instance
+            rng = np.random.default_rng()
+
             # Generate unique random indices to set as missing values within this block
-            flat_indices = np.random.choice(
-                block_size, block_num_to_replace, replace=False
-            )
+            flat_indices = rng.choice(block_size, block_num_to_replace, replace=False)
             unraveled_indices = np.unravel_index(flat_indices, block_shape)
             block[unraveled_indices] = missing_value
 
@@ -256,8 +254,12 @@ class TestIASINGL2NCReader:
 
     reader_name = "iasi_ng_l2_nc"
 
+    file_prefix = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02"
+    file_suffix = "C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
+    twv_filename = f"{file_prefix}-TWV_{file_suffix}"
+
     def setup_method(self):
-        """Setup the reade config"""
+        """Set up the reader config."""
         from satpy._config import config_search_paths
 
         self.reader_configs = config_search_paths(
@@ -277,20 +279,16 @@ def fake_handler(self):
 
     @pytest.fixture()
     def twv_handler(self):
-        """Create a simple (and fake) default handler on a TWV product"""
-        filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
-        return self._create_file_handler(filename)
+        """Create a simple (and fake) default handler on a TWV product."""
+        return self._create_file_handler(self.twv_filename)
 
     @pytest.fixture()
     def twv_scene(self):
-        """Create a simple (and fake) satpy scene on a TWV product"""
-        filename = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
-        return Scene(filenames=[filename], reader=self.reader_name)
+        """Create a simple (and fake) satpy scene on a TWV product."""
+        return Scene(filenames=[self.twv_filename], reader=self.reader_name)
 
     def _create_file_handler(self, filename):
-        """Create an handler for the given file checking that it can
-        be parsed correctly"""
-
+        """Create a handler for the given file, checking that it can be parsed correctly."""
         reader = load_reader(self.reader_configs)
 
         # Test if the file is recognized by the reader
@@ -315,42 +313,36 @@ def _create_file_handler(self, filename):
         return handlers[0]
 
     def test_filename_matching(self):
-        """Test filename matching against some random name"""
-
+        """Test filename matching against some random name."""
         # Example filename
-        filename = "W_fr-meteo-sat,GRAL,MTI1-IASING-2-l2p_C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
+        prefix = "W_fr-meteo-sat,GRAL,MTI1-IASING-2"
+        suffix = (
+            "C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
+        )
+        filename = f"{prefix}-l2p_{suffix}"
 
         self._create_file_handler(filename)
 
     def test_real_filename_matching(self):
-        """Test that we will match an actual IASI NG L2 product file name"""
-
+        """Test that we will match an actual IASI NG L2 product file name."""
         # Below we test the TWV,CLD,GHG and SFC products:
-        filenames = {
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-CLD_C_EUMT_20170616120000_G_V_20070912094037_20070912094308_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-GHG_C_EUMT_20170616120000_G_V_20070912090651_20070912090922_O_N____.nc",
-            "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-SFC_C_EUMT_20170616120000_G_V_20070912100911_20070912101141_O_N____.nc",
-        }
+        ptypes = ["TWV", "CLD", "GHG", "SFC"]
+        filenames = [f"{self.file_prefix}-{ptype}_{self.file_suffix}" for ptype in ptypes]
 
         for filename in filenames:
             self._create_file_handler(filename)
 
     def test_sensing_times(self, twv_handler):
-        """Test that we read the sensing start/end times correctly
-        from filename"""
-
+        """Test that we read the sensing start/end times correctly from filename."""
         assert twv_handler.start_time == datetime(2007, 9, 12, 8, 43, 29)
         assert twv_handler.end_time == datetime(2007, 9, 12, 8, 46, 0)
 
     def test_sensor_names(self, twv_handler):
-        """Test that the handler reports iasi_ng as sensor"""
-
+        """Test that the handler reports iasi_ng as sensor."""
         assert twv_handler.sensor_names == {"iasi_ng"}
 
     def test_available_datasets(self, twv_scene):
-        """Test the list of available datasets in scene"""
-
+        """Test the list of available datasets in scene."""
         dnames = twv_scene.available_dataset_names()
 
         expected_names = [
@@ -374,8 +366,7 @@ def test_available_datasets(self, twv_scene):
         for dname in expected_names:
             assert dname in dnames
 
     def test_latitude_dataset(self, twv_scene):
-        """Test loading the latitude dataset"""
-
+        """Test loading the latitude dataset."""
         twv_scene.load(["sounder_pixel_latitude"])
         dset = twv_scene["sounder_pixel_latitude"]
 
@@ -395,8 +386,7 @@ def test_latitude_dataset(self, twv_scene):
     def test_longitude_dataset(self, twv_scene):
-        """Test loading the longitude dataset"""
-
+        """Test loading the longitude dataset."""
         twv_scene.load(["sounder_pixel_longitude"])
         dset = twv_scene["sounder_pixel_longitude"]
 
@@ -416,8 +406,7 @@ def test_longitude_dataset(self, twv_scene):
     def test_onboard_utc_dataset(self, twv_scene):
-        """Test loading the onboard_utc dataset"""
-
+        """Test loading the onboard_utc dataset."""
         twv_scene.load(["onboard_utc", "sounder_pixel_latitude"])
         dset = twv_scene["onboard_utc"]
 
@@ -435,8 +424,7 @@ def test_onboard_utc_dataset(self, twv_scene):
     def test_nbr_iterations_dataset(self, twv_scene):
-        """Test loading the nbr_iterations dataset"""
-
+        """Test loading the nbr_iterations dataset."""
         twv_scene.load(["nbr_iterations"])
         dset = twv_scene["nbr_iterations"]

From 17fc4308abc8b06c20d8a8fc01822698011899f9 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 9 Aug 2024 12:27:01 +0200
Subject: [PATCH 24/41] Refactoring parse_file_content to reduce complexity

---
 satpy/readers/iasi_ng_l2_nc.py | 100 ++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 46 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 32a5dea713..81890ed531 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -119,65 +119,73 @@ def register_dataset(self, ds_name, desc):
 
         self.dataset_infos[ds_name] = ds_infos
 
+    def process_dimension(self, key, value):
+        """Process a dimension entry from the file_content."""
+        dim_name = key.split("/")[-1]
+        # print(f"Found dimension: {dim_name}={val}")
+        if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
+            # This might happen if we have the same dim name from different groups:
+            raise ValueError(f"Detected duplicated dim name: {dim_name}")
+
+        self.dimensions_desc[dim_name] = value
+
+    def process_attribute(self, key, value):
+        """Process an 
attribute entry from the file_content.""" + var_path, aname = key.split("/attr/") + # print(f"Found attrib for: {var_path}: {aname}") + + if var_path not in self.variable_desc: + # maybe this variable is ignored, or this is a group attr. + return + + self.variable_desc[var_path]["attribs"][aname] = value + + def process_variable(self, key): + """Process a variable entry from the file_content.""" + shape = self.file_content[f"{key}/shape"] + # print(f"Found variable: {key}") + if np.prod(shape) <= 1: + # print(f"Ignoring scalar variable {key}") + return + + # Check if this variable should be ignored: + if any(p.search(key) is not None for p in self.ignored_patterns): + # print(f"Ignoring variable {key}") + return + + # Prepare a description for this variable: + prefix, var_name = key.rsplit("/", 1) + dims = self.file_content[f"{key}/dimensions"] + dtype = self.file_content[f"{key}/dtype"] + + desc = { + "location": key, + "prefix": prefix, + "var_name": var_name, + "shape": shape, + "dtype": f"{dtype}", + "dims": dims, + "attribs": {}, + } + + self.variable_desc[key] = desc + def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): # print(f"Found key: {key}") if "/dimension/" in key: - dim_name = key.split("/")[-1] - # print(f"Found dimension: {dim_name}={val}") - if ( - dim_name in self.dimensions_desc - and self.dimensions_desc[dim_name] != val - ): - # This might happen if we have the same dim name from different groups: - raise ValueError(f"Detected duplicated dim name: {dim_name}") - - self.dimensions_desc[dim_name] = val + self.process_dimension(key, val) continue if "/attr/" in key: - var_path, aname = key.split("/attr/") - # print(f"Found attrib for: {var_path}: {aname}") - - if var_path not in self.variable_desc: - # maybe this variable is ignored, or this is a group attr. 
- continue - - self.variable_desc[var_path]["attribs"][aname] = val + self.process_attribute(key, val) continue # if isinstance(val, netCDF4.Variable): if f"{key}/shape" in self.file_content: - # print(f"Found variable: {key}") - - shape = self.file_content[f"{key}/shape"] - if np.prod(shape) <= 1: - # print(f"Ignoring scalar variable {key}") - continue - - # Check if this variable should be ignored: - if any(p.search(key) is not None for p in self.ignored_patterns): - # print(f"Ignoring variable {key}") - continue - - # Prepare a description for this variable: - prefix, var_name = key.rsplit("/", 1) - dims = self.file_content[f"{key}/dimensions"] - dtype = self.file_content[f"{key}/dtype"] - - desc = { - "location": key, - "prefix": prefix, - "var_name": var_name, - "shape": shape, - "dtype": f"{dtype}", - "dims": dims, - "attribs": {}, - } - - self.variable_desc[key] = desc + self.process_variable(key) continue # print(f"Found {len(self.variable_desc)} variables:") From b906f4cdbc18ee350887e03a593872396404d320 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:34:21 +0200 Subject: [PATCH 25/41] Refactoring add_rand_data to reduce complexity --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 77 +++++++++++-------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 84f7164f4f..399ca728ae 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -127,18 +127,41 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler): chunks = (10, 10, 10) - def add_rand_data(self, desc): - """Build a random DataArray from a given description.""" - # Create a lazy dask array with random int32 values - dtype = desc.get("data_type", "int32") - dims = desc["dims"] - key = desc["key"] + def inject_missing_value(self, dask_array, attribs): + """Inject missing value in data array.""" + # Force setting a few elements to this missing value (but still lazily with + # dask map_blocks function) + missing_value = attribs["missing_value"] - shape = [self.dims[k] for k in dims] + # Ratio of elements to replace with missing value: + missing_ratio = 0.05 - # Define the chunk size for dask array + def set_missing_values(block): + # Generate random indices to set as missing values within this block + block_shape = block.shape + + block_size = np.prod(block_shape) + block_num_to_replace = int(block_size * missing_ratio) + + # Create a Generator instance + rng = np.random.default_rng() + + # Generate unique random indices to set as missing values within this block + flat_indices = rng.choice(block_size, block_num_to_replace, replace=False) + unraveled_indices = np.unravel_index(flat_indices, block_shape) + block[unraveled_indices] = missing_value + + return block + + # Apply the function lazily to each block + dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + + return dask_array + + def generate_dask_array(self, dims, dtype, attribs): + """Generate some random dask array of the given dtype.""" + shape = [self.dims[k] for k in dims] chunks = [10] * len(dims) - attribs = desc["attribs"] if dtype == "int32": rand_min = -2147483647 @@ -164,33 +187,21 @@ def add_rand_data(self, desc): else: raise ValueError(f"Unsupported data type: {dtype}") - if "missing_value" in attribs: - # Force setting a few elements to this missing value (but still lazily with - # dask map_blocks function) - missing_value = 
attribs["missing_value"] - - # Ratio of elements to replace with missing value: - missing_ratio = 0.05 - - def set_missing_values(block): - # Generate random indices to set as missing values within this block - block_shape = block.shape + return dask_array - block_size = np.prod(block_shape) - block_num_to_replace = int(block_size * missing_ratio) - - # Create a Generator instance - rng = np.random.default_rng() - - # Generate unique random indices to set as missing values within this block - flat_indices = rng.choice(block_size, block_num_to_replace, replace=False) - unraveled_indices = np.unravel_index(flat_indices, block_shape) - block[unraveled_indices] = missing_value + def add_rand_data(self, desc): + """Build a random DataArray from a given description.""" + # Create a lazy dask array with random int32 values + dtype = desc.get("data_type", "int32") + dims = desc["dims"] + attribs = desc["attribs"] + key = desc["key"] - return block + dask_array = self.generate_dask_array(dims, dtype, attribs) + # Define the chunk size for dask array - # Apply the function lazily to each block - dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype) + if "missing_value" in attribs: + dask_array = self.inject_missing_value(dask_array, attribs) # Wrap the dask array with xarray.DataArray data_array = xr.DataArray(dask_array, dims=dims) From f660cb1979af1c4551f3dda1752dd3722ec4db4a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 9 Aug 2024 12:39:47 +0200 Subject: [PATCH 26/41] Removing lat/lon unit test duplication --- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 52 +++++++------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index 399ca728ae..9c2eda3c6f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -376,45 +376,31 @@ def test_available_datasets(self, twv_scene): for dname in expected_names: assert dname in dnames - def test_latitude_dataset(self, twv_scene): - """Test loading the latitude dataset.""" - twv_scene.load(["sounder_pixel_latitude"]) - dset = twv_scene["sounder_pixel_latitude"] - - # Should be 2D now: - assert len(dset.dims) == 2 - assert dset.dims[0] == "x" - assert dset.dims[1] == "y" - - # Should have been converted to float64: - assert dset.dtype == np.float64 - - # All valid values should be in range [-90.0,90.0] - vmin = np.nanmin(dset) - vmax = np.nanmax(dset) - - assert vmin >= -90.0 - assert vmax <= 90.0 - - def test_longitude_dataset(self, twv_scene): - """Test loading the longitude dataset.""" - twv_scene.load(["sounder_pixel_longitude"]) - dset = twv_scene["sounder_pixel_longitude"] + def test_latlon_datasets(self, twv_scene): + """Test loading the latitude/longitude dataset.""" + twv_scene.load(["sounder_pixel_latitude", "sounder_pixel_longitude"]) + lat = twv_scene["sounder_pixel_latitude"] + lon = twv_scene["sounder_pixel_longitude"] # Should be 2D now: - assert len(dset.dims) == 2 - assert dset.dims[0] == "x" - assert dset.dims[1] == "y" + assert len(lat.dims) == 2 + assert len(lon.dims) == 2 + assert lat.dims[0] == "x" + assert lat.dims[1] == "y" + assert lon.dims[0] == "x" + assert lon.dims[1] == "y" # Should have been converted to float64: - assert dset.dtype == np.float64 + assert lat.dtype == np.float64 + assert lon.dtype == np.float64 - # All valid values should be in range [-90.0,90.0] - vmin = np.nanmin(dset) - vmax = np.nanmax(dset) + # All 
+        assert np.nanmin(lat) >= -90.0
+        assert np.nanmax(lat) <= 90.0
 
-        assert vmin >= -180.0
-        assert vmax <= 180.0
+        # All valid lon values should be in range [-180.0,180.0]
+        assert np.nanmin(lon) >= -180.0
+        assert np.nanmax(lon) <= 180.0
 
     def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset."""

From efc88656a5d7d9aaf5070dd5cd2808a9a7d2839e Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 9 Aug 2024 12:43:37 +0200
Subject: [PATCH 27/41] Reduced get_test_content complexity

---
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 9c2eda3c6f..02f1b8eaae 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -216,6 +216,15 @@ def add_rand_data(self, desc):
         for aname, val in attribs.items():
             self.content[key + "/attr/" + aname] = val
 
+    def get_valid_attribs(self, anames, alist):
+        """Retrieve only the valid attributes from a list."""
+        attribs = {}
+        for idx, val in enumerate(alist):
+            # Ensure we do not inject "None" attribute values:
+            if val is not None:
+                attribs[anames[idx]] = val
+        return attribs
+
     def get_test_content(self, _filename, _filename_info, _filetype_info):
         """Get the content of the test data.
 
@@ -238,11 +247,7 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
             anames = grp_desc["attrs"]
             for vname, vdesc in grp_desc["variables"].items():
                 # For each variable we create a dataset descriptor:
-                attribs = {}
-                for idx, val in enumerate(vdesc[2]):
-                    # AEnsure we do not inject "None" attribute values:
-                    if val is not None:
-                        attribs[anames[idx]] = val
+                attribs = self.get_valid_attribs(anames, vdesc[2])
 
                 desc = {
                     "key": f"{prefix}/{vname}",

From c237c108fe2088a63f6ad40be2a4761cf50e2b0d Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Tue, 27 Aug 2024 08:56:36 +0200
Subject: [PATCH 28/41] Added support for O3_ and CO_ products

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  2 +-
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index 0c13795cdd..1d637db0d4 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -13,7 +13,7 @@ file_types:
     file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler
     file_patterns:
       [
-        "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
+        "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
       ]
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 02f1b8eaae..3368b144b4 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -341,12 +341,16 @@ def test_filename_matching(self): def test_real_filename_matching(self): """Test that we will match an actual IASI NG L2 product file name.""" - # Below we test the TWV,CLD,GHG and SFC products: - ptypes = ["TWV", "CLD", "GHG", "SFC"] - filenames = [f"{self.file_prefix}-{ptype}_{self.file_suffix}" for ptype in ptypes] + # Below we test the TWV,CLD,GHG,SFC,O3_ and CO_ products: + ptypes = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"] - for filename in filenames: - self._create_file_handler(filename) + for ptype in ptypes: + filename = f"{self.file_prefix}-{ptype}_{self.file_suffix}" + handler = self._create_file_handler(filename) + + assert handler.filename_info["oflag"] == "C" + assert handler.filename_info["originator"] == "EUMT" + assert handler.filename_info["product_type"] == ptype def test_sensing_times(self, twv_handler): """Test that we read the sensing start/end times correctly from filename.""" From 90f7016c4f4368cd94f9208024e0035cb2908bb8 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:02:58 +0200 Subject: [PATCH 29/41] Adding my name to authors.md --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 461ce3ca1e..d1e6763134 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -66,6 +66,7 @@ The following people have made contributions to this project: - [Lars Ørum Rasmussen (loerum)](https://github.com/loerum) - [Martin Raspaud (mraspaud)](https://github.com/mraspaud) - [William Roberts (wroberts4)](https://github.com/wroberts4) +- [Emmanuel Roche (roche-emmanuel)](https://github.com/roche-emmanuel) - [Benjamin Rösner (BENR0)](https://github.com/BENR0) - [Pascale Roquet (roquetp)](https://github.com/roquetp) - [Kristian Rune Larsen](https://github.com/) From 87cf5eafc16f31662574d6b92785e2b24b0471d9 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:27:18 +0200 Subject: [PATCH 30/41] Minor change trying to fix the readthedocs build process --- satpy/readers/iasi_ng_l2_nc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 81890ed531..c0635dbb09 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License along with # satpy. If not, see . -r"""IASI-NG L2 reader implementation. +"""IASI-NG L2 reader implementation. 
This reader supports reading all the products from the IASI-NG L2 processing level: From 430fcd2d094760ac713bbcc7292200c0f289e7f4 Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 09:27:59 +0200 Subject: [PATCH 31/41] Updated IASI ng L2 reader description --- satpy/readers/iasi_ng_l2_nc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index c0635dbb09..f2d47a1630 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -22,6 +22,8 @@ * IASI-L2-CLD * IASI-L2-GHG * IASI-L2-SFC + * IASI-L2-O3_ + * IASI-L2-CO_ """ import logging From e69de92f64fdfb57f50cc3e342a11dd1c5125e0c Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 10:01:35 +0200 Subject: [PATCH 32/41] Fixing readthedocs build process --- satpy/readers/iasi_ng_l2_nc.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index f2d47a1630..880725d341 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -18,12 +18,13 @@ This reader supports reading all the products from the IASI-NG L2 processing level: - * IASI-L2-TWV - * IASI-L2-CLD - * IASI-L2-GHG - * IASI-L2-SFC - * IASI-L2-O3_ - * IASI-L2-CO_ + +* IASI-L2-TWV +* IASI-L2-CLD +* IASI-L2-GHG +* IASI-L2-SFC +* IASI-L2-O3 +* IASI-L2-CO """ import logging From 93c509fba801abdef40e43ec70fb56d2bdaf355a Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Tue, 27 Aug 2024 21:16:45 +0200 Subject: [PATCH 33/41] Renaming contributing.rst file correctly --- ...TING.rst~contributing_and_rtd_build_fixes => CONTRIBUTING.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename doc/source/dev_guide/{CONTRIBUTING.rst~contributing_and_rtd_build_fixes => CONTRIBUTING.rst} (100%) diff --git a/doc/source/dev_guide/CONTRIBUTING.rst~contributing_and_rtd_build_fixes b/doc/source/dev_guide/CONTRIBUTING.rst similarity index 100% rename from doc/source/dev_guide/CONTRIBUTING.rst~contributing_and_rtd_build_fixes rename to doc/source/dev_guide/CONTRIBUTING.rst From 1c8741db503a2c601419b357da41760c2a09a46e Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Mon, 30 Sep 2024 13:17:00 +0200 Subject: [PATCH 34/41] Updated reader to enable optimal_estimations and use dataset name aliases. --- satpy/etc/readers/iasi_ng_l2_nc.yaml | 7 +++++-- satpy/readers/iasi_ng_l2_nc.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml index 1d637db0d4..ea27c2328b 100644 --- a/satpy/etc/readers/iasi_ng_l2_nc.yaml +++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml @@ -16,8 +16,11 @@ file_types: "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc", ] - ignored_patterns: - - /optimal_estimation/ # cf. eum respase2 task2 issue #4 + # ignored_patterns: + # - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4
+
+  dataset_aliases:
+    "/optimal_estimation/(.+)$": "${VAR_NAME}_oem"
 
   # Define if we should broadcast the timestamps from
   # dims (n_lines, n_for) to (n_lines, n_for, n_fov):
diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 880725d341..75d749be0f 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -65,6 +65,11 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs):
         # dataset aliases:
         self.dataset_aliases = self.filetype_info.get("dataset_aliases", {})
 
+        # Transform the aliases into regex patterns:
+        self.dataset_aliases = {
+            re.compile(key): val for key, val in self.dataset_aliases.items()
+        }
+
         # broadcasts timestamps flag:
         self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False)
 
@@ -210,6 +215,18 @@ def register_available_datasets(self):
         for vpath, desc in self.variable_desc.items():
             # Check if we have an alias for this variable:
             ds_name = desc["var_name"]
+
+            # Check if this variable path matches an alias pattern:
+            for pat, sub in self.dataset_aliases.items():
+                # Search for the pattern in the input string
+                match = pat.search(vpath)
+                if match:
+                    # Extract the captured group(s)
+                    var_name = match.group(1)
+                    ds_name = sub.replace("${VAR_NAME}", var_name)
+                    # logger.info("=> matched vpath %s: ds_name: %s", vpath, ds_name)
+                    break
+
             if vpath in self.dataset_aliases:
                 ds_name = self.dataset_aliases[vpath]
 

From e542e607f912d1a7ad6fc00778f7b8d27db85001 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Mon, 30 Sep 2024 13:28:43 +0200
Subject: [PATCH 35/41] Fixing pytest.fixture syntax

---
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 3368b144b4..97d738f08f 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -293,12 +293,12 @@ def fake_handler(self):
             patch_ctx.is_local = True
             yield patch_ctx
 
-    @pytest.fixture()
+    @pytest.fixture
     def twv_handler(self):
         """Create a simple (and fake) default handler on a TWV product."""
         return self._create_file_handler(self.twv_filename)
 
-    @pytest.fixture()
+    @pytest.fixture
     def twv_scene(self):
         """Create a simple (and fake) satpy scene on a TWV product."""
         return Scene(filenames=[self.twv_filename], reader=self.reader_name)

From 6021c92267d5a4a3c4f635925fc89e414f8d75b7 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Wed, 2 Oct 2024 14:41:25 +0200
Subject: [PATCH 36/41] Adding collection of unit tests on IASI-NG L2 reader.
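
Among other things, these tests pin down the new nanosecond-based datetime
conversion used by the reader. A minimal numpy-only sketch of the conversion
idea (illustrative values; the reader itself goes through pandas/xarray in
convert_to_datetime, see the diff below):

    import numpy as np

    epoch = np.datetime64("2000-01-01T00:00:00")
    onboard_utc_seconds = np.array([0.0, 86400.0])
    # Seconds -> integer nanoseconds, then offset from the reference epoch:
    times = epoch + (onboard_utc_seconds * 1e9).astype("timedelta64[ns]")
    # times -> 2000-01-01T00:00:00 and 2000-01-02T00:00:00
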
---
 satpy/readers/iasi_ng_l2_nc.py               |  10 +-
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 164 ++++++++++++++++++
 2 files changed, 169 insertions(+), 5 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 75d749be0f..65423550de 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -115,7 +115,7 @@ def available_datasets(self, configured_datasets=None):
     def register_dataset(self, ds_name, desc):
         """Register a simple dataset given its name and a desc dict."""
         if ds_name in self.dataset_infos:
-            raise ValueError(f"Dataset for {ds_name} already registered.")
+            raise KeyError(f"Dataset for {ds_name} already registered.")
 
         ds_infos = {
             "name": ds_name,
@@ -133,7 +133,7 @@ def process_dimension(self, key, value):
         # print(f"Found dimension: {dim_name}={val}")
         if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
             # This might happen if we have the same dim name from different groups:
-            raise ValueError(f"Detected duplicated dim name: {dim_name}")
+            raise KeyError(f"Detected duplicated dim name: {dim_name}")
 
         self.dimensions_desc[dim_name] = value
 
@@ -362,10 +362,10 @@ def convert_to_datetime(self, data_array, ds_info):
         """Convert the data to datetime values."""
         epoch = ds_info["seconds_since_epoch"]
 
-        # Note: below could convert the resulting data to another type
-        # with .astype("datetime64[us]") for instance
+        # Note: converting the time values to ns precision to avoid warnings
+        # from pandas+numpy:
         data_array = xr.DataArray(
-            data=pd.to_datetime(epoch) + data_array.astype("timedelta64[s]"),
+            data=pd.to_datetime(epoch) + (data_array * 1e9).astype("timedelta64[ns]"),
             dims=data_array.dims,
             attrs=data_array.attrs,
         )
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 97d738f08f..ec668095d2 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -437,3 +437,167 @@ def test_nbr_iterations_dataset(self, twv_scene):
         assert len(dset.dims) == 2
         # Should still be in int32 type:
         assert dset.dtype == np.int32
+
+    def test_register_dataset(self, twv_handler):
+        """Test the register_dataset method."""
+        twv_handler.dataset_infos = {}
+        twv_handler.register_dataset("test_dataset", {"attr": "value"})
+        assert "test_dataset" in twv_handler.dataset_infos
+        assert twv_handler.dataset_infos["test_dataset"]["attr"] == "value"
+
+        # Should refuse to register the same dataset again afterwards:
+        with pytest.raises(KeyError):
+            twv_handler.register_dataset("test_dataset", {"attr": "new_value"})
+
+    def test_process_dimension(self, twv_handler):
+        """Test the process_dimension method."""
+        twv_handler.dimensions_desc = {}
+        twv_handler.process_dimension("group/dimension/test_dim", 10)
+        assert twv_handler.dimensions_desc["test_dim"] == 10
+
+        # Should refuse to process the same dim again:
+        with pytest.raises(KeyError):
+            twv_handler.process_dimension("group/dimension/test_dim", 20)
+
+    def test_process_attribute(self, twv_handler):
+        """Test the process_attribute method."""
+        twv_handler.variable_desc = {"test_var": {"attribs": {}}}
+        twv_handler.process_attribute("test_var/attr/test_attr", "value")
+        assert twv_handler.variable_desc["test_var"]["attribs"]["test_attr"] == "value"
+
+    def test_process_variable(self, twv_handler):
+        """Test the process_variable method."""
+        # Note: we only support variables inside a group,
+        # because this is always the case in the IASI-NG file format.
+ twv_handler.file_content = { + "group/test_var/shape": (10, 10), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [] + twv_handler.process_variable("group/test_var") + assert "group/test_var" in twv_handler.variable_desc + + def test_parse_file_content(self, twv_handler): + """Test the parse_file_content method.""" + twv_handler.file_content = { + "dim/dimension/test_dim": 10, + "var/attr/test_attr": "value", + "grp/test_var": "my_data", + "grp/test_var/shape": (10, 10), + "grp/test_var/dimensions": ("x", "y"), + "grp/test_var/dtype": "float32", + } + + twv_handler.parse_file_content() + assert "test_dim" in twv_handler.dimensions_desc + assert "grp/test_var" in twv_handler.variable_desc + + def test_register_available_datasets(self, twv_handler): + """Test the register_available_datasets method.""" + twv_handler.dataset_infos = None + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {} + twv_handler.register_available_datasets() + assert "test_var" in twv_handler.dataset_infos + + def test_get_dataset_infos(self, twv_handler): + """Test the get_dataset_infos method.""" + twv_handler.dataset_infos = {"test_dataset": {"attr": "value"}} + assert twv_handler.get_dataset_infos("test_dataset") == {"attr": "value"} + + with pytest.raises(KeyError): + twv_handler.get_dataset_infos("non_existent_dataset") + + def test_variable_path_exists(self, twv_handler): + """Test the variable_path_exists method.""" + twv_handler.file_content = {"test_var": mock.MagicMock()} + assert twv_handler.variable_path_exists("test_var") + assert not twv_handler.variable_path_exists("/attr/test_var") + assert not twv_handler.variable_path_exists("test_var/dtype") + + def test_convert_data_type(self, twv_handler): + """Test the convert_data_type method.""" + data = xr.DataArray(np.array([1, 2, 3], dtype=np.float32)) + result = twv_handler.convert_data_type(data, dtype="float64") + assert result.dtype == np.float64 + + # Should stay as f32 if dtype is auto: + data.attrs["scale_factor"] = 2.0 + result = twv_handler.convert_data_type(data, dtype="auto") + assert result.dtype == np.float32 + + def test_apply_fill_value(self, twv_handler): + """Test the apply_fill_value method.""" + data = xr.DataArray( + np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + attrs={"valid_min": 2.0, "valid_max": 4.0}, + ) + result = twv_handler.apply_fill_value(data) + assert np.isnan(result[0]) + assert np.isnan(result[4]) + + def test_apply_rescaling(self, twv_handler): + """Test the apply_rescaling method.""" + data = xr.DataArray( + np.array([1, 2, 3]), attrs={"scale_factor": 2, "add_offset": 1} + ) + result = twv_handler.apply_rescaling(data) + np.testing.assert_array_equal(result, [3, 5, 7]) + + def test_apply_reshaping(self, twv_handler): + """Test the apply_reshaping method.""" + data = xr.DataArray(np.ones((2, 3, 4)), dims=("a", "b", "c")) + result = twv_handler.apply_reshaping(data) + assert result.dims == ("x", "y") + assert result.shape == (2, 12) + + def test_convert_to_datetime(self, twv_handler): + """Test the convert_to_datetime method.""" + data = xr.DataArray(np.array([0, 86400]), dims=("time",)) + ds_info = {"seconds_since_epoch": "2000-01-01 00:00:00"} + result = twv_handler.convert_to_datetime(data, ds_info) + assert result[0].values == np.datetime64("2000-01-01T00:00:00") + assert result[1].values == np.datetime64("2000-01-02T00:00:00") + + def test_apply_broadcast(self, 
twv_handler):
+        """Test the apply_broadcast method."""
+        data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+        twv_handler.dimensions_desc = {"n_fov": 2}
+        ds_info = {"broadcast_on_dim": "n_fov"}
+        result = twv_handler.apply_broadcast(data, ds_info)
+        assert result.shape == (6,)
+
+    def test_get_transformed_dataset(self, twv_handler):
+        """Test the get_transformed_dataset method."""
+        ds_info = {
+            "location": "test_var",
+            "seconds_since_epoch": "2000-01-01 00:00:00",
+            "broadcast_on_dim": "n_fov",
+        }
+        twv_handler.variable_path_exists = mock.MagicMock(return_value=True)
+        twv_handler.file_content["test_var"] = xr.DataArray(
+            np.array([[0, 86400]]), dims=("x", "y")
+        )
+
+        twv_handler.dimensions_desc = {"n_fov": 2}
+
+        result = twv_handler.get_transformed_dataset(ds_info)
+        assert result.shape == (1, 4)
+        assert result.dtype == "datetime64[ns]"
+
+    def test_get_dataset(self, twv_handler):
+        """Test the get_dataset method."""
+        twv_handler.dataset_infos = {
+            "test_dataset": {"name": "test_dataset", "location": "test_var"}
+        }
+        twv_handler.get_transformed_dataset = mock.MagicMock(
+            return_value=xr.DataArray([1, 2, 3])
+        )
+
+        result = twv_handler.get_dataset({"name": "test_dataset"})
+        assert result.equals(xr.DataArray([1, 2, 3]))
+
+        assert twv_handler.get_dataset({"name": "non_existent_dataset"}) is None

From 837e844398a6245e284e7cbbdbf4c3cffd01d150 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Wed, 2 Oct 2024 14:49:42 +0200
Subject: [PATCH 37/41] Cleaning code, removing debug statements.

---
 satpy/etc/readers/iasi_ng_l2_nc.yaml           |  3 +++
 satpy/readers/iasi_ng_l2_nc.py                 | 18 ++++--------------
 satpy/tests/reader_tests/test_iasi_ng_l2_nc.py |  4 ----
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
index ea27c2328b..d95345eb6d 100644
--- a/satpy/etc/readers/iasi_ng_l2_nc.yaml
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -16,6 +16,9 @@ file_types:
       "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
     ]
 
+  # Keeping the optimal_estimation folder for the moment due to feature freeze,
+  # but it might be necessary to re-introduce this in the near future.
+  # So keeping this reference entry here for now.
   # ignored_patterns:
   #   - /optimal_estimation/ # cf. 
eum respase2 task2 issue #4 diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index 65423550de..e2f5631a9c 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -27,7 +27,6 @@ * IASI-L2-CO """ -import logging import re import netCDF4 @@ -37,8 +36,6 @@ from .netcdf_utils import NetCDF4FsspecFileHandler -logger = logging.getLogger(__name__) - class IASINGL2NCFileHandler(NetCDF4FsspecFileHandler): """Reader for IASI-NG L2 products in NetCDF format.""" @@ -130,7 +127,7 @@ def register_dataset(self, ds_name, desc): def process_dimension(self, key, value): """Process a dimension entry from the file_content.""" dim_name = key.split("/")[-1] - # print(f"Found dimension: {dim_name}={val}") + if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value: # This might happen if we have the same dim name from different groups: raise KeyError(f"Detected duplicated dim name: {dim_name}") @@ -140,7 +137,6 @@ def process_dimension(self, key, value): def process_attribute(self, key, value): """Process a attribute entry from the file_content.""" var_path, aname = key.split("/attr/") - # print(f"Found attrib for: {var_path}: {aname}") if var_path not in self.variable_desc: # maybe this variable is ignored, or this is a group attr. @@ -151,14 +147,14 @@ def process_attribute(self, key, value): def process_variable(self, key): """Process a variable entry from the file_content.""" shape = self.file_content[f"{key}/shape"] - # print(f"Found variable: {key}") + if np.prod(shape) <= 1: - # print(f"Ignoring scalar variable {key}") + # Ignoring scalar variable. return # Check if this variable should be ignored: if any(p.search(key) is not None for p in self.ignored_patterns): - # print(f"Ignoring variable {key}") + # Ignoring variable on user request. return # Prepare a description for this variable: @@ -181,7 +177,6 @@ def process_variable(self, key): def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" for key, val in self.file_content.items(): - # print(f"Found key: {key}") if "/dimension/" in key: self.process_dimension(key, val) @@ -196,10 +191,6 @@ def parse_file_content(self): self.process_variable(key) continue - # print(f"Found {len(self.variable_desc)} variables:") - # for vpath, desc in self.variable_desc.items(): - # print(f"{vpath}: {desc}") - def register_available_datasets(self): """Register the available dataset in the current product file.""" if self.dataset_infos is not None: @@ -224,7 +215,6 @@ def register_available_datasets(self): # Extract the captured group(s) var_name = match.group(1) ds_name = sub.replace("${VAR_NAME}", var_name) - # logger.info("=> matched vpath %s: ds_name: %s", vpath, ds_name) break if vpath in self.dataset_aliases: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index ec668095d2..f839aa301f 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -14,7 +14,6 @@ # along with satpy. If not, see . 
"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" -import logging import os from datetime import datetime from unittest import mock @@ -29,8 +28,6 @@ from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler -logger = logging.getLogger(__name__) - d_lff = ("n_lines", "n_for", "n_fov") DATA_DESC = [ @@ -317,7 +314,6 @@ def _create_file_handler(self, filename): # We should have our handler now: assert len(reader.file_handlers) == 1 - # logger.info("File handlers are: %s", reader.file_handlers) assert self.reader_name in reader.file_handlers handlers = reader.file_handlers[self.reader_name] From 82f62e52b9ecb95285496ea95886719ca8b266ac Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Wed, 2 Oct 2024 15:34:39 +0200 Subject: [PATCH 38/41] Adding more unit tests. --- satpy/readers/iasi_ng_l2_nc.py | 3 -- .../tests/reader_tests/test_iasi_ng_l2_nc.py | 46 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py index e2f5631a9c..697eaa0f9a 100644 --- a/satpy/readers/iasi_ng_l2_nc.py +++ b/satpy/readers/iasi_ng_l2_nc.py @@ -217,9 +217,6 @@ def register_available_datasets(self): ds_name = sub.replace("${VAR_NAME}", var_name) break - if vpath in self.dataset_aliases: - ds_name = self.dataset_aliases[vpath] - unit = desc["attribs"].get("units", None) if unit is not None and unit.startswith("seconds since "): # request conversion to datetime: diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py index f839aa301f..53f08862e5 100644 --- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py +++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py @@ -15,6 +15,7 @@ """Unit tests on the IASI NG L2 reader using the conventional mock constructed context.""" import os +import re from datetime import datetime from unittest import mock @@ -474,6 +475,32 @@ def test_process_variable(self, twv_handler): twv_handler.process_variable("group/test_var") assert "group/test_var" in twv_handler.variable_desc + def test_ignore_scalar_variable(self, twv_handler): + """Test ignoring of scalar variable.""" + # Note: we only support variables inside a group, + # because this is always the case in the IASI-NG file format. + twv_handler.file_content = { + "group/test_var/shape": (1, 1), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [] + twv_handler.process_variable("group/test_var") + assert "group/test_var" not in twv_handler.variable_desc + + def test_ignore_pattern_variable(self, twv_handler): + """Test ignoring of pattern in variable.""" + # Note: we only support variables inside a group, + # because this is always the case in the IASI-NG file format. 
+ twv_handler.file_content = { + "group/test_var/shape": (10, 10), + "group/test_var/dimensions": ("x", "y"), + "group/test_var/dtype": "float32", + } + twv_handler.ignored_patterns = [re.compile(r"test_")] + twv_handler.process_variable("group/test_var") + assert "group/test_var" not in twv_handler.variable_desc + def test_parse_file_content(self, twv_handler): """Test the parse_file_content method.""" twv_handler.file_content = { @@ -499,6 +526,25 @@ def test_register_available_datasets(self, twv_handler): twv_handler.register_available_datasets() assert "test_var" in twv_handler.dataset_infos + def test_ignored_register_available_datasets(self, twv_handler): + """Test ignoring register_available_datasets method if done already.""" + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {} + twv_handler.register_available_datasets() + assert "test_var" not in twv_handler.dataset_infos + + def test_register_available_datasets_alias(self, twv_handler): + """Test the register_available_datasets method with alias.""" + twv_handler.dataset_infos = None + twv_handler.variable_desc = { + "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}} + } + twv_handler.dataset_aliases = {re.compile(r"var/(.+)$"): "${VAR_NAME}_oem"} + twv_handler.register_available_datasets() + assert "test_var_oem" in twv_handler.dataset_infos + def test_get_dataset_infos(self, twv_handler): """Test the get_dataset_infos method.""" twv_handler.dataset_infos = {"test_dataset": {"attr": "value"}} From c1303f351874ca12379a7aef687b5833702df54f Mon Sep 17 00:00:00 2001 From: GMV - Emmanuel Roche Date: Fri, 4 Oct 2024 07:23:45 +0200 Subject: [PATCH 39/41] Added a few additional unit tests for coverage. 
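
The valid_range handling exercised by the new test_apply_fill_value_range
test amounts to masking out-of-range values with NaN. A simplified sketch of
that logic (not the exact reader code, which also handles the valid_min,
valid_max and missing_value attributes):

    import numpy as np
    import xarray as xr

    data = xr.DataArray(
        np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
        attrs={"valid_range": [2.0, 4.0]},
    )
    vmin, vmax = data.attrs["valid_range"]
    # Keep values inside [vmin, vmax], replace everything else with NaN:
    data = data.where(data >= vmin, other=np.nan)
    data = data.where(data <= vmax, other=np.nan)
    # data -> [nan, 2.0, 3.0, 4.0, nan]
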
---
 satpy/readers/iasi_ng_l2_nc.py                 |  4 +--
 .../tests/reader_tests/test_iasi_ng_l2_nc.py   | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index 697eaa0f9a..a41b526b50 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -363,7 +363,7 @@ def apply_broadcast(self, data_array, ds_info):
         """Apply the broadcast of the data array."""
         dim_name = ds_info["broadcast_on_dim"]
         if dim_name not in self.dimensions_desc:
-            raise ValueError(f"Invalid dimension name {dim_name}")
+            raise KeyError(f"Invalid dimension name {dim_name}")
 
         rep_count = self.dimensions_desc[dim_name]
 
@@ -377,7 +377,7 @@ def get_transformed_dataset(self, ds_info):
         vname = ds_info["location"]
 
         if not self.variable_path_exists(vname):
-            raise ValueError(f"Invalid variable path: {vname}")
+            raise KeyError(f"Invalid variable path: {vname}")
 
         # Read the raw variable data from file (this is an xr.DataArray):
         arr = self[vname]
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index 53f08862e5..f502f08212 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -559,6 +559,7 @@ def test_variable_path_exists(self, twv_handler):
         assert twv_handler.variable_path_exists("test_var")
         assert not twv_handler.variable_path_exists("/attr/test_var")
         assert not twv_handler.variable_path_exists("test_var/dtype")
+        assert not twv_handler.variable_path_exists("/grp/a_non_existing_var")
 
     def test_convert_data_type(self, twv_handler):
         """Test the convert_data_type method."""
@@ -581,6 +582,17 @@ def test_apply_fill_value(self, twv_handler):
         assert np.isnan(result[0])
         assert np.isnan(result[4])
 
+    def test_apply_fill_value_range(self, twv_handler):
+        """Test the apply_fill_value method with range."""
+        data = xr.DataArray(
+            np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+            attrs={"valid_range": [2.0, 4.0]},
+        )
+        result = twv_handler.apply_fill_value(data)
+        assert np.isnan(result[0])
+        assert np.isfinite(result[1])
+        assert np.isnan(result[4])
+
     def test_apply_rescaling(self, twv_handler):
         """Test the apply_rescaling method."""
         data = xr.DataArray(
@@ -612,6 +624,15 @@ def test_apply_broadcast(self, twv_handler):
         result = twv_handler.apply_broadcast(data, ds_info)
         assert result.shape == (6,)
 
+    def test_apply_broadcast_failure(self, twv_handler):
+        """Test that the apply_broadcast method fails on a missing dim."""
+        data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+        twv_handler.dimensions_desc = {}
+        ds_info = {"broadcast_on_dim": "n_fov"}
+
+        with pytest.raises(KeyError):
+            twv_handler.apply_broadcast(data, ds_info)
+
     def test_get_transformed_dataset(self, twv_handler):
         """Test the get_transformed_dataset method."""
@@ -630,6 +651,17 @@ def test_get_transformed_dataset(self, twv_handler):
         assert result.shape == (1, 4)
         assert result.dtype == "datetime64[ns]"
 
+    def test_get_transformed_dataset_failure(self, twv_handler):
+        """Test that the get_transformed_dataset method fails on an invalid path."""
+        ds_info = {
+            "location": "test_var",
+            "seconds_since_epoch": "2000-01-01 00:00:00",
+            "broadcast_on_dim": "n_fov",
+        }
+
+        with pytest.raises(KeyError):
+            twv_handler.get_transformed_dataset(ds_info)
+
     def test_get_dataset(self, twv_handler):
         """Test the get_dataset method."""
         twv_handler.dataset_infos = {

From 2dc7811f4379926b9f01e093b5b1a5090940a208 Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Thu, 5 Dec 2024 09:35:21 +0100
Subject: [PATCH 40/41] Reduced comments in iasi_ng_l2_nc.py file

---
 satpy/readers/iasi_ng_l2_nc.py | 134 +++++++++++++++------------------
 1 file changed, 61 insertions(+), 73 deletions(-)

diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
index a41b526b50..5d6184c2bf 100644
--- a/satpy/readers/iasi_ng_l2_nc.py
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -46,28 +46,16 @@ def __init__(self, filename, filename_info, filetype_info, **kwargs):
 
         self.sensors = {"iasi_ng"}
 
-        # List of datasets provided by this handler:
         self.dataset_infos = None
-
-        # Description of the available variables:
         self.variable_desc = {}
-
-        # Description of the available dimensions:
         self.dimensions_desc = {}
 
-        # Ignored variable patterns:
         patterns = self.filetype_info.get("ignored_patterns", [])
         self.ignored_patterns = [re.compile(pstr) for pstr in patterns]
 
-        # dataset aliases:
-        self.dataset_aliases = self.filetype_info.get("dataset_aliases", {})
-
-        # Transform the aliases into regex patterns:
-        self.dataset_aliases = {
-            re.compile(key): val for key, val in self.dataset_aliases.items()
-        }
+        aliases = self.filetype_info.get("dataset_aliases", {})
+        self.dataset_aliases = {re.compile(key): val for key, val in aliases.items()}
 
-        # broadcasts timestamps flag:
         self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False)
 
         self.register_available_datasets()
@@ -100,9 +88,9 @@ def _collect_groups_info(self, base_name, obj):
     def available_datasets(self, configured_datasets=None):
         """Determine automatically the datasets provided by this file.
 
-        Uses a per product type dataset registration mechanism.
+        First yields any element from the provided configured_datasets,
+        then continues with the internally provided datasets.
         """
-        # pass along existing datasets
        for is_avail, ds_info in configured_datasets or []:
            yield is_avail, ds_info
 
@@ -124,45 +112,49 @@ def register_dataset(self, ds_name, desc):
 
         self.dataset_infos[ds_name] = ds_infos
 
+    def same_dim_names_for_different_groups(self, dim_name, value):
+        """Check if we already have this dim_name registered from another group."""
+        return (
+            dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value
+        )
+
     def process_dimension(self, key, value):
         """Process a dimension entry from the file_content."""
         dim_name = key.split("/")[-1]
 
-        if dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value:
-            # This might happen if we have the same dim name from different groups:
+        if self.same_dim_names_for_different_groups(dim_name, value):
             raise KeyError(f"Detected duplicated dim name: {dim_name}")
 
         self.dimensions_desc[dim_name] = value
 
+    def has_variable_desc(self, var_path):
+        """Check if a given variable path is available."""
+        return var_path in self.variable_desc
+
     def process_attribute(self, key, value):
         """Process a attribute entry from the file_content."""
         var_path, aname = key.split("/attr/")
 
-        if var_path not in self.variable_desc:
-            # maybe this variable is ignored, or this is a group attr.
+        if not self.has_variable_desc(var_path):
             return
 
         self.variable_desc[var_path]["attribs"][aname] = value
 
-    def process_variable(self, key):
-        """Process a variable entry from the file_content."""
-        shape = self.file_content[f"{key}/shape"]
+    def has_at_most_one_element(self, shape):
+        """Check if a shape corresponds to an array with at most 1 element."""
+        return np.prod(shape) <= 1
 
-        if np.prod(shape) <= 1:
-            # Ignoring scalar variable.
- return + def is_variable_ignored(self, var_name): + """Check if a variable should be ignored.""" + return any(p.search(var_name) is not None for p in self.ignored_patterns) - # Check if this variable should be ignored: - if any(p.search(key) is not None for p in self.ignored_patterns): - # Ignoring variable on user request. - return - - # Prepare a description for this variable: + def prepare_variable_description(self, key, shape): + """Prepare a description for a given variable.""" prefix, var_name = key.rsplit("/", 1) dims = self.file_content[f"{key}/dimensions"] dtype = self.file_content[f"{key}/dtype"] - desc = { + return { "location": key, "prefix": prefix, "var_name": var_name, @@ -172,7 +164,17 @@ def process_variable(self, key): "attribs": {}, } - self.variable_desc[key] = desc + def process_variable(self, key): + """Process a variable entry from the file_content.""" + shape = self.file_content[f"{key}/shape"] + + if self.has_at_most_one_element(shape): + return + + if self.is_variable_ignored(key): + return + + self.variable_desc[key] = self.prepare_variable_description(key, shape) def parse_file_content(self): """Parse the file_content to discover the available datasets and dimensions.""" @@ -186,44 +188,38 @@ def parse_file_content(self): self.process_attribute(key, val) continue - # if isinstance(val, netCDF4.Variable): if f"{key}/shape" in self.file_content: self.process_variable(key) continue + def check_variable_alias(self, vpath, ds_name): + """Check if a variable path matches an alias pattern.""" + for pat, sub in self.dataset_aliases.items(): + match = pat.search(vpath) + if match: + var_name = match.group(1) + return sub.replace("${VAR_NAME}", var_name) + + return ds_name + def register_available_datasets(self): """Register the available dataset in the current product file.""" if self.dataset_infos is not None: - # Datasets are already registered. return - # Otherwise, we need to perform the registration: self.dataset_infos = {} - # Parse the file content: self.parse_file_content() for vpath, desc in self.variable_desc.items(): - # Check if we have an alias for this variable: ds_name = desc["var_name"] - - # Check if this variable path matches an alias pattern: - for pat, sub in self.dataset_aliases.items(): - # Search for the pattern in the input string - match = pat.search(vpath) - if match: - # Extract the captured group(s) - var_name = match.group(1) - ds_name = sub.replace("${VAR_NAME}", var_name) - break + ds_name = self.check_variable_alias(vpath, ds_name) unit = desc["attribs"].get("units", None) if unit is not None and unit.startswith("seconds since "): - # request conversion to datetime: desc["seconds_since_epoch"] = unit.replace("seconds since ", "") if self.broadcast_timestamps and desc["var_name"] == "onboard_utc": - # Broadcast on the "n_fov" dimension: desc["broadcast_on_dim"] = "n_fov" self.register_dataset(ds_name, desc) @@ -235,24 +231,30 @@ def get_dataset_infos(self, ds_name): return self.dataset_infos[ds_name] + def is_attribute_path(self, var_path): + """Check if a given path is a root attribute path.""" + return var_path.startswith("/attr") + + def is_property_path(self, var_path): + """Check if a given path is a sub-property path.""" + return var_path.endswith(("/dtype", "/shape", "/dimensions")) + + def is_netcdf_group(self, obj): + """Check if a given object is a netCDF group.""" + return isinstance(obj, netCDF4.Group) + def variable_path_exists(self, var_path): """Check if a given variable path is available in the underlying netCDF file. 
All we really need to do here is to access the file_content dictionary and check if we have a variable under that var_path key. """ - # but we ignore attributes: or sub properties: - if var_path.startswith("/attr") or var_path.endswith( - ("/dtype", "/shape", "/dimensions") - ): + if self.is_attribute_path(var_path) or self.is_property_path(var_path): return False - # Check if the path is found: if var_path in self.file_content: - # This is only a valid variable if it is not a netcdf group: - return not isinstance(self.file_content[var_path], netCDF4.Group) + return not self.is_netcdf_group(self.file_content[var_path]) - # Var path not in file_content: return False def convert_data_type(self, data_array, dtype="auto"): @@ -278,13 +280,11 @@ def apply_fill_value(self, data_array): """Apply the rescaling transform on a given array.""" dtype = np.dtype(data_array.dtype).name if dtype not in ["float32", "float64"]: - # We won't be able to use NaN in the other cases: return data_array nan_val = np.nan if dtype == "float64" else np.float32(np.nan) attribs = data_array.attrs - # Apply the min/max valid range: if "valid_min" in attribs: vmin = attribs["valid_min"] data_array = data_array.where(data_array >= vmin, other=nan_val) @@ -298,7 +298,6 @@ def apply_fill_value(self, data_array): data_array = data_array.where(data_array >= vrange[0], other=nan_val) data_array = data_array.where(data_array <= vrange[1], other=nan_val) - # Check the missing value: missing_val = attribs.get("missing_value", None) missing_val = attribs.get("_FillValue", missing_val) @@ -309,7 +308,6 @@ def apply_fill_value(self, data_array): def apply_rescaling(self, data_array): """Apply the rescaling transform on a given array.""" - # Check if we have the scaling elements: attribs = data_array.attrs if "scale_factor" in attribs or "add_offset" in attribs: scale_factor = attribs.setdefault("scale_factor", 1) @@ -317,7 +315,6 @@ def apply_rescaling(self, data_array): data_array = (data_array * scale_factor) + add_offset - # rescale the valid range accordingly for key in ["valid_range", "valid_min", "valid_max"]: if key in attribs: attribs[key] = attribs[key] * scale_factor + add_offset @@ -366,23 +363,19 @@ def apply_broadcast(self, data_array, ds_info): raise KeyError(f"Invalid dimension name {dim_name}") rep_count = self.dimensions_desc[dim_name] - # Apply "a repeat operation" with the last dimension size: data_array = xr.concat([data_array] * rep_count, dim=data_array.dims[-1]) return data_array def get_transformed_dataset(self, ds_info): """Retrieve a dataset with all transformations applied on it.""" - # Extract location: vname = ds_info["location"] if not self.variable_path_exists(vname): raise KeyError(f"Invalid variable path: {vname}") - # Read the raw variable data from file (this is an xr.DataArray): arr = self[vname] - # Apply the transformations: arr = self.convert_data_type(arr) arr = self.apply_fill_value(arr) arr = self.apply_rescaling(arr) @@ -400,19 +393,14 @@ def get_dataset(self, dataset_id, ds_info=None): """Get a dataset.""" ds_name = dataset_id["name"] - # In case this dataset name is not explicitly provided by this file - # handler then we should simply return None. 
if ds_name not in self.dataset_infos:
             return None
 
-        # Retrieve default infos if missing:
         if ds_info is None:
             ds_info = self.get_dataset_infos(ds_name)
             ds_name = ds_info["name"]
 
-        # Retrieve the transformed data array:
         data_array = self.get_transformed_dataset(ds_info)
 
-        # Return the resulting array:
         return data_array

From 5e67f7a25d20c0b71398f51d4a96e609d0bd05dc Mon Sep 17 00:00:00 2001
From: GMV - Emmanuel Roche
Date: Fri, 6 Dec 2024 08:52:33 +0100
Subject: [PATCH 41/41] Removing all excessive code comments from
 test_iasi_ng_l2_nc.py

---
 .../tests/reader_tests/test_iasi_ng_l2_nc.py | 57 ++++---------------
 1 file changed, 10 insertions(+), 47 deletions(-)

diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
index f502f08212..23d199ebf6 100644
--- a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -126,32 +126,31 @@ class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler):
     chunks = (10, 10, 10)
 
     def inject_missing_value(self, dask_array, attribs):
-        """Inject missing value in data array."""
-        # Force setting a few elements to this missing value (but still lazily with
-        # dask map_blocks function)
+        """Inject missing value in data array.
+
+        This method is used to randomly set a few elements of the
+        input array to the configured missing value read from the attribs
+        dict. This operation is done lazily using the dask map_blocks
+        function.
+        """
         missing_value = attribs["missing_value"]
 
-        # Ratio of elements to replace with missing value:
         missing_ratio = 0.05
 
         def set_missing_values(block):
-            # Generate random indices to set as missing values within this block
            block_shape = block.shape
            block_size = np.prod(block_shape)
            block_num_to_replace = int(block_size * missing_ratio)
 
-            # Create a Generator instance
            rng = np.random.default_rng()
 
-            # Generate unique random indices to set as missing values within this block
            flat_indices = rng.choice(block_size, block_num_to_replace, replace=False)
            unraveled_indices = np.unravel_index(flat_indices, block_shape)
            block[unraveled_indices] = missing_value
 
            return block
 
-        # Apply the function lazily to each block
        dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype)
 
        return dask_array
@@ -173,14 +172,11 @@ def generate_dask_array(self, dims, dtype, attribs):
         if dtype == "float32":
             dask_array = dask_array.astype(np.float32)
 
-            # Max flaot32 value is 3.4028235e38, we use a smaller value
-            # by default here:
             vmin = attribs.get("valid_min", -1e16)
             vmax = attribs.get("valid_max", 1e16)
             rand_min = vmin - (vmax - vmin) * 0.1
             rand_max = vmax + (vmax - vmin) * 0.1
 
-            # Scale and shift to the desired range [min_val, max_val]
             dask_array = dask_array * (rand_max - rand_min) + rand_min
         else:
             raise ValueError(f"Unsupported data type: {dtype}")
@@ -189,21 +185,17 @@ def generate_dask_array(self, dims, dtype, attribs):
 
     def add_rand_data(self, desc):
         """Build a random DataArray from a given description."""
-        # Create a lazy dask array with random int32 values
         dtype = desc.get("data_type", "int32")
         dims = desc["dims"]
         attribs = desc["attribs"]
         key = desc["key"]
 
         dask_array = self.generate_dask_array(dims, dtype, attribs)
-        # Define the chunk size for dask array
 
         if "missing_value" in attribs:
             dask_array = self.inject_missing_value(dask_array, attribs)
 
-        # Wrap the dask array with xarray.DataArray
         data_array = xr.DataArray(dask_array, dims=dims)
 
         data_array.attrs.update(attribs)
         self.content[key] = data_array
@@ -218,9 +210,9 @@ def get_valid_attribs(self, anames, alist):
         """Retrieve only the valid attributes from a list."""
         attribs = {}
         for idx, val in enumerate(alist):
-            # Ensure we do not inject "None" attribute values:
             if val is not None:
                 attribs[anames[idx]] = val
+
         return attribs
 
     def get_test_content(self, _filename, _filename_info, _filetype_info):
@@ -236,15 +228,10 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
 
         self.content = {}
 
-        # Note: below we use the full range of int32 to generate the random
-        # values, we expect the handler to "fix" out of range values replacing
-        # them with NaNs.
         for grp_desc in DATA_DESC:
-            # get the group prefix:
             prefix = grp_desc["group"]
             anames = grp_desc["attrs"]
             for vname, vdesc in grp_desc["variables"].items():
-                # For each variable we create a dataset descriptor:
                 attribs = self.get_valid_attribs(anames, vdesc[2])
 
                 desc = {
@@ -256,7 +243,6 @@ def get_test_content(self, _filename, _filename_info, _filetype_info):
 
                 self.add_rand_data(desc)
 
-        # We should also register the dimensions as we will need access to "n_fov" for instance:
         for key, val in self.dims.items():
             self.content[f"data/dimension/{key}"] = val
 
@@ -305,21 +291,17 @@ def _create_file_handler(self, filename):
         """Create an handler for the given file checking that it can be parsed correctly."""
         reader = load_reader(self.reader_configs)
 
-        # Test if the file is recognized by the reader
         files = reader.select_files_from_pathnames([filename])
         assert len(files) == 1, "File should be recognized by the reader"
 
-        # Create the file handler:
         reader.create_filehandlers(files)
 
-        # We should have our handler now:
         assert len(reader.file_handlers) == 1
 
         assert self.reader_name in reader.file_handlers
 
         handlers = reader.file_handlers[self.reader_name]
 
-        # We should have a single handler for a single file:
         assert len(handlers) == 1
         assert isinstance(handlers[0], IASINGL2NCFileHandler)
 
@@ -327,7 +309,6 @@ def _create_file_handler(self, filename):
 
     def test_filename_matching(self):
         """Test filename matching against some random name."""
-        # Example filename
         prefix = "W_fr-meteo-sat,GRAL,MTI1-IASING-2"
         suffix = (
             "C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
@@ -338,10 +319,9 @@ def test_filename_matching(self):
 
     def test_real_filename_matching(self):
         """Test that we will match an actual IASI NG L2 product file name."""
-        # Below we test the TWV,CLD,GHG,SFC,O3_ and CO_ products:
-        ptypes = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"]
+        supported_types = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"]
 
-        for ptype in ptypes:
+        for ptype in supported_types:
             filename = f"{self.file_prefix}-{ptype}_{self.file_suffix}"
             handler = self._create_file_handler(filename)
 
@@ -388,7 +368,6 @@ def test_latlon_datasets(self, twv_scene):
         """Test loading the latitude/longitude dataset."""
         twv_scene.load(["sounder_pixel_latitude", "sounder_pixel_longitude"])
         lat = twv_scene["sounder_pixel_latitude"]
         lon = twv_scene["sounder_pixel_longitude"]
 
-        # Should be 2D now:
         assert len(lat.dims) == 2
         assert len(lon.dims) == 2
         assert lat.dims[0] == "x"
         assert lat.dims[1] == "y"
         assert lon.dims[0] == "x"
         assert lon.dims[1] == "y"
 
-        # Should have been converted to float64:
         assert lat.dtype == np.float64
         assert lon.dtype == np.float64
 
-        # All valid lat values should be in range [-90.0,90.0]
         assert np.nanmin(lat) >= -90.0
         assert np.nanmax(lat) <= 90.0
 
-        # All valid lon values should be in range [-180.0,180.0]
         assert np.nanmin(lon) >= -180.0
         assert np.nanmax(lon) <= 180.0
 
@@ -413,15 +389,12 @@ def test_onboard_utc_dataset(self, twv_scene):
         """Test loading the onboard_utc dataset."""
         twv_scene.load(["onboard_utc", "sounder_pixel_latitude"])
         dset = 
twv_scene["onboard_utc"] - # Should be 2D now: assert len(dset.dims) == 2 assert dset.dims[0] == "x" assert dset.dims[1] == "y" - # Should have been converted to datetime: assert dset.dtype == np.dtype("datetime64[ns]") - # Should also have the same size as "lattitude" for instance: lat = twv_scene["sounder_pixel_latitude"] assert lat.shape == dset.shape @@ -432,7 +405,6 @@ def test_nbr_iterations_dataset(self, twv_scene): dset = twv_scene["nbr_iterations"] assert len(dset.dims) == 2 - # Should still be in int32 type: assert dset.dtype == np.int32 def test_register_dataset(self, twv_handler): @@ -442,7 +414,6 @@ def test_register_dataset(self, twv_handler): assert "test_dataset" in twv_handler.dataset_infos assert twv_handler.dataset_infos["test_dataset"]["attr"] == "value" - # Should refuse to register the same dataset again afterwards: with pytest.raises(KeyError): twv_handler.register_dataset("test_dataset", {"attr": "new_value"}) @@ -452,7 +423,6 @@ def test_process_dimension(self, twv_handler): twv_handler.process_dimension("group/dimension/test_dim", 10) assert twv_handler.dimensions_desc["test_dim"] == 10 - # Should refuse to process the same dim again: with pytest.raises(KeyError): twv_handler.process_dimension("group/dimension/test_dim", 20) @@ -464,8 +434,6 @@ def test_process_attribute(self, twv_handler): def test_process_variable(self, twv_handler): """Test the process_variable method.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (10, 10), "group/test_var/dimensions": ("x", "y"), @@ -477,8 +445,6 @@ def test_process_variable(self, twv_handler): def test_ignore_scalar_variable(self, twv_handler): """Test ignoring of scalar variable.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (1, 1), "group/test_var/dimensions": ("x", "y"), @@ -490,8 +456,6 @@ def test_ignore_scalar_variable(self, twv_handler): def test_ignore_pattern_variable(self, twv_handler): """Test ignoring of pattern in variable.""" - # Note: we only support variables inside a group, - # because this is always the case in the IASI-NG file format. twv_handler.file_content = { "group/test_var/shape": (10, 10), "group/test_var/dimensions": ("x", "y"), @@ -567,7 +531,6 @@ def test_convert_data_type(self, twv_handler): result = twv_handler.convert_data_type(data, dtype="float64") assert result.dtype == np.float64 - # Should stay as f32 if dtype is auto: data.attrs["scale_factor"] = 2.0 result = twv_handler.convert_data_type(data, dtype="auto") assert result.dtype == np.float32
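
For reference, the dataset alias mechanism that several of the tests above
exercise can be sketched as follows (a standalone illustration: the variable
path here is made up, while the pattern/replacement pair mirrors the entry
from the reader YAML):

    import re

    # Map variables found under an optimal_estimation group to "<name>_oem":
    dataset_aliases = {re.compile(r"/optimal_estimation/(.+)$"): "${VAR_NAME}_oem"}

    vpath = "data/some_group/optimal_estimation/air_temperature"  # hypothetical path
    ds_name = vpath.rsplit("/", 1)[-1]
    for pat, sub in dataset_aliases.items():
        match = pat.search(vpath)
        if match:
            ds_name = sub.replace("${VAR_NAME}", match.group(1))
            break
    # ds_name -> "air_temperature_oem"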