diff --git a/satpy/etc/readers/iasi_ng_l2_nc.yaml b/satpy/etc/readers/iasi_ng_l2_nc.yaml
new file mode 100644
index 0000000000..d95345eb6d
--- /dev/null
+++ b/satpy/etc/readers/iasi_ng_l2_nc.yaml
@@ -0,0 +1,30 @@
+reader:
+ name: iasi_ng_l2_nc
+ short_name: IASI-NG L2 NC Reader
+ long_name: IASI-NG Level-2 NetCDF Reader
+ description: Reader for IASI-NG Level-2 NetCDF files
+ sensors: [iasi]
+ reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
+ status: Alpha
+ supports_fsspec: false
+
+file_types:
+ iasi_ng_l2_nc:
+ file_reader: !!python/name:satpy.readers.iasi_ng_l2_nc.IASINGL2NCFileHandler
+ file_patterns:
+ [
+ "{pflag}_{country}-{organization}-{location},{data_designator},{spacecraft}-{instrument}-{processing_level}-{product_type:3s}_{oflag}_{originator}_{generation_time}_{mission_type}_{environment}_{sensing_start_time:%Y%m%d%H%M%S}_{sensing_end_time:%Y%m%d%H%M%S}_{disposition_mode}_{processing_mode}_{free_text1}_{free_text2}_{free_text3}_{extension}.nc",
+ ]
+
+ # Keeping the optimal_estimation folder for the moment due to the feature
+ # freeze, but it might be necessary to re-introduce this ignore pattern in
+ # the near future, so this reference entry is kept here for now:
+ # ignored_patterns:
+ # - /optimal_estimation/ # cf. eum respase2 task2 issue #4
+
+ dataset_aliases:
+ "/optimal_estimation/(.+)$": "${VAR_NAME}_oem"
+
+ # Define whether the timestamps should be broadcast from
+ # dims (n_lines, n_for) to (n_lines, n_for, n_fov):
+ broadcast_timestamps: true
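
For reference, a minimal sketch of how a `dataset_aliases` entry like the one above is meant to be resolved into a dataset name. This mirrors the `${VAR_NAME}` placeholder convention implemented by `check_variable_alias` in the handler below; the variable path used here is only illustrative:

```python
import re

# Alias table as it would be loaded from the YAML above: the key is a regex
# matched against the full variable path, the value is a template where
# ${VAR_NAME} is replaced by the first capture group.
dataset_aliases = {re.compile(r"/optimal_estimation/(.+)$"): "${VAR_NAME}_oem"}


def resolve_name(var_path, default_name):
    """Return the aliased dataset name for var_path, or the default name."""
    for pattern, template in dataset_aliases.items():
        match = pattern.search(var_path)
        if match:
            return template.replace("${VAR_NAME}", match.group(1))
    return default_name


# A variable stored under an /optimal_estimation/ group gets the "_oem" suffix:
print(resolve_name("data/optimal_estimation/air_temperature", "air_temperature"))
# -> "air_temperature_oem"
```
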
diff --git a/satpy/readers/iasi_ng_l2_nc.py b/satpy/readers/iasi_ng_l2_nc.py
new file mode 100644
index 0000000000..5d6184c2bf
--- /dev/null
+++ b/satpy/readers/iasi_ng_l2_nc.py
@@ -0,0 +1,406 @@
+# Copyright (c) 2017-2023 Satpy developers
+#
+# This file is part of satpy.
+#
+# satpy is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# satpy is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+ # satpy. If not, see <http://www.gnu.org/licenses/>.
+
+"""IASI-NG L2 reader implementation.
+
+This reader supports reading all the products from the IASI-NG L2 processing
+level:
+
+* IASI-L2-TWV
+* IASI-L2-CLD
+* IASI-L2-GHG
+* IASI-L2-SFC
+* IASI-L2-O3
+* IASI-L2-CO
+"""
+
+import re
+
+import netCDF4
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from .netcdf_utils import NetCDF4FsspecFileHandler
+
+
+class IASINGL2NCFileHandler(NetCDF4FsspecFileHandler):
+ """Reader for IASI-NG L2 products in NetCDF format."""
+
+ def __init__(self, filename, filename_info, filetype_info, **kwargs):
+ """Initialize object."""
+ super().__init__(filename, filename_info, filetype_info, **kwargs)
+
+ self.sensors = {"iasi_ng"}
+
+ self.dataset_infos = None
+ self.variable_desc = {}
+ self.dimensions_desc = {}
+
+ patterns = self.filetype_info.get("ignored_patterns", [])
+ self.ignored_patterns = [re.compile(pstr) for pstr in patterns]
+
+ aliases = self.filetype_info.get("dataset_aliases", {})
+ self.dataset_aliases = {re.compile(key): val for key, val in aliases.items()}
+
+ self.broadcast_timestamps = self.filetype_info.get("broadcast_timestamps", False)
+
+ self.register_available_datasets()
+
+ @property
+ def start_time(self):
+ """Get the start time."""
+ return self.filename_info["sensing_start_time"]
+
+ @property
+ def end_time(self):
+ """Get the end time."""
+ return self.filename_info["sensing_end_time"]
+
+ @property
+ def sensor_names(self):
+ """List of sensors represented in this file."""
+ return self.sensors
+
+ # Note: overriding the _collect_groups_info method below so that
+ # dimensions in sub-groups are also collected.
+ def _collect_groups_info(self, base_name, obj):
+ for group_name, group_obj in obj.groups.items():
+ full_group_name = base_name + group_name
+ self.file_content[full_group_name] = group_obj
+ self._collect_attrs(full_group_name, group_obj)
+ self.collect_metadata(full_group_name, group_obj)
+ self.collect_dimensions(full_group_name, group_obj)
+
+ def available_datasets(self, configured_datasets=None):
+ """Determine automatically the datasets provided by this file.
+
+ First yield on any element from the provided configured_datasets,
+ and then continues with the internally provided datasets.
+ """
+ for is_avail, ds_info in configured_datasets or []:
+ yield is_avail, ds_info
+
+ for _, ds_info in self.dataset_infos.items():
+ yield True, ds_info
+
+ def register_dataset(self, ds_name, desc):
+ """Register a simple dataset given its name and a desc dict."""
+ if ds_name in self.dataset_infos:
+ raise KeyError(f"Dataset for {ds_name} already registered.")
+
+ ds_infos = {
+ "name": ds_name,
+ "sensor": "iasi_ng",
+ "file_type": self.filetype_info["file_type"],
+ }
+
+ ds_infos.update(desc)
+
+ self.dataset_infos[ds_name] = ds_infos
+
+ def same_dim_names_for_different_groups(self, dim_name, value):
+ """Check if we already have this dim_name registered from another group."""
+ return (
+ dim_name in self.dimensions_desc and self.dimensions_desc[dim_name] != value
+ )
+
+ def process_dimension(self, key, value):
+ """Process a dimension entry from the file_content."""
+ dim_name = key.split("/")[-1]
+
+ if self.same_dim_names_for_different_groups(dim_name, value):
+ raise KeyError(f"Detected duplicated dim name: {dim_name}")
+
+ self.dimensions_desc[dim_name] = value
+
+ def has_variable_desc(self, var_path):
+ """Check if a given variable path is available."""
+ return var_path in self.variable_desc
+
+ def process_attribute(self, key, value):
+ """Process a attribute entry from the file_content."""
+ var_path, aname = key.split("/attr/")
+
+ if not self.has_variable_desc(var_path):
+ return
+
+ self.variable_desc[var_path]["attribs"][aname] = value
+
+ def has_at_most_one_element(self, shape):
+ """Check if a shape corresponds to an array with at most 1 element."""
+ return np.prod(shape) <= 1
+
+ def is_variable_ignored(self, var_name):
+ """Check if a variable should be ignored."""
+ return any(p.search(var_name) is not None for p in self.ignored_patterns)
+
+ def prepare_variable_description(self, key, shape):
+ """Prepare a description for a given variable."""
+ prefix, var_name = key.rsplit("/", 1)
+ dims = self.file_content[f"{key}/dimensions"]
+ dtype = self.file_content[f"{key}/dtype"]
+
+ return {
+ "location": key,
+ "prefix": prefix,
+ "var_name": var_name,
+ "shape": shape,
+ "dtype": f"{dtype}",
+ "dims": dims,
+ "attribs": {},
+ }
+
+ def process_variable(self, key):
+ """Process a variable entry from the file_content."""
+ shape = self.file_content[f"{key}/shape"]
+
+ if self.has_at_most_one_element(shape):
+ return
+
+ if self.is_variable_ignored(key):
+ return
+
+ self.variable_desc[key] = self.prepare_variable_description(key, shape)
+
+ def parse_file_content(self):
+ """Parse the file_content to discover the available datasets and dimensions."""
+ for key, val in self.file_content.items():
+
+ if "/dimension/" in key:
+ self.process_dimension(key, val)
+ continue
+
+ if "/attr/" in key:
+ self.process_attribute(key, val)
+ continue
+
+ if f"{key}/shape" in self.file_content:
+ self.process_variable(key)
+ continue
+
+ def check_variable_alias(self, vpath, ds_name):
+ """Check if a variable path matches an alias pattern."""
+ for pat, sub in self.dataset_aliases.items():
+ match = pat.search(vpath)
+ if match:
+ var_name = match.group(1)
+ return sub.replace("${VAR_NAME}", var_name)
+
+ return ds_name
+
+ def register_available_datasets(self):
+ """Register the available dataset in the current product file."""
+ if self.dataset_infos is not None:
+ return
+
+ self.dataset_infos = {}
+
+ self.parse_file_content()
+
+ for vpath, desc in self.variable_desc.items():
+ ds_name = desc["var_name"]
+ ds_name = self.check_variable_alias(vpath, ds_name)
+
+ unit = desc["attribs"].get("units", None)
+ if unit is not None and unit.startswith("seconds since "):
+ desc["seconds_since_epoch"] = unit.replace("seconds since ", "")
+
+ if self.broadcast_timestamps and desc["var_name"] == "onboard_utc":
+ desc["broadcast_on_dim"] = "n_fov"
+
+ self.register_dataset(ds_name, desc)
+
+ def get_dataset_infos(self, ds_name):
+ """Retrieve the dataset infos corresponding to one of the registered datasets."""
+ if ds_name not in self.dataset_infos:
+ raise KeyError(f"No dataset registered for {ds_name}")
+
+ return self.dataset_infos[ds_name]
+
+ def is_attribute_path(self, var_path):
+ """Check if a given path is a root attribute path."""
+ return var_path.startswith("/attr")
+
+ def is_property_path(self, var_path):
+ """Check if a given path is a sub-property path."""
+ return var_path.endswith(("/dtype", "/shape", "/dimensions"))
+
+ def is_netcdf_group(self, obj):
+ """Check if a given object is a netCDF group."""
+ return isinstance(obj, netCDF4.Group)
+
+ def variable_path_exists(self, var_path):
+ """Check if a given variable path is available in the underlying netCDF file.
+
+ All we really need to do here is to access the file_content dictionary
+ and check if we have a variable under that var_path key.
+ """
+ if self.is_attribute_path(var_path) or self.is_property_path(var_path):
+ return False
+
+ if var_path in self.file_content:
+ return not self.is_netcdf_group(self.file_content[var_path])
+
+ return False
+
+ def convert_data_type(self, data_array, dtype="auto"):
+ """Convert the data type if applicable."""
+ attribs = data_array.attrs
+
+ cur_dtype = np.dtype(data_array.dtype).name
+
+ if dtype == "auto" and cur_dtype in ["float32", "float64"]:
+ dtype = cur_dtype
+
+ to_float = "scale_factor" in attribs or "add_offset" in attribs
+
+ if dtype == "auto":
+ dtype = "float64" if to_float else cur_dtype
+
+ if cur_dtype != dtype:
+ data_array = data_array.astype(dtype)
+
+ return data_array
+
+ def apply_fill_value(self, data_array):
+ """Apply the rescaling transform on a given array."""
+ dtype = np.dtype(data_array.dtype).name
+ if dtype not in ["float32", "float64"]:
+ return data_array
+
+ nan_val = np.nan if dtype == "float64" else np.float32(np.nan)
+ attribs = data_array.attrs
+
+ if "valid_min" in attribs:
+ vmin = attribs["valid_min"]
+ data_array = data_array.where(data_array >= vmin, other=nan_val)
+
+ if "valid_max" in attribs:
+ vmax = attribs["valid_max"]
+ data_array = data_array.where(data_array <= vmax, other=nan_val)
+
+ if "valid_range" in attribs:
+ vrange = attribs["valid_range"]
+ data_array = data_array.where(data_array >= vrange[0], other=nan_val)
+ data_array = data_array.where(data_array <= vrange[1], other=nan_val)
+
+ missing_val = attribs.get("missing_value", None)
+ missing_val = attribs.get("_FillValue", missing_val)
+
+ if missing_val is None:
+ return data_array
+
+ return data_array.where(data_array != missing_val, other=nan_val)
+
+ def apply_rescaling(self, data_array):
+ """Apply the rescaling transform on a given array."""
+ attribs = data_array.attrs
+ if "scale_factor" in attribs or "add_offset" in attribs:
+ scale_factor = attribs.setdefault("scale_factor", 1)
+ add_offset = attribs.setdefault("add_offset", 0)
+
+ data_array = (data_array * scale_factor) + add_offset
+
+ for key in ["valid_range", "valid_min", "valid_max"]:
+ if key in attribs:
+ attribs[key] = attribs[key] * scale_factor + add_offset
+
+ data_array.attrs.update(attribs)
+
+ return data_array
+
+ def apply_reshaping(self, data_array):
+ """Apply the reshaping transform on a given IASI-NG data array.
+
+ These arrays may come as 3D arrays, in which case we collapse the
+ last 2 dimensions onto a single axis (i.e. the number of columns, or "y").
+
+ In the process, we also rename the first axis to "x".
+ """
+ if len(data_array.dims) > 2:
+ data_array = data_array.stack(y=(data_array.dims[1:]))
+
+ if data_array.dims[0] != "x":
+ data_array = data_array.rename({data_array.dims[0]: "x"})
+
+ if data_array.dims[1] != "y":
+ data_array = data_array.rename({data_array.dims[1]: "y"})
+
+ return data_array
+
+ def convert_to_datetime(self, data_array, ds_info):
+ """Convert the data to datetime values."""
+ epoch = ds_info["seconds_since_epoch"]
+
+ # Note: converting the time values to ns precision to avoid warnings
+ # from pandas/numpy:
+ data_array = xr.DataArray(
+ data=pd.to_datetime(epoch) + (data_array * 1e9).astype("timedelta64[ns]"),
+ dims=data_array.dims,
+ attrs=data_array.attrs,
+ )
+
+ return data_array
+
+ def apply_broadcast(self, data_array, ds_info):
+ """Apply the broadcast of the data array."""
+ dim_name = ds_info["broadcast_on_dim"]
+ if dim_name not in self.dimensions_desc:
+ raise KeyError(f"Invalid dimension name {dim_name}")
+ rep_count = self.dimensions_desc[dim_name]
+
+ data_array = xr.concat([data_array] * rep_count, dim=data_array.dims[-1])
+
+ return data_array
+
+ def get_transformed_dataset(self, ds_info):
+ """Retrieve a dataset with all transformations applied on it."""
+ vname = ds_info["location"]
+
+ if not self.variable_path_exists(vname):
+ raise KeyError(f"Invalid variable path: {vname}")
+
+ arr = self[vname]
+
+ arr = self.convert_data_type(arr)
+ arr = self.apply_fill_value(arr)
+ arr = self.apply_rescaling(arr)
+ arr = self.apply_reshaping(arr)
+
+ if "seconds_since_epoch" in ds_info:
+ arr = self.convert_to_datetime(arr, ds_info)
+
+ if "broadcast_on_dim" in ds_info:
+ arr = self.apply_broadcast(arr, ds_info)
+
+ return arr
+
+ def get_dataset(self, dataset_id, ds_info=None):
+ """Get a dataset."""
+ ds_name = dataset_id["name"]
+
+ if ds_name not in self.dataset_infos:
+ return None
+
+ if ds_info is None:
+ ds_info = self.get_dataset_infos(ds_name)
+
+ ds_name = ds_info["name"]
+
+ data_array = self.get_transformed_dataset(ds_info)
+
+ return data_array
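
For context, a minimal usage sketch of the new reader through the standard Satpy Scene API, assuming a real IASI-NG L2 TWV product file on disk; the filename below only illustrates the configured pattern (it is the same fake name used in the unit tests), and the dataset names follow the variables auto-registered by the handler above:

```python
from satpy import Scene

# Hypothetical product filename matching the pattern declared in the YAML
# configuration above.
filenames = [
    "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02-TWV_C_EUMT_20170616120000_G_V_"
    "20070912084329_20070912084600_O_N____.nc"
]

scn = Scene(filenames=filenames, reader="iasi_ng_l2_nc")

# Datasets are discovered automatically from the file content; variables found
# under an /optimal_estimation/ group would show up with an "_oem" suffix.
print(scn.available_dataset_names())

scn.load(["sounder_pixel_latitude", "sounder_pixel_longitude", "air_temperature"])

# After the reader's reshaping step, 3D (n_lines, n_for, n_fov) variables are
# collapsed to two dimensions named ("x", "y").
print(scn["air_temperature"].dims)
```
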
diff --git a/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
new file mode 100644
index 0000000000..23d199ebf6
--- /dev/null
+++ b/satpy/tests/reader_tests/test_iasi_ng_l2_nc.py
@@ -0,0 +1,640 @@
+# Copyright (c) 2022 Satpy developers
+#
+# satpy is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# satpy is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+ # along with satpy. If not, see <http://www.gnu.org/licenses/>.
+
+"""Unit tests on the IASI NG L2 reader using the conventional mock constructed context."""
+import os
+import re
+from datetime import datetime
+from unittest import mock
+
+import dask.array as da
+import numpy as np
+import pytest
+import xarray as xr
+
+from satpy import Scene
+from satpy.readers import load_reader
+from satpy.readers.iasi_ng_l2_nc import IASINGL2NCFileHandler
+from satpy.tests.reader_tests.test_netcdf_utils import FakeNetCDF4FileHandler
+
+d_lff = ("n_lines", "n_for", "n_fov")
+
+DATA_DESC = [
+ {
+ "group": "data/diagnostics",
+ "attrs": ["missing_value"],
+ "variables": {
+ "fg_cost": [d_lff, "float32", (3.4e38,)],
+ "nbr_iterations": [d_lff, "int32", (2147483647,)],
+ "rt_cost_x": [d_lff, "float32", (3.4e38,)],
+ "rt_cost_y": [d_lff, "float32", (3.4e38,)],
+ },
+ },
+ {
+ "group": "data/geolocation_information",
+ "attrs": [
+ "units",
+ "valid_min",
+ "valid_max",
+ "scale_factor",
+ "add_offset",
+ "missing_value",
+ ],
+ "variables": {
+ "onboard_utc": [
+ ("n_lines", "n_for"),
+ "float64",
+ ("seconds since 2020-01-01 00:00:00.000", -1e9, 1e9, None, None, -9e9),
+ ],
+ "sounder_pixel_latitude": [
+ d_lff,
+ "int32",
+ ("degrees_north", -18e8, 18e8, 5.0e-8, 0.0, -2147483648),
+ ],
+ "sounder_pixel_longitude": [
+ d_lff,
+ "int32",
+ ("degrees_east", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648),
+ ],
+ "sounder_pixel_sun_azimuth": [
+ d_lff,
+ "int32",
+ ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648),
+ ],
+ "sounder_pixel_sun_zenith": [
+ d_lff,
+ "int32",
+ ("degrees", -18e8, 18e8, 5.0e-8, 90.0, -2147483648),
+ ],
+ "sounder_pixel_azimuth": [
+ d_lff,
+ "int32",
+ ("degrees", -18.432e8, 18.432e8, 9.765625e-8, 0.0, -2147483648),
+ ],
+ "sounder_pixel_zenith": [
+ d_lff,
+ "int32",
+ ("degrees", -18e8, 18e8, 2.5e-8, 45.0, -2147483648),
+ ],
+ },
+ },
+ {
+ "group": "data/statistical_retrieval",
+ "attrs": ["units", "valid_min", "valid_max", "missing_value"],
+ "variables": {
+ "air_temperature": [d_lff, "float32", ("K", 100.0, 400.0, 3.4e38)],
+ "atmosphere_mass_content_of_water": [
+ d_lff,
+ "float32",
+ ("kg.m-2", 0, 300, 3.4e38),
+ ],
+ "qi_air_temperature": [d_lff, "float32", ("", 0, 25, 3.4e38)],
+ "qi_specific_humidity": [d_lff, "float32", ("", 0, 25, 3.4e38)],
+ "specific_humidity": [d_lff, "float32", ("kg/kg", 0, 1, 3.4e38)],
+ "surface_air_temperature": [d_lff, "float32", ("K", 100, 400, 3.4e38)],
+ "surface_specific_humidity": [d_lff, "float32", ("K", 100, 400, 3.4e38)],
+ },
+ },
+ {
+ "group": "data/surface_info",
+ "attrs": ["units", "valid_min", "valid_max", "missing_value"],
+ "variables": {
+ "height": [d_lff, "float32", ("m", -418, 8848, 3.4e38)],
+ "height_std": [d_lff, "float32", ("m", 0, 999.0, 3.4e38)],
+ "ice_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+ "land_fraction": [d_lff, "float32", ("%", 0, 100.0, 3.4e38)],
+ },
+ },
+]
+
+
+class FakeIASINGFileHandlerBase(FakeNetCDF4FileHandler):
+ """Fake base class for IASI NG handler."""
+
+ chunks = (10, 10, 10)
+
+ def inject_missing_value(self, dask_array, attribs):
+ """Inject missing value in data array.
+
+ This method is used to randomly set a few elements of the in the
+ input array to the configured missing value read from the attribs
+ dict. This operation is done lazily using the dask map_blocks
+ function.
+ """
+ missing_value = attribs["missing_value"]
+
+ missing_ratio = 0.05
+
+ def set_missing_values(block):
+ block_shape = block.shape
+
+ block_size = np.prod(block_shape)
+ block_num_to_replace = int(block_size * missing_ratio)
+
+ rng = np.random.default_rng()
+
+ flat_indices = rng.choice(block_size, block_num_to_replace, replace=False)
+ unraveled_indices = np.unravel_index(flat_indices, block_shape)
+ block[unraveled_indices] = missing_value
+
+ return block
+
+ dask_array = dask_array.map_blocks(set_missing_values, dtype=dask_array.dtype)
+
+ return dask_array
+
+ def generate_dask_array(self, dims, dtype, attribs):
+ """Generate some random dask array of the given dtype."""
+ shape = [self.dims[k] for k in dims]
+ chunks = [10] * len(dims)
+
+ if dtype == "int32":
+ rand_min = -2147483647
+ rand_max = 2147483647
+ dask_array = da.random.randint(
+ rand_min, rand_max, size=shape, chunks=chunks, dtype=np.int32
+ )
+ elif dtype == "float64" or dtype == "float32":
+ dask_array = da.random.random(shape, chunks=chunks)
+
+ if dtype == "float32":
+ dask_array = dask_array.astype(np.float32)
+
+ vmin = attribs.get("valid_min", -1e16)
+ vmax = attribs.get("valid_max", 1e16)
+ rand_min = vmin - (vmax - vmin) * 0.1
+ rand_max = vmax + (vmax - vmin) * 0.1
+
+ dask_array = dask_array * (rand_max - rand_min) + rand_min
+ else:
+ raise ValueError(f"Unsupported data type: {dtype}")
+
+ return dask_array
+
+ def add_rand_data(self, desc):
+ """Build a random DataArray from a given description."""
+ dtype = desc.get("data_type", "int32")
+ dims = desc["dims"]
+ attribs = desc["attribs"]
+ key = desc["key"]
+
+ dask_array = self.generate_dask_array(dims, dtype, attribs)
+
+ if "missing_value" in attribs:
+ dask_array = self.inject_missing_value(dask_array, attribs)
+
+ data_array = xr.DataArray(dask_array, dims=dims)
+ data_array.attrs.update(attribs)
+
+ self.content[key] = data_array
+ self.content[key + "/shape"] = data_array.shape
+ self.content[key + "/dtype"] = dask_array.dtype
+ self.content[key + "/dimensions"] = data_array.dims
+
+ for aname, val in attribs.items():
+ self.content[key + "/attr/" + aname] = val
+
+ def get_valid_attribs(self, anames, alist):
+ """Retrieve only the valid attributes from a list."""
+ attribs = {}
+ for idx, val in enumerate(alist):
+ if val is not None:
+ attribs[anames[idx]] = val
+
+ return attribs
+
+ def get_test_content(self, _filename, _filename_info, _filetype_info):
+ """Get the content of the test data.
+
+ Here we generate the default content we want to provide, depending
+ on the provided filename info.
+ """
+ n_lines = 10
+ n_for = 14
+ n_fov = 16
+ self.dims = {"n_lines": n_lines, "n_for": n_for, "n_fov": n_fov}
+
+ self.content = {}
+
+ for grp_desc in DATA_DESC:
+ prefix = grp_desc["group"]
+ anames = grp_desc["attrs"]
+ for vname, vdesc in grp_desc["variables"].items():
+ attribs = self.get_valid_attribs(anames, vdesc[2])
+
+ desc = {
+ "key": f"{prefix}/{vname}",
+ "dims": vdesc[0],
+ "data_type": vdesc[1],
+ "attribs": attribs,
+ }
+
+ self.add_rand_data(desc)
+
+ for key, val in self.dims.items():
+ self.content[f"data/dimension/{key}"] = val
+
+ return self.content
+
+
+class TestIASINGL2NCReader:
+ """Main test class for the IASI NG L2 reader."""
+
+ reader_name = "iasi_ng_l2_nc"
+
+ file_prefix = "W_XX-EUMETSAT-Darmstadt,SAT,SGA1-IAS-02"
+ file_suffix = "C_EUMT_20170616120000_G_V_20070912084329_20070912084600_O_N____.nc"
+ twv_filename = f"{file_prefix}-TWV_{file_suffix}"
+
+ def setup_method(self):
+ """Setup the reade config."""
+ from satpy._config import config_search_paths
+
+ self.reader_configs = config_search_paths(
+ os.path.join("readers", self.reader_name + ".yaml")
+ )
+
+ @pytest.fixture(autouse=True, scope="class")
+ def fake_handler(self):
+ """Wrap NetCDF4 FileHandler with our own fake handler."""
+ patch_ctx = mock.patch.object(
+ IASINGL2NCFileHandler, "__bases__", (FakeIASINGFileHandlerBase,)
+ )
+
+ with patch_ctx:
+ patch_ctx.is_local = True
+ yield patch_ctx
+
+ @pytest.fixture
+ def twv_handler(self):
+ """Create a simple (and fake) default handler on a TWV product."""
+ return self._create_file_handler(self.twv_filename)
+
+ @pytest.fixture
+ def twv_scene(self):
+ """Create a simple (and fake) satpy scene on a TWV product."""
+ return Scene(filenames=[self.twv_filename], reader=self.reader_name)
+
+ def _create_file_handler(self, filename):
+ """Create an handler for the given file checking that it can be parsed correctly."""
+ reader = load_reader(self.reader_configs)
+
+ files = reader.select_files_from_pathnames([filename])
+ assert len(files) == 1, "File should be recognized by the reader"
+
+ reader.create_filehandlers(files)
+
+ assert len(reader.file_handlers) == 1
+
+ assert self.reader_name in reader.file_handlers
+
+ handlers = reader.file_handlers[self.reader_name]
+
+ assert len(handlers) == 1
+ assert isinstance(handlers[0], IASINGL2NCFileHandler)
+
+ return handlers[0]
+
+ def test_filename_matching(self):
+ """Test filename matching against some random name."""
+ prefix = "W_fr-meteo-sat,GRAL,MTI1-IASING-2"
+ suffix = (
+ "C_EUMS_20220101120000_LEO_O_D_20220101115425_20220101115728_____W______.nc"
+ )
+ filename = f"{prefix}-l2p_{suffix}"
+
+ self._create_file_handler(filename)
+
+ def test_real_filename_matching(self):
+ """Test that we will match an actual IASI NG L2 product file name."""
+ supported_types = ["TWV", "CLD", "GHG", "SFC", "O3_", "CO_"]
+
+ for ptype in supported_types:
+ filename = f"{self.file_prefix}-{ptype}_{self.file_suffix}"
+ handler = self._create_file_handler(filename)
+
+ assert handler.filename_info["oflag"] == "C"
+ assert handler.filename_info["originator"] == "EUMT"
+ assert handler.filename_info["product_type"] == ptype
+
+ def test_sensing_times(self, twv_handler):
+ """Test that we read the sensing start/end times correctly from filename."""
+ assert twv_handler.start_time == datetime(2007, 9, 12, 8, 43, 29)
+ assert twv_handler.end_time == datetime(2007, 9, 12, 8, 46, 0)
+
+ def test_sensor_names(self, twv_handler):
+ """Test that the handler reports iasi_ng as sensor."""
+ assert twv_handler.sensor_names == {"iasi_ng"}
+
+ def test_available_datasets(self, twv_scene):
+ """Test the list of available datasets in scene."""
+ dnames = twv_scene.available_dataset_names()
+
+ expected_names = [
+ "onboard_utc",
+ "sounder_pixel_latitude",
+ "sounder_pixel_longitude",
+ "sounder_pixel_azimuth",
+ "sounder_pixel_zenith",
+ "sounder_pixel_sun_azimuth",
+ "sounder_pixel_sun_zenith",
+ "air_temperature",
+ "atmosphere_mass_content_of_water",
+ "qi_air_temperature",
+ "qi_specific_humidity",
+ "specific_humidity",
+ "surface_air_temperature",
+ "surface_specific_humidity",
+ ]
+
+ for dname in expected_names:
+ assert dname in dnames
+
+ def test_latlon_datasets(self, twv_scene):
+ """Test loading the latitude/longitude dataset."""
+ twv_scene.load(["sounder_pixel_latitude", "sounder_pixel_longitude"])
+ lat = twv_scene["sounder_pixel_latitude"]
+ lon = twv_scene["sounder_pixel_longitude"]
+
+ assert len(lat.dims) == 2
+ assert len(lon.dims) == 2
+ assert lat.dims[0] == "x"
+ assert lat.dims[1] == "y"
+ assert lon.dims[0] == "x"
+ assert lon.dims[1] == "y"
+
+ assert lat.dtype == np.float64
+ assert lon.dtype == np.float64
+
+ assert np.nanmin(lat) >= -90.0
+ assert np.nanmax(lat) <= 90.0
+
+ assert np.nanmin(lon) >= -180.0
+ assert np.nanmax(lon) <= 180.0
+
+ def test_onboard_utc_dataset(self, twv_scene):
+ """Test loading the onboard_utc dataset."""
+ twv_scene.load(["onboard_utc", "sounder_pixel_latitude"])
+ dset = twv_scene["onboard_utc"]
+
+ assert len(dset.dims) == 2
+ assert dset.dims[0] == "x"
+ assert dset.dims[1] == "y"
+
+ assert dset.dtype == np.dtype("datetime64[ns]")
+
+ lat = twv_scene["sounder_pixel_latitude"]
+
+ assert lat.shape == dset.shape
+
+ def test_nbr_iterations_dataset(self, twv_scene):
+ """Test loading the nbr_iterations dataset."""
+ twv_scene.load(["nbr_iterations"])
+ dset = twv_scene["nbr_iterations"]
+
+ assert len(dset.dims) == 2
+ assert dset.dtype == np.int32
+
+ def test_register_dataset(self, twv_handler):
+ """Test the register_dataset method."""
+ twv_handler.dataset_infos = {}
+ twv_handler.register_dataset("test_dataset", {"attr": "value"})
+ assert "test_dataset" in twv_handler.dataset_infos
+ assert twv_handler.dataset_infos["test_dataset"]["attr"] == "value"
+
+ with pytest.raises(KeyError):
+ twv_handler.register_dataset("test_dataset", {"attr": "new_value"})
+
+ def test_process_dimension(self, twv_handler):
+ """Test the process_dimension method."""
+ twv_handler.dimensions_desc = {}
+ twv_handler.process_dimension("group/dimension/test_dim", 10)
+ assert twv_handler.dimensions_desc["test_dim"] == 10
+
+ with pytest.raises(KeyError):
+ twv_handler.process_dimension("group/dimension/test_dim", 20)
+
+ def test_process_attribute(self, twv_handler):
+ """Test the process_attribute method."""
+ twv_handler.variable_desc = {"test_var": {"attribs": {}}}
+ twv_handler.process_attribute("test_var/attr/test_attr", "value")
+ assert twv_handler.variable_desc["test_var"]["attribs"]["test_attr"] == "value"
+
+ def test_process_variable(self, twv_handler):
+ """Test the process_variable method."""
+ twv_handler.file_content = {
+ "group/test_var/shape": (10, 10),
+ "group/test_var/dimensions": ("x", "y"),
+ "group/test_var/dtype": "float32",
+ }
+ twv_handler.ignored_patterns = []
+ twv_handler.process_variable("group/test_var")
+ assert "group/test_var" in twv_handler.variable_desc
+
+ def test_ignore_scalar_variable(self, twv_handler):
+ """Test ignoring of scalar variable."""
+ twv_handler.file_content = {
+ "group/test_var/shape": (1, 1),
+ "group/test_var/dimensions": ("x", "y"),
+ "group/test_var/dtype": "float32",
+ }
+ twv_handler.ignored_patterns = []
+ twv_handler.process_variable("group/test_var")
+ assert "group/test_var" not in twv_handler.variable_desc
+
+ def test_ignore_pattern_variable(self, twv_handler):
+ """Test ignoring of pattern in variable."""
+ twv_handler.file_content = {
+ "group/test_var/shape": (10, 10),
+ "group/test_var/dimensions": ("x", "y"),
+ "group/test_var/dtype": "float32",
+ }
+ twv_handler.ignored_patterns = [re.compile(r"test_")]
+ twv_handler.process_variable("group/test_var")
+ assert "group/test_var" not in twv_handler.variable_desc
+
+ def test_parse_file_content(self, twv_handler):
+ """Test the parse_file_content method."""
+ twv_handler.file_content = {
+ "dim/dimension/test_dim": 10,
+ "var/attr/test_attr": "value",
+ "grp/test_var": "my_data",
+ "grp/test_var/shape": (10, 10),
+ "grp/test_var/dimensions": ("x", "y"),
+ "grp/test_var/dtype": "float32",
+ }
+
+ twv_handler.parse_file_content()
+ assert "test_dim" in twv_handler.dimensions_desc
+ assert "grp/test_var" in twv_handler.variable_desc
+
+ def test_register_available_datasets(self, twv_handler):
+ """Test the register_available_datasets method."""
+ twv_handler.dataset_infos = None
+ twv_handler.variable_desc = {
+ "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}}
+ }
+ twv_handler.dataset_aliases = {}
+ twv_handler.register_available_datasets()
+ assert "test_var" in twv_handler.dataset_infos
+
+ def test_ignored_register_available_datasets(self, twv_handler):
+ """Test ignoring register_available_datasets method if done already."""
+ twv_handler.variable_desc = {
+ "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}}
+ }
+ twv_handler.dataset_aliases = {}
+ twv_handler.register_available_datasets()
+ assert "test_var" not in twv_handler.dataset_infos
+
+ def test_register_available_datasets_alias(self, twv_handler):
+ """Test the register_available_datasets method with alias."""
+ twv_handler.dataset_infos = None
+ twv_handler.variable_desc = {
+ "var/test_var": {"var_name": "test_var", "attribs": {"units": "test_unit"}}
+ }
+ twv_handler.dataset_aliases = {re.compile(r"var/(.+)$"): "${VAR_NAME}_oem"}
+ twv_handler.register_available_datasets()
+ assert "test_var_oem" in twv_handler.dataset_infos
+
+ def test_get_dataset_infos(self, twv_handler):
+ """Test the get_dataset_infos method."""
+ twv_handler.dataset_infos = {"test_dataset": {"attr": "value"}}
+ assert twv_handler.get_dataset_infos("test_dataset") == {"attr": "value"}
+
+ with pytest.raises(KeyError):
+ twv_handler.get_dataset_infos("non_existent_dataset")
+
+ def test_variable_path_exists(self, twv_handler):
+ """Test the variable_path_exists method."""
+ twv_handler.file_content = {"test_var": mock.MagicMock()}
+ assert twv_handler.variable_path_exists("test_var")
+ assert not twv_handler.variable_path_exists("/attr/test_var")
+ assert not twv_handler.variable_path_exists("test_var/dtype")
+ assert not twv_handler.variable_path_exists("/grp/a_non_existing_var")
+
+ def test_convert_data_type(self, twv_handler):
+ """Test the convert_data_type method."""
+ data = xr.DataArray(np.array([1, 2, 3], dtype=np.float32))
+ result = twv_handler.convert_data_type(data, dtype="float64")
+ assert result.dtype == np.float64
+
+ data.attrs["scale_factor"] = 2.0
+ result = twv_handler.convert_data_type(data, dtype="auto")
+ assert result.dtype == np.float32
+
+ def test_apply_fill_value(self, twv_handler):
+ """Test the apply_fill_value method."""
+ data = xr.DataArray(
+ np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+ attrs={"valid_min": 2.0, "valid_max": 4.0},
+ )
+ result = twv_handler.apply_fill_value(data)
+ assert np.isnan(result[0])
+ assert np.isnan(result[4])
+
+ def test_apply_fill_value_range(self, twv_handler):
+ """Test the apply_fill_value method with range."""
+ data = xr.DataArray(
+ np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+ attrs={"valid_range": [2.0, 4.0]},
+ )
+ result = twv_handler.apply_fill_value(data)
+ assert np.isnan(result[0])
+ assert np.isfinite(result[1])
+ assert np.isnan(result[4])
+
+ def test_apply_rescaling(self, twv_handler):
+ """Test the apply_rescaling method."""
+ data = xr.DataArray(
+ np.array([1, 2, 3]), attrs={"scale_factor": 2, "add_offset": 1}
+ )
+ result = twv_handler.apply_rescaling(data)
+ np.testing.assert_array_equal(result, [3, 5, 7])
+
+ def test_apply_reshaping(self, twv_handler):
+ """Test the apply_reshaping method."""
+ data = xr.DataArray(np.ones((2, 3, 4)), dims=("a", "b", "c"))
+ result = twv_handler.apply_reshaping(data)
+ assert result.dims == ("x", "y")
+ assert result.shape == (2, 12)
+
+ def test_convert_to_datetime(self, twv_handler):
+ """Test the convert_to_datetime method."""
+ data = xr.DataArray(np.array([0, 86400]), dims=("time",))
+ ds_info = {"seconds_since_epoch": "2000-01-01 00:00:00"}
+ result = twv_handler.convert_to_datetime(data, ds_info)
+ assert result[0].values == np.datetime64("2000-01-01T00:00:00")
+ assert result[1].values == np.datetime64("2000-01-02T00:00:00")
+
+ def test_apply_broadcast(self, twv_handler):
+ """Test the apply_broadcast method."""
+ data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+ twv_handler.dimensions_desc = {"n_fov": 2}
+ ds_info = {"broadcast_on_dim": "n_fov"}
+ result = twv_handler.apply_broadcast(data, ds_info)
+ assert result.shape == (6,)
+
+ def test_apply_broadcast_failure(self, twv_handler):
+ """Test the apply_broadcast method fails on missing dim."""
+ data = xr.DataArray(np.array([1, 2, 3]), dims=("x",))
+ twv_handler.dimensions_desc = {}
+ ds_info = {"broadcast_on_dim": "n_fov"}
+
+ with pytest.raises(KeyError):
+ twv_handler.apply_broadcast(data, ds_info)
+
+ def test_get_transformed_dataset(self, twv_handler):
+ """Test the get_transformed_dataset method."""
+ ds_info = {
+ "location": "test_var",
+ "seconds_since_epoch": "2000-01-01 00:00:00",
+ "broadcast_on_dim": "n_fov",
+ }
+ twv_handler.variable_path_exists = mock.MagicMock(return_value=True)
+ twv_handler.file_content["test_var"] = xr.DataArray(
+ np.array([[0, 86400]]), dims=("x", "y")
+ )
+
+ twv_handler.dimensions_desc = {"n_fov": 2}
+
+ result = twv_handler.get_transformed_dataset(ds_info)
+ assert result.shape == (1, 4)
+ assert result.dtype == "datetime64[ns]"
+
+ def test_get_transformed_dataset_failure(self, twv_handler):
+ """Test the get_transformed_dataset method fails on invalid path."""
+ ds_info = {
+ "location": "test_var",
+ "seconds_since_epoch": "2000-01-01 00:00:00",
+ "broadcast_on_dim": "n_fov",
+ }
+
+ with pytest.raises(KeyError):
+ twv_handler.get_transformed_dataset(ds_info)
+
+ def test_get_dataset(self, twv_handler):
+ """Test the get_dataset method."""
+ twv_handler.dataset_infos = {
+ "test_dataset": {"name": "test_dataset", "location": "test_var"}
+ }
+ twv_handler.get_transformed_dataset = mock.MagicMock(
+ return_value=xr.DataArray([1, 2, 3])
+ )
+
+ result = twv_handler.get_dataset({"name": "test_dataset"})
+ assert result.equals(xr.DataArray([1, 2, 3]))
+
+ assert twv_handler.get_dataset({"name": "non_existent_dataset"}) is None