From 8861320d1339f32a19ecdb46d61defc54ca71d65 Mon Sep 17 00:00:00 2001
From: Martin Raspaud
Date: Fri, 18 Oct 2024 08:51:37 +0200
Subject: [PATCH 1/3] Refactor telemetry

---
 pygac/calibration/noaa.py |  6 +--
 pygac/gac_klm.py          |  4 +-
 pygac/klm_reader.py       | 30 +++++-------
 pygac/lac_klm.py          |  4 +-
 pygac/pod_reader.py       | 59 +++++++++++++++--------
 pygac/reader.py           | 99 +++++++--------------------------------
 6 files changed, 77 insertions(+), 125 deletions(-)

diff --git a/pygac/calibration/noaa.py b/pygac/calibration/noaa.py
index 4ca20d1..e707ca4 100644
--- a/pygac/calibration/noaa.py
+++ b/pygac/calibration/noaa.py
@@ -79,9 +79,9 @@ def calibrate(ds, custom_coeffs=None, coeffs_file=None):
         corr
     )
 
-    prt = ds["prt_counts"].data
-    ict = ds["ict_counts"].data
-    space = ds["space_counts"].data
+    prt = ds["PRT"].mean(dim="PRT_measurement").data
+    ict = ds["ICT"].mean(dim="back_scan").data[:, -3:]
+    space = ds["space_counts"].mean(dim="back_scan").data[:, -3:]
 
     ir_channels_to_calibrate = [3, 4, 5]
 
diff --git a/pygac/gac_klm.py b/pygac/gac_klm.py
index 850987d..434be08 100644
--- a/pygac/gac_klm.py
+++ b/pygac/gac_klm.py
@@ -138,8 +138,8 @@
                                     ("PRT", ">u2", (3, )),
                                     ("ch3_patch_temp", ">u2"),
                                     ("spare", ">u2"), ]),
-                     ("back_scan", ">u2", (30, )),
-                     ("space_data", ">u2", (50, )),
+                     ("back_scan", ">u2", (10, 3)),
+                     ("space_data", ">u2", (10, 5)),
                      ("sync_delta", ">u2"),
                      ("zero_fill5", ">i2"),
                      # AVHRR SENSOR DATA
diff --git a/pygac/klm_reader.py b/pygac/klm_reader.py
index 799056f..673d105 100644
--- a/pygac/klm_reader.py
+++ b/pygac/klm_reader.py
@@ -36,14 +36,10 @@
 
 import datetime
 import logging
-
-try:
-    from enum import IntFlag
-except ImportError:
-    # python version < 3.6, use a simple object without nice representation
-    IntFlag = object
+from enum import IntFlag
 
 import numpy as np
+import xarray as xr
 
 from pygac.correct_tsm_issue import TSM_AFFECTED_INTERVALS_KLM, get_tsm_idx
 from pygac.reader import Reader, ReaderError
@@ -742,23 +738,21 @@ def get_telemetry(self):
             space_counts: np.array
 
         """
-        prt_counts = np.mean(self.scans["telemetry"]["PRT"], axis=1)
+        hmf_array = self.scans
+        prt_counts = xr.DataArray(hmf_array["telemetry"]["PRT"],
+                                  dims=["scan_line_index", "PRT_measurement"])
 
         # getting ICT counts
-
-        ict_counts = np.zeros((len(self.scans), 3))
-        ict_counts[:, 0] = np.mean(self.scans["back_scan"][:, 0::3], axis=1)
-        ict_counts[:, 1] = np.mean(self.scans["back_scan"][:, 1::3], axis=1)
-        ict_counts[:, 2] = np.mean(self.scans["back_scan"][:, 2::3], axis=1)
+        ict_counts = xr.DataArray(hmf_array["back_scan"],
+                                  dims=["scan_line_index", "back_scan", "channel_name"],
+                                  coords=dict(channel_name=["3b", "4", "5"]))
 
         # getting space counts
+        space_counts = xr.DataArray(self.split_array_along_channel_3(hmf_array["space_data"]),
+                                    dims=["scan_line_index", "back_scan", "channel_name"],
+                                    coords=dict(channel_name=["1", "2", "3a", "3b", "4", "5"]))
 
-        space_counts = np.zeros((len(self.scans), 3))
-        space_counts[:, 0] = np.mean(self.scans["space_data"][:, 2::5], axis=1)
-        space_counts[:, 1] = np.mean(self.scans["space_data"][:, 3::5], axis=1)
-        space_counts[:, 2] = np.mean(self.scans["space_data"][:, 4::5], axis=1)
-
-        return prt_counts, ict_counts, space_counts
+        return xr.Dataset(dict(PRT=prt_counts, ICT=ict_counts, space_counts=space_counts))
 
     def _get_lonlat_from_file(self):
         """Get the longitudes and latitudes."""
diff --git a/pygac/lac_klm.py b/pygac/lac_klm.py
index 2d17df0..eca019a 100644
--- a/pygac/lac_klm.py
+++ b/pygac/lac_klm.py
@@ -137,8 +137,8 @@
                                     ("PRT", ">u2", (3, )),
                                     ("ch3_patch_temp", ">u2"),
                                     ("spare", ">u2"), ]),
-                     ("back_scan", ">u2", (30, )),
-                     ("space_data", ">u2", (50, )),
+                     ("back_scan", ">u2", (10, 3)),
+                     ("space_data", ">u2", (10, 5)),
                      ("sync_delta", ">u2"),
                      ("zero_fill4", ">i2"),
                      # EARTH OBSERVATIONS
diff --git a/pygac/pod_reader.py b/pygac/pod_reader.py
index 0990e09..316395b 100644
--- a/pygac/pod_reader.py
+++ b/pygac/pod_reader.py
@@ -46,6 +46,7 @@
     IntFlag = object
 
 import numpy as np
+import xarray as xr
 from pyorbital.geoloc import compute_pixels, get_lonlatalt
 from pyorbital.geoloc_instrument_definitions import avhrr_gac
 
@@ -558,29 +559,49 @@ def get_telemetry(self):
             space_counts: np.array
 
         """
-        number_of_scans = self.scans["telemetry"].shape[0]
-        decode_tele = np.zeros((int(number_of_scans), 105))
-        decode_tele[:, ::3] = (self.scans["telemetry"] >> 20) & 1023
-        decode_tele[:, 1::3] = (self.scans["telemetry"] >> 10) & 1023
-        decode_tele[:, 2::3] = self.scans["telemetry"] & 1023
-
-        prt_counts = np.mean(decode_tele[:, 17:20], axis=1)
+        telemetry = self.scans["telemetry"]
+        decode_tele = self.unpack_telemetry(telemetry)
+
+        hmf_dtype = np.dtype([("frame_sync", "u2", (6, )),
+                              ("id", [("id", "u2"),
+                                      ("spare", "u2")]),
+                              ("timecode", "u2", (4, )),
+                              ("telemetry", [("ramp_calibration", "u2", (5, )),
+                                             ("PRT", "u2", (3, )),
+                                             ("ch3_patch_temp", "u2"),
+                                             ("spare", "u2"), ]),
+                              ("back_scan", "u2", (10, 3)),
+                              ("space_data", "u2", (10, 5)),
+                              ("sync", "u2")])
+
+        hmf_array = decode_tele[:, :103].view(dtype=hmf_dtype).squeeze()
+        prt_counts = xr.DataArray(hmf_array["telemetry"]["PRT"],
+                                  dims=["scan_line_index", "PRT_measurement"])
 
         # getting ICT counts
-
-        ict_counts = np.zeros((int(number_of_scans), 3))
-        ict_counts[:, 0] = np.mean(decode_tele[:, 22:50:3], axis=1)
-        ict_counts[:, 1] = np.mean(decode_tele[:, 23:51:3], axis=1)
-        ict_counts[:, 2] = np.mean(decode_tele[:, 24:52:3], axis=1)
+        ict_counts = xr.DataArray(hmf_array["back_scan"],
+                                  dims=["scan_line_index", "back_scan", "channel_name"],
+                                  coords=dict(channel_name=["3", "4", "5"]))
 
         # getting space counts
-
-        space_counts = np.zeros((int(number_of_scans), 3))
-        space_counts[:, 0] = np.mean(decode_tele[:, 54:100:5], axis=1)
-        space_counts[:, 1] = np.mean(decode_tele[:, 55:101:5], axis=1)
-        space_counts[:, 2] = np.mean(decode_tele[:, 56:102:5], axis=1)
-
-        return prt_counts, ict_counts, space_counts
+        space_counts = xr.DataArray(hmf_array["space_data"],
+                                    dims=["scan_line_index", "back_scan", "channel_name"],
+                                    coords=dict(channel_name=["1", "2", "3", "4", "5"]))
+
+        return xr.Dataset(dict(PRT=prt_counts, ICT=ict_counts, space_counts=space_counts))
+
+    def unpack_telemetry(self, telemetry):
+        """Unpack the telemetry from 10 to 16 bits."""
+        # Documentation (POD guide, section 3) says:
+        # "The telemetry data are stored in 140 bytes. The first 103 (10 bit) words are packed three (10 bit) words in
+        # four bytes, right justified. The last four byte group contains one (10 bit) word with 20 trailing bits. All
+        # unused bits are set to zero."
+        number_of_scans = telemetry.shape[0]
+        decode_tele = np.zeros((int(number_of_scans), 105), dtype=np.uint16)
+        decode_tele[:, ::3] = (telemetry >> 20) & 1023
+        decode_tele[:, 1::3] = (telemetry >> 10) & 1023
+        decode_tele[:, 2::3] = telemetry & 1023
+        return decode_tele
 
     @staticmethod
     def _get_ir_channels_to_calibrate():
diff --git a/pygac/reader.py b/pygac/reader.py
index 1883727..c3a5798 100644
--- a/pygac/reader.py
+++ b/pygac/reader.py
@@ -420,69 +420,20 @@ def get_counts(self):
         counts[:, 1::3] = ((packed_data >> 10) & 1023)[:, :nb2]
         counts[:, 2::3] = (packed_data & 1023)[:, :nb3]
         counts = counts.reshape((-1, self.scan_width, 5))
+        return self.split_array_along_channel_3(counts)
+
+    def split_array_along_channel_3(self, array_to_split):
         try:
             switch = self.get_ch3_switch()
         except AttributeError:
-            return counts
-        else:
-            channels = np.zeros((len(self.scans), self.scan_width, 6),
-                                dtype=counts.dtype)
-            channels[:, :, :2] = counts[:, :, :2]
-            channels[:, :, -2:] = counts[:, :, -2:]
-            channels[:, :, 2][switch == 1] = counts[:, :, 2][switch == 1]
-            channels[:, :, 3][switch == 0] = counts[:, :, 2][switch == 0]
-            return channels
-
-    @abstractmethod
-    def _get_times_from_file(self):  # pragma: no cover
-        """Specify how to read scanline timestamps from data.
-
-        Returns:
-            int: year
-            int: day of year
-            int: milliseconds since 00:00
-
-        """
-        raise NotImplementedError
-
-    def get_times(self):
-        """Read scanline timestamps and try to correct invalid values.
-
-        Returns:
-            UTC timestamps
-
-        """
-        if self._times_as_np_datetime64 is None:
-            # Read timestamps
-            year, jday, msec = self._get_times_from_file()
-
-            # Correct invalid values
-            year, jday, msec = self.correct_times_median(year=year, jday=jday,
-                                                         msec=msec)
-            self._times_as_np_datetime64 = self.to_datetime64(year=year, jday=jday, msec=msec)
-            try:
-                self._times_as_np_datetime64 = self.correct_times_thresh()
-            except TimestampMismatch as err:
-                LOG.error(str(err))
-
-        return self._times_as_np_datetime64
-
-    @staticmethod
-    def to_datetime64(year, jday, msec):
-        """Convert timestamps to numpy.datetime64.
-
-        Args:
-            year: Year
-            jday: Day of the year (1-based)
-            msec: Milliseconds since 00:00
-
-        Returns:
-            numpy.datetime64: Converted timestamps
-
-        """
-        return (year.astype(str).astype("datetime64[Y]")
-                + (jday - 1).astype("timedelta64[D]")
-                + msec.astype("timedelta64[ms]"))
+            return array_to_split
+        vis = array_to_split[:, :, :2]
+        tir = array_to_split[:, :, -2:]
+        nir = np.zeros([*array_to_split.shape[:2], 2], dtype=array_to_split.dtype)
+        nir[:, :, 0][switch == 1] = array_to_split[:, :, 2][switch == 1]
+        nir[:, :, 1][switch == 0] = array_to_split[:, :, 2][switch == 0]
+        split_array = np.dstack([vis, nir, tir])
+        return split_array
 
     @staticmethod
     def to_datetime(datetime64):
@@ -552,10 +503,8 @@ def create_counts_dataset(self):
 
         if counts.shape[-1] == 5:
             channel_names = ["1", "2", "3", "4", "5"]
-            ir_channel_names = ["3", "4", "5"]
         else:
             channel_names = ["1", "2", "3a", "3b", "4", "5"]
-            ir_channel_names = ["3b", "4", "5"]
 
         columns = np.arange(scan_size)
         channels = xr.DataArray(counts, dims=["scan_line_index", "columns", "channel_name"],
@@ -564,20 +513,19 @@
                                 coords=dict(channel_name=channel_names,
                                             columns=columns,
                                             times=("scan_line_index", times)))
-        prt, ict, space = self._get_telemetry_dataarrays(line_numbers, ir_channel_names)
+        ds = self.get_telemetry()
 
         longitudes, latitudes = self._get_lonlat_dataarrays(line_numbers, columns)
 
         if self.interpolate_coords:
            channels = channels.assign_coords(longitude=(("scan_line_index", "columns"),
-                                                          longitudes.reindex_like(channels).data),
-                                              latitude=(("scan_line_index", "columns"),
+                                                         longitudes.reindex_like(channels).data),
+                                              latitude=(("scan_line_index", "columns"),
                                                          latitudes.reindex_like(channels).data))
-
-        ds = xr.Dataset(dict(channels=channels, prt_counts=prt, ict_counts=ict, space_counts=space,
-                             longitude=longitudes, latitude=latitudes),
-                        attrs=head)
-
+        ds["channels"] = channels
+        ds["longitude"] = longitudes
+        ds["latitude"] = latitudes
+        ds.attrs.update(head)
         ds.attrs["spacecraft_name"] = self.spacecraft_name
         self._update_meta_data_object(ds.attrs)
         return ds
@@ -604,17 +552,6 @@ def _get_lonlat_dataarrays(self, line_numbers, columns):
 
         return longitudes,latitudes
 
-    def _get_telemetry_dataarrays(self, line_numbers, ir_channel_names):
-        prt, ict, space = self.get_telemetry()
-
-        prt = xr.DataArray(prt, dims=["scan_line_index"], coords=dict(scan_line_index=line_numbers))
-        ict = xr.DataArray(ict, dims=["scan_line_index", "ir_channel_name"],
-                           coords=dict(ir_channel_name=ir_channel_names, scan_line_index=line_numbers))
-        space = xr.DataArray(space, dims=["scan_line_index", "ir_channel_name"],
-                             coords=dict(ir_channel_name=ir_channel_names, scan_line_index=line_numbers))
-
-        return prt,ict,space
-
     def get_calibrated_channels(self):
         """Calibrate and return the channels."""
         calibrated_ds = self.get_calibrated_dataset()

From 91c42ff1d24e2cae9f0bc9f83ddae96d513bea52 Mon Sep 17 00:00:00 2001
From: Martin Raspaud
Date: Fri, 18 Oct 2024 08:56:20 +0200
Subject: [PATCH 2/3] Style refactor

---
 pygac/klm_reader.py        |  4 +--
 pygac/pod_reader.py        | 10 ++++---
 pygac/reader.py            | 60 ++++++++++++++++++++++++++++++++++++--
 pygac/tests/test_reader.py |  2 +-
 4 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/pygac/klm_reader.py b/pygac/klm_reader.py
index 673d105..44f4c84 100644
--- a/pygac/klm_reader.py
+++ b/pygac/klm_reader.py
@@ -658,9 +658,9 @@ def read(self, filename, fileobj=None):
         self.filename = filename
         LOG.info("Reading %s", self.filename)
         with file_opener(fileobj or filename) as fd_:
-            self.ars_head, self.head = self.read_header(
+            self.archive_header, self.head = self.read_header(
                 filename, fileobj=fd_)
-        if self.ars_head:
+        if self.archive_header:
             ars_offset = ars_header.itemsize
         else:
             ars_offset = 0
diff --git a/pygac/pod_reader.py b/pygac/pod_reader.py
index 316395b..ef2062e 100644
--- a/pygac/pod_reader.py
+++ b/pygac/pod_reader.py
@@ -279,9 +279,9 @@ def read(self, filename, fileobj=None):
         LOG.info("Reading %s", self.filename)
         # choose the right header depending on the date
         with file_opener(fileobj or filename) as fd_:
-            self.tbm_head, self.head = self.read_header(
+            self.archive_header, self.head = self.read_header(
                 filename, fileobj=fd_, header_date=self.header_date)
-        if self.tbm_head:
+        if self.archive_header:
             tbm_offset = tbm_header.itemsize
         else:
             tbm_offset = 0
@@ -348,8 +348,10 @@ def _validate_tbm_header(cls, potential_tbm_header):
             return potential_tbm_header.copy()
 
         # This will raise a DecodingError if the data_set_name is not valid.
-        cls._decode_data_set_name(data_set_name)
-        return potential_tbm_header.copy()
+        data_set_name = cls._decode_data_set_name(data_set_name)
+        tbm_header = potential_tbm_header.copy()
+        tbm_header["data_set_name"] = data_set_name.decode().strip("\x80").encode()
+        return tbm_header
 
 
     @classmethod
diff --git a/pygac/reader.py b/pygac/reader.py
index c3a5798..a6a1ac6 100644
--- a/pygac/reader.py
+++ b/pygac/reader.py
@@ -856,6 +856,60 @@ def get_angles(self):
 
         return sat_azi, sat_zenith, sun_azi, sun_zenith, rel_azi
 
+    @abstractmethod
+    def _get_times_from_file(self):  # pragma: no cover
+        """Specify how to read scanline timestamps from data.
+
+        Returns:
+            int: year
+            int: day of year
+            int: milliseconds since 00:00
+
+        """
+        raise NotImplementedError
+
+    def get_times(self):
+        """Read scanline timestamps and try to correct invalid values.
+
+        Returns:
+            UTC timestamps
+
+        """
+        if self._times_as_np_datetime64 is None:
+            self._times_as_np_datetime64 = self._get_valid_datetime64s_from_file()
+            try:
+                self._times_as_np_datetime64 = self.correct_corrupted_times_using_thresholding()
+            except TimestampMismatch as err:
+                LOG.error(str(err))
+
+        return self._times_as_np_datetime64
+
+    def _get_valid_datetime64s_from_file(self):
+        # Read timestamps
+        year, jday, msec = self._get_times_from_file()
+
+        # Correct invalid values
+        year, jday, msec = self.correct_times_median(year=year, jday=jday, msec=msec)
+        times_as_np_datetime64 = self.to_datetime64(year=year, jday=jday, msec=msec)
+        return times_as_np_datetime64
+
+    @staticmethod
+    def to_datetime64(year, jday, msec):
+        """Convert timestamps to numpy.datetime64.
+
+        Args:
+            year: Year
+            jday: Day of the year (1-based)
+            msec: Milliseconds since 00:00
+
+        Returns:
+            numpy.datetime64: Converted timestamps
+
+        """
+        return (year.astype(str).astype("datetime64[Y]")
+                + (jday - 1).astype("timedelta64[D]")
+                + msec.astype("timedelta64[ms]"))
+
     def correct_times_median(self, year, jday, msec):
         """Replace invalid timestamps with statistical estimates (using median).
 
@@ -985,9 +1039,9 @@ def correct_scan_line_numbers(self):
                         "nz_diffs": nz_diffs})
         return results
 
-    def correct_times_thresh(self, max_diff_from_t0_head=6*60*1000,
-                             min_frac_near_t0_head=0.01,
-                             max_diff_from_ideal_t=10*1000):
+    def correct_corrupted_times_using_thresholding(self, max_diff_from_t0_head=6*60*1000,
+                                                   min_frac_near_t0_head=0.01,
+                                                   max_diff_from_ideal_t=10*1000):
         """Correct corrupted timestamps using a threshold approach.
 
         The threshold approach is based on the scanline number and the header
diff --git a/pygac/tests/test_reader.py b/pygac/tests/test_reader.py
index 68dbf11..16f2534 100644
--- a/pygac/tests/test_reader.py
+++ b/pygac/tests/test_reader.py
@@ -618,7 +618,7 @@ def test_correct_times_thresh(self, get_header_timestamp):
                                    dtype=[("scan_line_number", ">u2")])
 
         # Test correction
-        self.reader.correct_times_thresh()
+        self.reader.correct_corrupted_times_using_thresholding()
         numpy.testing.assert_array_equal(self.reader._times_as_np_datetime64, utcs_expected)
 
     def test_calculate_sun_earth_distance_correction(self):

From a421bae4862df3f8535de8bfd886ac7b99ad6fc5 Mon Sep 17 00:00:00 2001
From: Martin Raspaud
Date: Fri, 18 Oct 2024 11:59:48 +0200
Subject: [PATCH 3/3] Fix tests

---
 pygac/calibration/noaa.py  |  4 ++--
 pygac/tests/test_klm.py    |  8 +++++---
 pygac/tests/test_reader.py | 31 +++++++++++++++++++++++--------
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/pygac/calibration/noaa.py b/pygac/calibration/noaa.py
index e707ca4..285ee69 100644
--- a/pygac/calibration/noaa.py
+++ b/pygac/calibration/noaa.py
@@ -525,8 +525,8 @@ def calibrate_thermal(counts, prt, ict, space, line_numbers, channel, cal):
     nonzeros = np.logical_not(zeros)
 
     space[zeros] = np.interp((zeros).nonzero()[0],
-                              (nonzeros).nonzero()[0],
-                              space[nonzeros])
+                             (nonzeros).nonzero()[0],
+                             space[nonzeros])
 
     # convolving and smoothing PRT, ICT and SPACE values
     if lines > 51:
diff --git a/pygac/tests/test_klm.py b/pygac/tests/test_klm.py
index 2074a50..fc71e95 100644
--- a/pygac/tests/test_klm.py
+++ b/pygac/tests/test_klm.py
@@ -181,13 +181,15 @@ def test_get_ch3_switch(self):
 
     def test_calibrate_channels(self):
         """Test channel calibration."""
+        # Switch on 3b
+        self.reader.scans["scan_line_bit_field"] = 0
         # ICT
         self.reader.scans["back_scan"] = 400
-        self.reader.scans["back_scan"][0::5, :] = 0
+        self.reader.scans["back_scan"][50, :, :] = 0
         # Space
         self.reader.scans["space_data"] = 400
-        self.reader.scans["space_data"][0::5, :] = 0
-
+        self.reader.scans["space_data"][50, :, :] = 0
+        self.reader.get_calibrated_channels()
         assert np.any(np.isfinite(self.reader.get_calibrated_channels()))
 
     def test_calibrate_inactive_3b(self):
diff --git a/pygac/tests/test_reader.py b/pygac/tests/test_reader.py
index 16f2534..c1f627e 100644
--- a/pygac/tests/test_reader.py
+++ b/pygac/tests/test_reader.py
@@ -86,11 +86,25 @@ def get_header_timestamp(self):
 
     def get_telemetry(self):
         """Get the telemetry."""
-        prt = 51 * np.ones(self.along_track)  # prt threshold is 50
-        prt[::5] = 0
-        ict = 101 * np.ones((self.along_track, 3))  # ict threshold is 100
-        space = 101 * np.ones((self.along_track, 3))  # space threshold is 100
-        return prt, ict, space
+        prt = 51 * np.ones((self.along_track, 3))  # prt threshold is 50
+        prt[::5, :] = 0
+        ict = 101 * np.ones((self.along_track, 10, 3))  # ict threshold is 100
+        space = 101 * np.ones((self.along_track, 10, 5))  # space threshold is 100
+
+        prt_counts = xr.DataArray(prt,
+                                  dims=["scan_line_index", "PRT_measurement"])
+
+        # getting ICT counts
+        ict_counts = xr.DataArray(ict,
+                                  dims=["scan_line_index", "back_scan", "channel_name"],
+                                  coords=dict(channel_name=["3", "4", "5"]))
+
+        # getting space counts
+        space_counts = xr.DataArray(self.split_array_along_channel_3(space),
+                                    dims=["scan_line_index", "back_scan", "channel_name"],
+                                    coords=dict(channel_name=["1", "2", "3", "4", "5"]))
+
+        return xr.Dataset(dict(PRT=prt_counts, ICT=ict_counts, space_counts=space_counts))
 
     def _adjust_clock_drift(self):
         pass
@@ -835,9 +849,10 @@ def test_read_to_dataset_is_a_dataset_including_channels_and_telemetry(pod_file_
     assert "times" in dataset.coords
     assert "scan_line_index" in dataset.coords
     assert "channel_name" in dataset.coords
-    assert dataset["prt_counts"].shape == (3,)
-    assert dataset["ict_counts"].shape == (3, 3)
-    assert dataset["space_counts"].shape == (3, 3)
+    assert dataset["PRT"].shape == (3, 3)
+    assert dataset["ICT"].shape == (3, 10, 5)
+    np.testing.assert_array_equal(dataset["ICT"].sel(channel_name=["1", "2"]), np.nan)
+    assert dataset["space_counts"].shape == (3, 10, 5)
 
 
 def test_read_to_dataset_without_interpolation(pod_file_with_tbm_header, pod_tle):
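
Reviewer note (not part of the patch series): a minimal, self-contained sketch of how the refactored `get_telemetry()` output is consumed by the new reductions in `calibration/noaa.py`. The array shapes and fill values are made up for illustration, and the KLM-style channel names are assumed; the only thing taken from the patches is the Dataset layout (`PRT`, `ICT`, `space_counts`) and the `.mean(...)` / `[:, -3:]` reductions.

```python
import numpy as np
import xarray as xr

n_lines = 4  # illustrative number of scan lines

# Dataset with the same structure as the refactored get_telemetry() returns.
# When the DataArrays are combined, xarray aligns the "channel_name" coordinate,
# so ICT picks up NaN columns for channels it does not have (this is what the
# updated test_read_to_dataset_* assertion checks).
telemetry = xr.Dataset(
    dict(
        PRT=xr.DataArray(np.full((n_lines, 3), 300.0),
                         dims=["scan_line_index", "PRT_measurement"]),
        ICT=xr.DataArray(np.full((n_lines, 10, 3), 400.0),
                         dims=["scan_line_index", "back_scan", "channel_name"],
                         coords=dict(channel_name=["3b", "4", "5"])),
        space_counts=xr.DataArray(np.full((n_lines, 10, 6), 40.0),
                                  dims=["scan_line_index", "back_scan", "channel_name"],
                                  coords=dict(channel_name=["1", "2", "3a", "3b", "4", "5"])),
    )
)

# Same reductions as the patched calibrate() in pygac/calibration/noaa.py:
# average the PRT readings per scan line, average ICT/space over the back-scan
# samples, and keep only the last three (IR) channel columns.
prt = telemetry["PRT"].mean(dim="PRT_measurement").data              # shape (n_lines,)
ict = telemetry["ICT"].mean(dim="back_scan").data[:, -3:]            # shape (n_lines, 3)
space = telemetry["space_counts"].mean(dim="back_scan").data[:, -3:]  # shape (n_lines, 3)

print(prt.shape, ict.shape, space.shape)
```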