From cf6ee7ae933d76ef471de1fd95ced0ca66edbc81 Mon Sep 17 00:00:00 2001
From: Zach Sherman
Date: Fri, 29 Sep 2023 08:05:12 -0500
Subject: [PATCH] FIX: IO Warning fixes. (#719)

* FIX: Fixes for some warnings.

* FIX: Fix for more warnings.

* MNT: Remove final warnings.

* FIX: Add missing dtype conversion.

* FIX: Fix for datatype.

* MNT: Revert cftime call.
---
 act/io/armfiles.py | 14 ++++++++------
 act/io/noaagml.py  | 22 +++++++++++-----------
 act/io/noaapsl.py  |  2 +-
 act/io/sodar.py    | 12 ++++++++++--
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/act/io/armfiles.py b/act/io/armfiles.py
index 2ff35b31cf..c2f362b1ef 100644
--- a/act/io/armfiles.py
+++ b/act/io/armfiles.py
@@ -17,6 +17,7 @@
 import tempfile
 import warnings
 
+from cftime import num2date
 import numpy as np
 import xarray as xr
 import datetime as dt
@@ -177,8 +178,8 @@ def read_netcdf(
     # If requested use base_time and time_offset to derive time. Assumes that the units
     # of both are in seconds and that the value is number of seconds since epoch.
     if use_base_time:
-        time = (ds['base_time'].values + ds['time_offset'].values) * 1000000.0
-        time = np.array(time, dtype='datetime64[us]')
+        time = num2date(ds['base_time'].values + ds['time_offset'].values, ds['base_time'].attrs['units'])
+        time = time.astype('datetime64[ns]')
 
         # Need to use a new Dataset creation to correctly index time for use with
         # .group and .resample methods in Xarray Datasets.
@@ -196,10 +197,8 @@
         # https://github.com/pydata/xarray/issues/3644
         # To ensure the times are read in correctly need to set use_cftime=True.
         # This will read in time as cftime object. But Xarray uses numpy datetime64
-        # natively. This will convert the cftime time values to numpy datetime64. cftime
-        # does not preserve the time past ms precision. We will use ms precision for
-        # the conversion.
-        desired_time_precision = 'datetime64[ms]'
+        # natively. This will convert the cftime time values to numpy datetime64.
+        desired_time_precision = 'datetime64[ns]'
         for var_name in ['time', 'time_offset']:
             try:
                 if 'time' in ds.dims and type(ds[var_name].values[0]).__module__.startswith('cftime.'):
@@ -762,6 +761,9 @@ def write_netcdf(
         if 'history' in list(write_ds.attrs.keys()):
             write_ds.attrs['history'] += ''.join(['\n', str(current_time), ' created by ACT ',
                                                   str(act.__version__), ' act.io.write.write_netcdf'])
+
+        if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding:
+            write_ds.time.encoding.update(write_ds.time_bounds.encoding)
 
         write_ds.to_netcdf(encoding=encoding, **kwargs)
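The read_netcdf hunk above replaces manual microsecond arithmetic with cftime's num2date followed by a cast to Xarray's native nanosecond dtype. Here is a minimal, self-contained sketch of that pattern; the units string and sample values are invented for illustration, and the two only_use_* flags are an addition of this sketch (the patch calls num2date without them):

    import numpy as np
    from cftime import num2date

    # Hypothetical stand-ins for ds['base_time'].values,
    # ds['time_offset'].values and ds['base_time'].attrs['units'].
    base_time = 1695967200.0                    # seconds since epoch
    time_offset = np.array([0.0, 60.0, 120.0])  # seconds relative to base_time
    units = 'seconds since 1970-01-01 00:00:00'

    # Requesting plain datetime.datetime objects keeps the cast below
    # portable, since NumPy converts Python datetimes to datetime64 natively.
    time = num2date(base_time + time_offset, units,
                    only_use_cftime_datetimes=False,
                    only_use_python_datetimes=True)
    time = np.asarray(time).astype('datetime64[ns]')
    print(time[0])  # 2023-09-29T06:00:00.000000000

Decoding through the units attribute, rather than hard-coding a seconds-since-epoch assumption as the removed lines did, also keeps the result correct if a file declares a different epoch.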
diff --git a/act/io/noaagml.py b/act/io/noaagml.py
index 447d0af714..c2f9587ef8 100644
--- a/act/io/noaagml.py
+++ b/act/io/noaagml.py
@@ -258,7 +258,7 @@ def read_gml_halo(filename, **kwargs):
         elif var_name.endswith('min'):
             min_name = var_name
 
-    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         if min_name is not None:
             ts = datetime(
@@ -285,7 +285,7 @@ def read_gml_halo(filename, **kwargs):
             ts = datetime(
                 ds[year_name].values[ii],
                 ds[month_name].values[ii], 1)
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     for var_name in [year_name, month_name, day_name, hour_name, min_name]:
         try:
@@ -421,7 +421,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
     ds = act.io.csvfiles.read_csv(
         filename, sep=r'\s+', skiprows=skiprows, **kwargs)
 
-    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['year'].values[ii],
@@ -431,7 +431,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
             ds['minute'].values[ii],
             ds['second'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -542,7 +542,7 @@ def read_gml_ozone(filename=None, **kwargs):
         filename, sep=r'\s+', skiprows=skiprows, **kwargs)
     ds.attrs['station'] = str(ds['STN'].values[0]).lower()
 
-    timestamp = np.full(ds['YEAR'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['YEAR'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['YEAR'].values[ii],
@@ -550,7 +550,7 @@ def read_gml_ozone(filename=None, **kwargs):
             ds['DAY'].values[ii],
             ds['HR'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -820,7 +820,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
     )
     ds.attrs['location'] = station
 
-    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['year'].values[ii],
@@ -829,7 +829,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
             ds['hour'].values[ii],
             ds['minute'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -999,7 +999,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
         column_names=column_names.keys(), **kwargs)
 
     if ds is not None:
-        timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+        timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
         for ii in range(0, len(timestamp)):
            if minutes:
                ts = datetime(
@@ -1017,7 +1017,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
                     ds['hour'].values[ii],
                 )
 
-            timestamp[ii] = np.datetime64(ts)
+            timestamp[ii] = np.datetime64(ts, "ns")
 
         ds = ds.rename({'index': 'time'})
         ds = ds.assign_coords(time=timestamp)
@@ -1080,7 +1080,7 @@ def read_surfrad(filename, **kwargs):
              'relative_humidity', 'qc_relative_humidity', 'wind_speed', 'qc_wind_speed',
              'wind_direction', 'qc_wind_direction', 'pressure', 'qc_pressure']
     for i, f in enumerate(filename):
-        new_df = pd.read_csv(f, names=names, skiprows=2, delimiter='\s+', header=None)
+        new_df = pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None)
         if i == 0:
             df = new_df
         else:
diff --git a/act/io/noaapsl.py b/act/io/noaapsl.py
index 1a62238ad6..e47b900188 100644
--- a/act/io/noaapsl.py
+++ b/act/io/noaapsl.py
@@ -513,7 +513,7 @@ def read_psl_surface_met(filenames, conf_file=None):
     minute = [int(str(ii)[3:]) for ii in hourmin]
     minute = np.array(minute, dtype='timedelta64[m]')
     time = time + day + hour + minute
-
+    time = time.astype("datetime64[ns]")
     # Update Dataset to use "time" coordinate and assigned calculated times
     ds = ds.assign_coords(index=time)
     ds = ds.rename(index='time')
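Every noaagml.py hunk applies the same two-part fix: preallocate the timestamp array as datetime64[ns] instead of datetime64[s], and pin the unit when converting each datetime. A runnable sketch of that pattern with made-up date columns follows; the noaapsl.py cast at the end of its hunk exists for the same reason, since summing timedelta64 offsets there yields a non-nanosecond result:

    from datetime import datetime

    import numpy as np

    # Hypothetical integer date columns like those parsed from GML text files.
    year = np.array([2023, 2023])
    month = np.array([9, 9])
    day = np.array([29, 30])

    # Filling a datetime64[ns] array with np.nan yields NaT, so any row that
    # is never assigned stays marked as missing.
    timestamp = np.full(year.size, np.nan, dtype="datetime64[ns]")
    for ii in range(len(timestamp)):
        ts = datetime(year[ii], month[ii], day[ii])
        # np.datetime64(ts) alone infers microsecond precision; passing "ns"
        # avoids non-nanosecond time warnings from newer Xarray/pandas.
        timestamp[ii] = np.datetime64(ts, "ns")

    print(timestamp)  # ['2023-09-29T00:00:00.000000000' '2023-09-30T00:00:00.000000000']

The read_surfrad and sep=r'\s+' changes in the same file are unrelated to time handling: raw strings silence the invalid-escape-sequence warning that '\s+' raises on recent Python.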
diff --git a/act/io/sodar.py b/act/io/sodar.py
index 52c7cd14de..15dc238f45 100644
--- a/act/io/sodar.py
+++ b/act/io/sodar.py
@@ -9,6 +9,7 @@
 import fsspec
 import numpy as np
 import pandas as pd
+import xarray as xr
 
 from act.io.noaapsl import filter_list
 
@@ -74,7 +75,7 @@ def read_mfas_sodar(filepath):
     # tmp_columns is used to removed '#' column that causes
     # columns to move over by one.
     df = pd.read_table(filepath,
-                       sep='\s+',
+                       sep=r'\s+',
                        skiprows=skip_full_ind,
                        names=tmp_columns,
                        usecols=columns)
@@ -103,7 +104,14 @@ def read_mfas_sodar(filepath):
 
     # Use unique time and height values to reindex data to be two dimensional.
     ind = pd.MultiIndex.from_product((time_dim, height_dim), names=('time', 'height'))
-    ds = ds.assign(Dates=ind).unstack("Dates")
+
+    # Xarray 2023.9 contains new syntax, adding try and except for
+    # previous version.
+    try:
+        mindex_coords = xr.Coordinates.from_pandas_multiindex(ind, 'Dates')
+        ds = ds.assign_coords(mindex_coords).unstack("Dates")
+    except AttributeError:
+        ds = ds.assign(Dates=ind).unstack("Dates")
 
     # Add file metadata.
     for key in file_dict.keys():
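The sodar.py change is a version-compatibility guard: Xarray 2023.9 introduces new syntax for building MultiIndex coordinates, so the Coordinates API is tried first and the old spelling is kept as a fallback for earlier releases. A small sketch with fabricated sodar-like data, showing that both branches produce the same two-dimensional result:

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Hypothetical flat table: one 'Dates' dimension interleaving every
    # (time, height) pair, mimicking the MFAS sodar layout.
    time_dim = pd.to_datetime(['2023-09-29 00:00', '2023-09-29 00:30'])
    height_dim = [10.0, 20.0, 30.0]
    ind = pd.MultiIndex.from_product((time_dim, height_dim), names=('time', 'height'))
    ds = xr.Dataset({'speed': ('Dates', np.arange(len(ind), dtype=float))})

    # Newer Xarray: build explicit MultiIndex coordinates, then unstack.
    # Older Xarray lacks xr.Coordinates, raising AttributeError.
    try:
        mindex_coords = xr.Coordinates.from_pandas_multiindex(ind, 'Dates')
        ds = ds.assign_coords(mindex_coords).unstack('Dates')
    except AttributeError:
        ds = ds.assign(Dates=ind).unstack('Dates')

    print(ds.sizes)  # Frozen({'time': 2, 'height': 3})

Catching AttributeError, rather than parsing the version string, keeps the behavior identical on both sides of the Xarray release without importing packaging or pinning a version.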