From cf6ee7ae933d76ef471de1fd95ced0ca66edbc81 Mon Sep 17 00:00:00 2001
From: Zach Sherman
Date: Fri, 29 Sep 2023 08:05:12 -0500
Subject: [PATCH] FIX: IO Warning fixes. (#719)

* FIX: Fixes for some warnings.

* FIX: Fix for more warnings.

* MNT: Remove final warnings.

* FIX: Add missing dtype conversion.

* FIX: Fix for datatype.

* MNT: Revert cftime call.
---
 act/io/armfiles.py | 14 ++++++++------
 act/io/noaagml.py  | 22 +++++++++++-----------
 act/io/noaapsl.py  |  2 +-
 act/io/sodar.py    | 12 ++++++++++--
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/act/io/armfiles.py b/act/io/armfiles.py
index 2ff35b31cf..c2f362b1ef 100644
--- a/act/io/armfiles.py
+++ b/act/io/armfiles.py
@@ -17,6 +17,7 @@
 import tempfile
 import warnings
 
+from cftime import num2date
 import numpy as np
 import xarray as xr
 import datetime as dt
@@ -177,8 +178,8 @@ def read_netcdf(
     # If requested use base_time and time_offset to derive time. Assumes that the units
     # of both are in seconds and that the value is number of seconds since epoch.
     if use_base_time:
-        time = (ds['base_time'].values + ds['time_offset'].values) * 1000000.0
-        time = np.array(time, dtype='datetime64[us]')
+        time = num2date(ds['base_time'].values + ds['time_offset'].values, ds['base_time'].attrs['units'])
+        time = time.astype('datetime64[ns]')
 
         # Need to use a new Dataset creation to correctly index time for use with
         # .group and .resample methods in Xarray Datasets.
@@ -196,10 +197,8 @@
         # https://github.com/pydata/xarray/issues/3644
         # To ensure the times are read in correctly need to set use_cftime=True.
         # This will read in time as cftime object. But Xarray uses numpy datetime64
-        # natively. This will convert the cftime time values to numpy datetime64. cftime
-        # does not preserve the time past ms precision. We will use ms precision for
-        # the conversion.
-        desired_time_precision = 'datetime64[ms]'
+        # natively. This will convert the cftime time values to numpy datetime64.
+        desired_time_precision = 'datetime64[ns]'
         for var_name in ['time', 'time_offset']:
             try:
                 if 'time' in ds.dims and type(ds[var_name].values[0]).__module__.startswith('cftime.'):
@@ -762,6 +761,9 @@ def write_netcdf(
         if 'history' in list(write_ds.attrs.keys()):
             write_ds.attrs['history'] += ''.join(['\n', str(current_time), ' created by ACT ',
                                                   str(act.__version__), ' act.io.write.write_netcdf'])
+
+        if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding:
+            write_ds.time.encoding.update(write_ds.time_bounds.encoding)
 
         write_ds.to_netcdf(encoding=encoding, **kwargs)
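The read_netcdf hunk above replaces manual microsecond arithmetic with cftime's num2date followed by a cast to Xarray's native nanosecond dtype. Here is a minimal, self-contained sketch of that pattern; the units string and sample values are invented for illustration, and the two only_use_* flags are an addition of this sketch (the patch calls num2date without them):

    import numpy as np
    from cftime import num2date

    # Hypothetical stand-ins for ds['base_time'].values,
    # ds['time_offset'].values and ds['base_time'].attrs['units'].
    base_time = 1695967200.0                    # seconds since epoch
    time_offset = np.array([0.0, 60.0, 120.0])  # seconds relative to base_time
    units = 'seconds since 1970-01-01 00:00:00'

    # Requesting plain datetime.datetime objects keeps the cast below
    # portable, since NumPy converts Python datetimes to datetime64 natively.
    time = num2date(base_time + time_offset, units,
                    only_use_cftime_datetimes=False,
                    only_use_python_datetimes=True)
    time = np.asarray(time).astype('datetime64[ns]')
    print(time[0])  # 2023-09-29T06:00:00.000000000

Decoding through the units attribute, rather than hard-coding a seconds-since-epoch assumption as the removed lines did, also keeps the result correct if a file declares a different epoch.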
diff --git a/act/io/noaagml.py b/act/io/noaagml.py
index 447d0af714..c2f9587ef8 100644
--- a/act/io/noaagml.py
+++ b/act/io/noaagml.py
@@ -258,7 +258,7 @@ def read_gml_halo(filename, **kwargs):
         elif var_name.endswith('min'):
             min_name = var_name
 
-    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         if min_name is not None:
             ts = datetime(
@@ -285,7 +285,7 @@ def read_gml_halo(filename, **kwargs):
             ts = datetime(
                 ds[year_name].values[ii],
                 ds[month_name].values[ii], 1)
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     for var_name in [year_name, month_name, day_name, hour_name, min_name]:
         try:
@@ -421,7 +421,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
     ds = act.io.csvfiles.read_csv(
         filename, sep=r'\s+', skiprows=skiprows, **kwargs)
 
-    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['year'].values[ii],
@@ -431,7 +431,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
             ds['minute'].values[ii],
             ds['second'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -542,7 +542,7 @@ def read_gml_ozone(filename=None, **kwargs):
         filename, sep=r'\s+', skiprows=skiprows, **kwargs)
     ds.attrs['station'] = str(ds['STN'].values[0]).lower()
 
-    timestamp = np.full(ds['YEAR'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['YEAR'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['YEAR'].values[ii],
@@ -550,7 +550,7 @@ def read_gml_ozone(filename=None, **kwargs):
             ds['DAY'].values[ii],
             ds['HR'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -820,7 +820,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
     )
     ds.attrs['location'] = station
 
-    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+    timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
     for ii in range(0, len(timestamp)):
         ts = datetime(
             ds['year'].values[ii],
@@ -829,7 +829,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
             ds['hour'].values[ii],
             ds['minute'].values[ii],
         )
-        timestamp[ii] = np.datetime64(ts)
+        timestamp[ii] = np.datetime64(ts, "ns")
 
     ds = ds.rename({'index': 'time'})
     ds = ds.assign_coords(time=timestamp)
@@ -999,7 +999,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
         column_names=column_names.keys(), **kwargs)
 
     if ds is not None:
-        timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
+        timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
         for ii in range(0, len(timestamp)):
            if minutes:
                ts = datetime(
@@ -1017,7 +1017,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
                     ds['hour'].values[ii],
                 )
 
-            timestamp[ii] = np.datetime64(ts)
+            timestamp[ii] = np.datetime64(ts, "ns")
 
         ds = ds.rename({'index': 'time'})
         ds = ds.assign_coords(time=timestamp)
@@ -1080,7 +1080,7 @@ def read_surfrad(filename, **kwargs):
              'relative_humidity', 'qc_relative_humidity', 'wind_speed', 'qc_wind_speed',
              'wind_direction', 'qc_wind_direction', 'pressure', 'qc_pressure']
     for i, f in enumerate(filename):
-        new_df = pd.read_csv(f, names=names, skiprows=2, delimiter='\s+', header=None)
+        new_df = pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None)
         if i == 0:
             df = new_df
         else:
diff --git a/act/io/noaapsl.py b/act/io/noaapsl.py
index 1a62238ad6..e47b900188 100644
--- a/act/io/noaapsl.py
+++ b/act/io/noaapsl.py
@@ -513,7 +513,7 @@ def read_psl_surface_met(filenames, conf_file=None):
     minute = [int(str(ii)[3:]) for ii in hourmin]
     minute = np.array(minute, dtype='timedelta64[m]')
     time = time + day + hour + minute
-
+    time = time.astype("datetime64[ns]")
     # Update Dataset to use "time" coordinate and assigned calculated times
     ds = ds.assign_coords(index=time)
     ds = ds.rename(index='time')
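Every noaagml.py hunk applies the same two-part fix: preallocate the timestamp array as datetime64[ns] instead of datetime64[s], and pin the unit when converting each datetime. A runnable sketch of that pattern with made-up date columns follows; the noaapsl.py cast at the end of its hunk exists for the same reason, since summing timedelta64 offsets there yields a non-nanosecond result:

    from datetime import datetime

    import numpy as np

    # Hypothetical integer date columns like those parsed from GML text files.
    year = np.array([2023, 2023])
    month = np.array([9, 9])
    day = np.array([29, 30])

    # Filling a datetime64[ns] array with np.nan yields NaT, so any row that
    # is never assigned stays marked as missing.
    timestamp = np.full(year.size, np.nan, dtype="datetime64[ns]")
    for ii in range(len(timestamp)):
        ts = datetime(year[ii], month[ii], day[ii])
        # np.datetime64(ts) alone infers microsecond precision; passing "ns"
        # avoids non-nanosecond time warnings from newer Xarray/pandas.
        timestamp[ii] = np.datetime64(ts, "ns")

    print(timestamp)  # ['2023-09-29T00:00:00.000000000' '2023-09-30T00:00:00.000000000']

The read_surfrad and sep=r'\s+' changes in the same file are unrelated to time handling: raw strings silence the invalid-escape-sequence warning that '\s+' raises on recent Python.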
diff --git a/act/io/sodar.py b/act/io/sodar.py
index 52c7cd14de..15dc238f45 100644
--- a/act/io/sodar.py
+++ b/act/io/sodar.py
@@ -9,6 +9,7 @@
 import fsspec
 import numpy as np
 import pandas as pd
+import xarray as xr
 
 from act.io.noaapsl import filter_list
 
@@ -74,7 +75,7 @@ def read_mfas_sodar(filepath):
     # tmp_columns is used to removed '#' column that causes
     # columns to move over by one.
     df = pd.read_table(filepath,
-                       sep='\s+',
+                       sep=r'\s+',
                        skiprows=skip_full_ind,
                        names=tmp_columns,
                        usecols=columns)
@@ -103,7 +104,14 @@ def read_mfas_sodar(filepath):
 
     # Use unique time and height values to reindex data to be two dimensional.
     ind = pd.MultiIndex.from_product((time_dim, height_dim), names=('time', 'height'))
-    ds = ds.assign(Dates=ind).unstack("Dates")
+
+    # Xarray 2023.9 contains new syntax, adding try and except for
+    # previous version.
+    try:
+        mindex_coords = xr.Coordinates.from_pandas_multiindex(ind, 'Dates')
+        ds = ds.assign_coords(mindex_coords).unstack("Dates")
+    except AttributeError:
+        ds = ds.assign(Dates=ind).unstack("Dates")
 
     # Add file metadata.
     for key in file_dict.keys():
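The sodar.py change is a version-compatibility guard: Xarray 2023.9 introduces new syntax for building MultiIndex coordinates, so the Coordinates API is tried first and the old spelling is kept as a fallback for earlier releases. A small sketch with fabricated sodar-like data, showing that both branches produce the same two-dimensional result:

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Hypothetical flat table: one 'Dates' dimension interleaving every
    # (time, height) pair, mimicking the MFAS sodar layout.
    time_dim = pd.to_datetime(['2023-09-29 00:00', '2023-09-29 00:30'])
    height_dim = [10.0, 20.0, 30.0]
    ind = pd.MultiIndex.from_product((time_dim, height_dim), names=('time', 'height'))
    ds = xr.Dataset({'speed': ('Dates', np.arange(len(ind), dtype=float))})

    # Newer Xarray: build explicit MultiIndex coordinates, then unstack.
    # Older Xarray lacks xr.Coordinates, raising AttributeError.
    try:
        mindex_coords = xr.Coordinates.from_pandas_multiindex(ind, 'Dates')
        ds = ds.assign_coords(mindex_coords).unstack('Dates')
    except AttributeError:
        ds = ds.assign(Dates=ind).unstack('Dates')

    print(ds.sizes)  # Frozen({'time': 2, 'height': 3})

Catching AttributeError, rather than parsing the version string, keeps the behavior identical on both sides of the Xarray release without importing packaging or pinning a version.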