refactor(netcdf): utility package input adjustments (#2106)

* netcdf improved error handling and utl-ncf input adjustments * escape underscores * add autotest assert for netcdf data model * add baseline testing for utl-ncf options * remove in_record attribute from chunk params * set chunk_time param optional --------- Co-authored-by: mjreno <[email protected]>
MODFLOW-USGS · Dec 17, 2024 · 5a0f854 · 5a0f854
1 parent 8d5b621
commit 5a0f854
Show file tree

Hide file tree

Showing 16 changed files with 234 additions and 159 deletions.
diff --git a/autotest/test_netcdf_gwe_cnd.py b/autotest/test_netcdf_gwe_cnd.py
@@ -24,6 +24,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -57,6 +58,10 @@ def check_output(idx, test, export, gridded_input):
 
     name = "gwe-" + test.name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{name}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{name}.nc"

diff --git a/autotest/test_netcdf_gwf_disv.py b/autotest/test_netcdf_gwf_disv.py
@@ -14,6 +14,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 wkt = (
     'PROJCS["NAD83 / UTM zone 18N", '
@@ -53,7 +54,15 @@ def build_models(idx, test, export, gridded_input):
         gwf.name_file.nc_mesh2d_filerecord = f"{name}.nc"
 
     # netcdf config
-    ncf = flopy.mf6.ModflowUtlncf(gwf.disv, ogc_wkt=wkt, filename=f"{name}.disv.ncf")
+    ncf = flopy.mf6.ModflowUtlncf(
+        gwf.disv,
+        deflate=9,
+        shuffle=True,
+        chunk_time=1,
+        chunk_face=3,
+        wkt=wkt,
+        filename=f"{name}.disv.ncf",
+    )
 
     # output control
     oc = flopy.mf6.ModflowGwfoc(
@@ -72,6 +81,16 @@ def check_output(idx, test, export, gridded_input):
 
     name = test.name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{name}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+        cmpr = ds.variables["head_l1"].filters()
+        chnk = ds.variables["head_l1"].chunking()
+        assert cmpr["shuffle"]
+        assert cmpr["complevel"] == 9
+        assert chnk == [1, 3]
+        assert ds.variables["projection"].getncattr("wkt").lower() == wkt.lower()
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{name}.nc"

diff --git a/autotest/test_netcdf_gwf_lak_wetlakbedarea02.py b/autotest/test_netcdf_gwf_lak_wetlakbedarea02.py
@@ -24,6 +24,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -61,6 +62,10 @@ def check_output(idx, test, export, gridded_input):
     name = cases[idx]
     gwfname = "gwf-" + name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{gwfname}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{gwfname}.nc"

diff --git a/autotest/test_netcdf_gwf_rch01.py b/autotest/test_netcdf_gwf_rch01.py
@@ -24,6 +24,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -58,6 +59,10 @@ def check_output(idx, test, export, gridded_input):
 
     name = "rch"
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{name}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{name}.nc"

diff --git a/autotest/test_netcdf_gwf_rch03.py b/autotest/test_netcdf_gwf_rch03.py
@@ -24,6 +24,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -59,6 +60,10 @@ def check_output(idx, test, export, gridded_input):
 
     name = "rch"
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{name}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{name}.nc"

diff --git a/autotest/test_netcdf_gwf_sto01.py b/autotest/test_netcdf_gwf_sto01.py
@@ -12,6 +12,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 htol = [None for _ in range(len(cases))]
 
@@ -52,22 +53,53 @@ def build_models(idx, test, export, gridded_input):
 
     if export == "ugrid":
         gwf.name_file.nc_mesh2d_filerecord = f"{name}.nc"
+        ncf = flopy.mf6.ModflowUtlncf(
+            gwf.dis,
+            deflate=5,
+            shuffle=True,
+            chunk_time=1,
+            chunk_face=10,
+            wkt=wkt,
+            filename=f"{name}.dis.ncf",
+        )
     elif export == "structured":
         gwf.name_file.nc_structured_filerecord = f"{name}.nc"
-
-    # netcdf config
-    ncf = flopy.mf6.ModflowUtlncf(
-        gwf.dis,
-        ogc_wkt=wkt,
-        filename=f"{name}.dis.ncf",
-    )
+        ncf = flopy.mf6.ModflowUtlncf(
+            gwf.dis,
+            deflate=5,
+            shuffle=True,
+            chunk_time=1,
+            chunk_z=1,
+            chunk_y=5,
+            chunk_x=5,
+            wkt=wkt,
+            filename=f"{name}.dis.ncf",
+        )
 
     return sim, dummy
 
 
 def check_output(idx, test, export, gridded_input):
     from test_gwf_sto01 import check_output as check
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / "gwf_sto01.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+        if export == "structured":
+            cmpr = ds.variables["head"].filters()
+            chnk = ds.variables["head"].chunking()
+            assert chnk == [1, 1, 5, 5]
+            assert (
+                ds.variables["projection"].getncattr("crs_wkt").lower() == wkt.lower()
+            )
+        elif export == "ugrid":
+            cmpr = ds.variables["head_l1"].filters()
+            chnk = ds.variables["head_l1"].chunking()
+            assert chnk == [1, 10]
+            assert ds.variables["projection"].getncattr("wkt").lower() == wkt.lower()
+        assert cmpr["shuffle"]
+        assert cmpr["complevel"] == 5
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = "gwf_sto01.nc"

diff --git a/autotest/test_netcdf_gwf_vsc03_sfr.py b/autotest/test_netcdf_gwf_vsc03_sfr.py
@@ -24,6 +24,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -61,6 +62,10 @@ def check_output(idx, test, export, gridded_input):
 
     name = "gwf-" + test.name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{name}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{name}.nc"

diff --git a/autotest/test_netcdf_gwt_dsp01.py b/autotest/test_netcdf_gwt_dsp01.py
@@ -12,6 +12,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -29,14 +30,26 @@ def build_models(idx, test, export, gridded_input):
 
     if export == "ugrid":
         gwt.name_file.nc_mesh2d_filerecord = f"{gwtname}.nc"
+        ncf = flopy.mf6.ModflowUtlncf(
+            gwt.dis,
+            deflate=3,
+            shuffle=False,
+            chunk_time=1,
+            chunk_face=5,
+            filename=f"{gwtname}.dis.ncf",
+        )
     elif export == "structured":
         gwt.name_file.nc_structured_filerecord = f"{gwtname}.nc"
-
-    # netcdf config
-    ncf = flopy.mf6.ModflowUtlncf(
-        gwt.dis,
-        filename=f"{gwtname}.dis.ncf",
-    )
+        ncf = flopy.mf6.ModflowUtlncf(
+            gwt.dis,
+            deflate=3,
+            shuffle=False,
+            chunk_time=1,
+            chunk_z=1,
+            chunk_y=1,
+            chunk_x=20,
+            filename=f"{gwtname}.dis.ncf",
+        )
 
     oc = flopy.mf6.ModflowGwtoc(
         gwt,
@@ -56,6 +69,20 @@ def check_output(idx, test, export, gridded_input):
     name = cases[idx]
     gwtname = "gwt_" + name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{gwtname}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+        if export == "structured":
+            cmpr = ds.variables["concentration"].filters()
+            chnk = ds.variables["concentration"].chunking()
+            assert chnk == [1, 1, 1, 20]
+        elif export == "ugrid":
+            cmpr = ds.variables["concentration_l1"].filters()
+            chnk = ds.variables["concentration_l1"].chunking()
+            assert chnk == [1, 5]
+        assert not cmpr["shuffle"]
+        assert cmpr["complevel"] == 3
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{gwtname}.nc"

diff --git a/autotest/test_netcdf_gwt_prudic2004t2.py b/autotest/test_netcdf_gwt_prudic2004t2.py
@@ -12,6 +12,7 @@
 
 xa = pytest.importorskip("xarray")
 xu = pytest.importorskip("xugrid")
+nc = pytest.importorskip("netCDF4")
 
 
 def build_models(idx, test, export, gridded_input):
@@ -47,6 +48,10 @@ def check_output(idx, test, export, gridded_input):
     name = test.name
     gwtname = "gwt_" + name
 
+    # verify format of generated netcdf file
+    with nc.Dataset(test.workspace / f"{gwtname}.nc") as ds:
+        assert ds.data_model == "NETCDF4"
+
     if gridded_input == "netcdf":
         # re-run the simulation with model netcdf input
         input_fname = f"{gwtname}.nc"

diff --git a/doc/mf6io/mf6ivar/dfn/utl-ncf.dfn b/doc/mf6io/mf6ivar/dfn/utl-ncf.dfn
@@ -4,7 +4,7 @@
 # --------------------- utl ncf options ---------------------
 
 block options
-name ogc_wkt
+name wkt
 type string
 shape lenbigline
 reader urword
@@ -28,67 +28,45 @@ optional true
 longname
 description is the keyword used to turn on the netcdf variable shuffle filter when the deflate option is also set. The shuffle filter has the effect of storing the first byte of all of a variable's values in a chunk contiguously, followed by all the second bytes, etc. This can be an optimization for compression with certain types of data.
 
-block options
-name chunk_record
-type record chunking chunk_time chunk_face chunk_z chunk_y chunk_x
-reader urword
-optional true
-longname netcdf export chunking record
-description netcdf export chunking record
-
-block options
-name chunking
-type keyword
-in_record true
-reader urword
-optional false
-longname keyword when defining chunking parameters
-description is a keyword for providing netcdf export chunk sizes. Chunking can dramatically impact data access times and optimal chunking is highly dependent on access patterns (timeseries vs spatial, for example). It can also significantly impact compressibility of the data. A valid input record specifies chunk\_time and either chunk\_face (MESH) or chunk\_z, chunk\_y, and chunk\_x (STRUCTURED).
-
 block options
 name chunk_time
 type integer
-in_record true
 reader urword
-optional false
+optional true
 longname chunking parameter for the time dimension
-description is the keyword used to provide a netcdf export time dimension chunk size.
+description is the keyword used to provide a data chunk size for the time dimension in a NETCDF\_MESH2D or NETCDF\_STRUCTURED output file. Must be used in combination with the chunk\_face parameter (NETCDF\_MESH2D) or the chunk\_z, chunk\_y, and chunk\_x parameter set (NETCDF\_STRUCTURED) to have an effect.
 
 block options
 name chunk_face
 type integer
-in_record true
 reader urword
 optional true
 longname chunking parameter for the mesh face dimension
-description is the keyword used to provide a mesh face dimension chunk size.
+description is the keyword used to provide a data chunk size for the face dimension in a NETCDF\_MESH2D output file. Must be used in combination with the chunk\_time parameter to have an effect.
 
 block options
 name chunk_z
 type integer
-in_record true
 reader urword
 optional true
 longname chunking parameter for structured z
-description is the keyword used to provide a structured grid z dimensions chunk size.
+description is the keyword used to provide a data chunk size for the z dimension in a NETCDF\_STRUCTURED output file. Must be used in combination with the chunk\_time, chunk\_x and chunk\_y parameter set to have an effect.
 
 block options
 name chunk_y
 type integer
-in_record true
 reader urword
 optional true
 longname chunking parameter for structured y
-description is the keyword used to provide a structured grid y dimensions chunk size.
+description is the keyword used to provide a data chunk size for the y dimension in a NETCDF\_STRUCTURED output file. Must be used in combination with the chunk\_time, chunk\_x and chunk\_z parameter set to have an effect.
 
 block options
 name chunk_x
 type integer
-in_record true
 reader urword
 optional true
 longname chunking parameter for structured x
-description is the keyword used to provide a structured grid x dimensions chunk size.
+description is the keyword used to provide a data chunk size for the x dimension in a NETCDF\_STRUCTURED output file. Must be used in combination with the chunk\_time, chunk\_y and chunk\_z parameter set to have an effect.
 
 block options
 name modflow6_attr_off
@@ -107,12 +85,12 @@ type integer
 optional true
 reader urword
 longname number of cells in layer
-description is the number of cells in a in a projected plane layer.
+description is the number of cells in a projected plane layer.
 
 # --------------------- utl ncf griddata ---------------------
 
 block griddata
-name lat
+name latitude
 type double precision
 shape (ncpl)
 optional true
@@ -121,7 +99,7 @@ longname cell center latitude
 description cell center latitude.
 
 block griddata
-name lon
+name longitude
 type double precision
 shape (ncpl)
 optional true