refresh PR

JuliaGeo · Jan 11, 2025 · 1fa5f64 · 1fa5f64
1 parent 3afbf78
commit 1fa5f64
Show file tree

Hide file tree

Showing 48 changed files with 6,505 additions and 288 deletions.
diff --git a/.github/workflows/ci.yml → .github/workflows/CI.yml b/.github/workflows/ci.yml → .github/workflows/CI.yml
@@ -4,25 +4,22 @@ on:
   - pull_request
 jobs:
   test:
-    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
+          - 'lts'
           - '1'
         os:
           - ubuntu-latest
-        arch:
-          - x64
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
         env:
           cache-name: cache-artifacts
         with:
@@ -39,15 +36,17 @@ jobs:
       - uses: julia-actions/julia-runtest@latest
         continue-on-error: ${{ matrix.version == 'nightly' }}
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v3
+      - uses: codecov/codecov-action@v5
         with:
           file: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
+
   docs:
     name: Documentation
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: '1'
       - run: |

diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml
@@ -1,8 +1,6 @@
 name: IntegrationTest
 on:
   push:
-    branches: [main]
-    tags: [v*]
   pull_request:
 
 concurrency:
@@ -24,16 +22,17 @@ jobs:
           - {user: Alexander-Barth, repo: NCDatasets.jl}
           - {user: JuliaGeo,        repo: GRIBDatasets.jl}
           - {user: Alexander-Barth, repo: TIFFDatasets.jl}
+          - {user: JuliaGeo,        repo: ZarrDatasets.jl}
 
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.julia-version }}
           arch: x64
       - uses: julia-actions/julia-buildpkg@latest
       - name: Clone Downstream
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
           path: downstream

diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@
 *.jl.cov
 *.jl.*.cov
 *~
+*.html
 
 # Files generated by invoking Julia with --track-allocation
 *.jl.mem

diff --git a/Project.toml b/Project.toml
@@ -1,22 +1,26 @@
 name = "CommonDataModel"
 uuid = "1fbeeb36-5f17-413c-809b-666fb144f157"
-authors = ["Alexander Barth <[email protected]>"]
 keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography", "meteorology", "climatology", "opendap"]
 license = "MIT"
 desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
-version = "0.2.4"
+authors = ["Alexander Barth <[email protected]>"]
+version = "0.3.7"
 
 [deps]
 CFTime = "179af706-886a-5703-950a-314cd64e0468"
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
-julia = "1.6"
-Preferences = "1.3"
-DataStructures = "0.17, 0.18"
-DiskArrays = "0.3"
 CFTime = "0.1.1"
+DataStructures = "0.17, 0.18"
+Dates = "1"
+DiskArrays = "0.4"
+Preferences = "1.3"
+Printf = "1"
+Statistics = "1"
+julia = "1.6"
diff --git a/README.md b/README.md
@@ -1,22 +1,40 @@
 [![Build Status](https://github.com/JuliaGeo/CommonDataModel.jl/workflows/CI/badge.svg)](https://github.com/JuliaGeo/CommonDataModel.jl/actions)
-[![codecov.io](http://codecov.io/github/JuliaGeo/CommonDataModel.jl/coverage.svg?branch=main)](http://app.codecov.io/github/JuliaGeo/CommonDataModel.jl?branch=main)
+[![codecov](https://codecov.io/github/JuliaGeo/CommonDataModel.jl/graph/badge.svg?token=TNU4HSPelE)](https://codecov.io/github/JuliaGeo/CommonDataModel.jl)
+[![documentation stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliageo.github.io/CommonDataModel.jl/stable/)
 [![documentation dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliageo.github.io/CommonDataModel.jl/dev/)
 
 
-This package contains abstracts type definition to ensure compatibility of the package [GRIBDatasets](https://github.com/JuliaGeo/GRIBDatasets.jl) and [NCDatasets](https://github.com/Alexander-Barth/NCDatasets.jl) for manipulating GRIB and NetCDF files. This package aims to follow the [Common Data Model](https://docs.unidata.ucar.edu/netcdf-c/current/netcdf_data_model.html) and the [CF (climate and forecast models) Metadata Conventions](https://cfconventions.org/).
+This package contains abstract type definitions for loading and manipulating GRIB, NetCDF, GeoTIFF and Zarr files. This package aims to follow the [Common Data Model](https://docs.unidata.ucar.edu/netcdf-c/current/netcdf_data_model.html) and the [CF (climate and forecast models) Metadata Conventions](https://cfconventions.org/).
 
-This package is at a very early state of developpement.
 
-Here is minimal example for loading GRIB or NetCDF files.
+| Format  |      Package | read support | write support |
+|---------|--------------|:------------:|:-------------:|
+| NetCDF  | [`NCDatasets`](https://github.com/Alexander-Barth/NCDatasets.jl)     |            ✔ |             ✔ |
+| OPeNDAP | [`NCDatasets`](https://github.com/Alexander-Barth/NCDatasets.jl)     |            ✔ |             - |
+| GRIB    | [`GRIBDatasets`](https://github.com/JuliaGeo/GRIBDatasets.jl)        |            ✔ |             - |
+| geoTIFF | [`TIFFDatasets`](https://github.com/Alexander-Barth/TIFFDatasets.jl) |            ✔ |             - |
+| Zarr    | [`ZarrDatasets`](https://github.com/JuliaGeo/ZarrDatasets.jl)        |            ✔ |             ✔ |
+
+
+Features include:
+* query and edit metadata of arrays and datasets 
+* virtually concatenating multiple files along a given dimension and merging virtually different datasets
+* create a virtual subset (`view`) by indices or by values of coordinate variables (`CommonDataModel.select`, `CommonDataModel.@select`)
+* group, map and reduce a variable (`CommonDataModel.groupby`, `CommonDataModel.@groupby`) and rolling reductions like running means (`CommonDataModel.rolling`)
+
+
+
+
+Here is a minimal example for loading files using `CommonDataModel`:
 
 ``` julia
 import CommonDataModel as CDM
-import SomeDatasets # where SomeDatasets is either GRIBDatasets or NCDatasets
+import SomeDatasets # where SomeDatasets is either GRIBDatasets, NCDatasets, ZarrDatasets,...
 
 ds = SomeDatasets.Dataset("file_name")
 
 # ntime is the number of time instances
-ntime = CDM.dims(ds)["time"]
+ntime = ds.dim["time"] # or CDM.dims(ds)["time"]
 
 # create an array-like structure v corresponding to variable temperature
 v = ds["temperature"]
@@ -28,25 +46,25 @@ subdata = v[10:30,30:5:end]
 data = v[:,:]
 
 # load a global attribute
-title = CDM.attribs(ds)["title"]
+title = ds.attrib["title"]  # or CDM.attribs(ds)["title"]
 close(ds)
 ```
 
- Most users would typically import [`GRIBDatasets`](https://github.com/JuliaGeo/GRIBDatasets.jl) and [`NCDatasets`](https://github.com/Alexander-Barth/NCDatasets.jl) directly and not `CommonDataModel`. One should import `CommonDataModel` only to extent the functionality of `GRIBDatasets` and `NCDatasets`.
-
-As a proof-of-concept, there is also an [`TIFFDatasets`](https://github.com/Alexander-Barth/TIFFDatasets.jl) package for GeoTIFF files.
+Most users would typically import [`GRIBDatasets`](https://github.com/JuliaGeo/GRIBDatasets.jl), [`NCDatasets`](https://github.com/Alexander-Barth/NCDatasets.jl)... directly and not `CommonDataModel`.
 
 # File conversions
 
-By implementing a common interface, GRIB files can be converted to NetCDF files using
-`NCDatasets.write`:
+By implementing a common interface, files can be converted from one format to another using the `write` function.
+For example GRIB files can be converted to NetCDF (or Zarr) files:
 
 ```julia
-using NCDatasets
+using NCDatasets # or ZarrDatasets
 using GRIBDatasets
 using Downloads: download
 
 grib_file = download("https://github.com/JuliaGeo/GRIBDatasets.jl/raw/98356af026ea39a5ec0b5e64e4289105492321f8/test/sample-data/era5-levels-members.grib")
 netcdf_file = "test.nc"
-NCDatasets.write(netcdf_file,GRIBDataset(grib_file))
+NCDataset(netcdf_file,"c") do ds
+   write(ds,GRIBDataset(grib_file))
+end
 ```
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,3 +1,15 @@
 [deps]
-Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
+IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[compat]
+NCDatasets = "0.14"
+GLMakie = "0.8"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,5 +1,19 @@
 using Documenter: Documenter, makedocs, deploydocs
-using CommonDataModel: CommonDataModel
+using CommonDataModel
+using Literate
+
+Literate.markdown(
+    "docs/src/tutorial1.jl","docs/src",
+    execute = true,
+    documenter = true,
+    # The credit to Literate.jl is added in the footer instead
+    credit = false,
+)
+
+if get(ENV, "CI", "false") == "true"
+    # remove datafile on CI
+    rm("docs/src/sst.day.mean.2023.nc")
+end
 
 makedocs(;
     modules=[CommonDataModel],
@@ -9,9 +23,12 @@ makedocs(;
         prettyurls=get(ENV, "CI", "false") == "true",
         canonical="https://juliageo.github.io/CommonDataModel.jl",
         assets=String[],
+        footer = "Powered by [Documenter.jl](https://github.com/JuliaDocs/Documenter.jl), [Literate.jl](https://github.com/fredrikekre/Literate.jl) and the [Julia Programming Language](https://julialang.org/)"
+
     ),
     pages=[
         "Home" => "index.md",
+        "Tutorials" => "tutorial1.md",
     ],
 )
 

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -4,24 +4,47 @@
 In order to implement a new dataset based `CommonDataModel.jl`
 one has to create two types derived from:
 
-1. [`AbstractVariable`](#CommonDataModel.AbstractVariable): a variable with named dimension and metadata
-2. [`AbstractDataset`](#CommonDataModel.AbstractDataset): a collection of variable with named dimension, metadata and sub-groups. The sub-groups are also `AbstractDataset`.
+1. [`AbstractVariable`](@ref CommonDataModel.AbstractVariable): a variable with named dimension and metadata
+2. [`AbstractDataset`](@ref CommonDataModel.AbstractDataset): a collection of variable with named dimension, metadata and sub-groups. The sub-groups are also `AbstractDataset`.
 
 
 `CommonDataModel.jl` also provides a type `CFVariable` which wraps a type derived from `AbstractVariable` and applies the scaling described in
-[`cfvariable`](#CommonDataModel.cfvariable).
+[`cfvariable`](@ref CommonDataModel.cfvariable).
 
 Overview of methods:
 
-|            | get names                                     | get values                              | write / set value                        |
-|------------|-----------------------------------------------|-----------------------------------------|-------------------------------------------|
-| Dimensions | [`dimnames`](#CommonDataModel.dimnames)       | [`dim`](#CommonDataModel.dim)           | [`defDim`](#CommonDataModel.defDim)       |
-| Attributes | [`attribnames`](#CommonDataModel.attribnames) | [`attrib`](#CommonDataModel.attrib)     | [`defAttrib`](#CommonDataModel.defAttrib) |
-| Variables  | [`varnames`](#CommonDataModel.varname   s)    | [`variable`](#CommonDataModel.variable) | [`defVar`](#CommonDataModel.defVar)       |
-| Groups     | [`groupnames`](#CommonDataModel.groupnames)   | [`group`](#CommonDataModel.group)       | [`defGroup`](#CommonDataModel.defGroup)   |
+|            | get names                                     | get values                              | set value                        | property |
+|------------|-----------------------------------------------|-----------------------------------------|-------------------------------------------|--------|
+| Dimensions | [`dimnames`](@ref CommonDataModel.dimnames)       | [`dim`](@ref CommonDataModel.dim)           | [`defDim`](@ref CommonDataModel.defDim)       | `dim`    |
+| Attributes | [`attribnames`](@ref CommonDataModel.attribnames) | [`attrib`](@ref CommonDataModel.attrib)     | [`defAttrib`](@ref CommonDataModel.defAttrib) | `attrib` |
+| Variables  | [`varnames`](@ref CommonDataModel.varnames)    | [`variable`](@ref CommonDataModel.variable) | [`defVar`](@ref CommonDataModel.defVar)       | -      |
+| Groups     | [`groupnames`](@ref CommonDataModel.groupnames)   | [`group`](@ref CommonDataModel.group)       | [`defGroup`](@ref CommonDataModel.defGroup)   | `group`  |
 
-For read-only datasets, the methods in last column are not implemented.
+For read-only datasets, the methods in the "set value" column are not to be implemented.
+Attributes can also be deleted with the [`delAttrib`](@ref CommonDataModel.delAttrib) function.
+
+Every struct deriving from `AbstractDataset` automatically has the special properties `dim`, `attrib` and `group`, which act like dictionaries (unless a field with this name already exists).
+For `attrib`, calls to `keys`, `getindex`, `setindex!` and `delete!` are dispatched to `attribnames`, `attrib`, `defAttrib` and `delAttrib` respectively (and likewise for other properties). For example:
+
+``` julia
+using NCDatasets
+ds = NCDataset("file.nc")
+# setindex!(ds.attrib,...) here automatically calls defAttrib(ds,...)
+ds.attrib["title"] = "my amazing results";
+```
+Variables can be accessed by directly indexing the `AbstractDataset`.
+
+Every struct deriving from `AbstractVariable` has the properties `dim` and `attrib`.
+
+Current functionalities of CommonDataModel include:
+* virtually concatenating files along a given dimension
+* create a virtual subset ([`view`](@ref Base.view)) by indices or by values of coordinate variables ([`select`](@ref CommonDataModel.select), [`@select`](@ref CommonDataModel.@select))
+* group, map and reduce a variable ([`groupby`](@ref CommonDataModel.groupby), [`@groupby`](@ref CommonDataModel.@groupby), [`rolling`](@ref CommonDataModel.rolling))
+
+
+
+## API
 
 ```@autodocs
-Modules = [CommonDataModel]
+Modules = [CommonDataModel, CommonDataModel.CatArrays]
 ```