Skip to content

Commit

Permalink
Merge pull request #312 from davidhassell/dask2
Browse files Browse the repository at this point in the history
Introduction of Dask for all data manipulations
  • Loading branch information
davidhassell authored Jan 15, 2025
2 parents e0436b0 + 4b9995d commit 170ca84
Show file tree
Hide file tree
Showing 62 changed files with 10,939 additions and 2,366 deletions.
7 changes: 5 additions & 2 deletions Changelog.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
Version NEXTVERSION
-------------------

**2024-??-??**
**2024-12-??**

* Introduction of `dask` for all data manipulations
(https://github.com/NCAS-CMS/cfdm/issues/317)
* Fix bug that returned incorrect results when an invalid identifier is
provided to `cf.Field.cell_methods`
(https://github.com/NCAS-CMS/cfdm/issues/299)
Expand All @@ -24,10 +26,11 @@ Version NEXTVERSION
attribute (https://github.com/NCAS-CMS/cfdm/issues/303)
* New class `cfdm.H5netcdfArray`
* New class `cfdm.NetCDF4Array`
* Changed dependency: ``numpy>=1.15,<2.0``
* New dependency: ``h5netcdf>=1.3.0``
* New dependency: ``h5py>=3.10.0``
* New dependency: ``s3fs>=2024.6.0``
* New dependency: ``dask>=2024.6.0``
* New dependency: ``dask>=2024.6.0,<=2024.7.1``
* Removed dependency: ``netcdf_flattener``

----
Expand Down
12 changes: 9 additions & 3 deletions cfdm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,12 @@
raise ImportError(_error0 + str(error1))

_minimum_vn = "2024.6.0"
if Version(dask.__version__) < Version(_minimum_vn):
_maximum_vn = "2024.7.1"
_dask_version = Version(dask.__version__)
if not Version(_minimum_vn) <= _dask_version <= Version(_maximum_vn):
raise ValueError(
f"Bad scipy version: cfdm requires dask>={_minimum_vn}. "
f"Got {dask.__version__} at {dask.__file__}"
f"Bad dask version: cfdm requires {_minimum_vn}<=dask<={_maximum_vn}. "
f"Got {_dask_version} at {dask.__file__}"
)

from .constants import masked
Expand All @@ -160,10 +162,12 @@
RTOL,
abspath,
atol,
chunksize,
configuration,
environment,
integer_dtype,
log_level,
parse_indices,
rtol,
unique_constructs,
_disable_logging,
Expand Down Expand Up @@ -238,6 +242,8 @@
from .datum import Datum
from .interiorring import InteriorRing

from .units import Units

from .auxiliarycoordinate import AuxiliaryCoordinate
from .cellconnectivity import CellConnectivity
from .cellmeasure import CellMeasure
Expand Down
2 changes: 1 addition & 1 deletion cfdm/cfdmimplementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,7 @@ def get_data_maximum(self, parent):
Scalar `Data` instance
"""
return parent.data.maximum(squeeze=True)
return parent.data.max(squeeze=True)

def get_data_sum(self, parent):
"""Return the sum of the data.
Expand Down
9 changes: 9 additions & 0 deletions cfdm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
from enum import Enum

import numpy as np
from dask import config
from dask.utils import parse_bytes

_CHUNKSIZE = "128 MiB"
config.set({"array.chunk-size": _CHUNKSIZE})

"""A dictionary of useful constants.
Expand All @@ -23,11 +28,15 @@
The minimal level of seriousness for which log messages are
shown. See `cfdm.log_level`.
CHUNKSIZE: `int`
The Dask chunk size (in bytes). See `cfdm.chunksize`.
"""
CONSTANTS = {
"ATOL": sys.float_info.epsilon,
"RTOL": sys.float_info.epsilon,
"LOG_LEVEL": logging.getLevelName(logging.getLogger().level),
"CHUNKSIZE": parse_bytes(_CHUNKSIZE),
}


Expand Down
9 changes: 9 additions & 0 deletions cfdm/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@
raise ImportError(_error0 + str(error1))

_minimum_vn = "1.15"
_maximum_vn = "2.0"
_np_version = Version(np.__version__)
if not Version(_minimum_vn) <= _np_version <= Version(_maximum_vn):
raise ValueError(
"Bad numpy version: cfdm requires "
f"{_minimum_vn}<=numpy<={_maximum_vn}. "
f"Got {_np_version} at {np.__file__}"
)

if Version(np.__version__) < Version(_minimum_vn):
raise ValueError(
f"Bad numpy version: cfdm.core requires numpy>={_minimum_vn}. "
Expand Down
19 changes: 15 additions & 4 deletions cfdm/core/abstract/propertiesdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ def __data__(self):

@property
def data(self):
"""Return the data.
"""The data.
``f.data`` is equivalent to ``f.get_data()``
``f.data`` is equivalent to ``f.get_data()``.
Note that a `Data` instance is returned. Use its `array`
attribute to return the data as a `numpy` array.
Note that a `Data` instance is returned. Use the `array`
attribute to get the data as a `numpy` array.
The units, calendar and fill value properties are, if set,
inserted into the data.
Expand Down Expand Up @@ -143,6 +143,17 @@ def data(self):
"""
return self.get_data()

@data.setter
def data(self, value):
raise AttributeError(
"Can't set attribute 'data'. Use the 'set_data' method, "
"or assignment by indexing."
)

@data.deleter
def data(self):
self.del_data()

@property
def dtype(self):
"""Data-type of the data elements.
Expand Down
67 changes: 3 additions & 64 deletions cfdm/core/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def __init__(
fill_value = None

if units is not None:
self.set_units(units)
self._set_component("units", units, copy=False)

if calendar is not None:
self.set_calendar(calendar)
self._set_component("calendar", calendar, copy=False)

if fill_value is not None:
self.set_fill_value(fill_value)
self._set_component("fill_value", fill_value, copy=False)

if _use_array and array is not None:
self._set_Array(array, copy=copy)
Expand All @@ -135,9 +135,6 @@ def __data__(self):
"""
return self

# ----------------------------------------------------------------
# Attributes
# ----------------------------------------------------------------
@property
def array(self):
"""Return an independent `numpy` array containing the data.
Expand Down Expand Up @@ -282,9 +279,6 @@ def size(self):
"""
return self._get_Array().size

# ----------------------------------------------------------------
# Methods
# ----------------------------------------------------------------
def copy(self, array=True):
"""Return a deep copy of the data.
Expand Down Expand Up @@ -365,14 +359,6 @@ def del_calendar(self, default=ValueError()):
"""
return self._del_component("calendar", default)

# try:
# return self._del_component("calendar")
# except ValueError:
# print(88888, repr(default))
# return self._default(
# default, f"{self.__class__.__name__!r} has no calendar"
# )

def del_fill_value(self, default=ValueError()):
"""Delete the fill value.
Expand Down Expand Up @@ -414,14 +400,6 @@ def del_fill_value(self, default=ValueError()):
"""
return self._del_component("fill_value", default)

# try:
# return self._del_component("fill_value")
# except ValueError:
# return self._default(
# default,
# "{!r} has no fill value".format(self.__class__.__name__),
# )

def del_units(self, default=ValueError()):
"""Delete the units.
Expand Down Expand Up @@ -460,13 +438,6 @@ def del_units(self, default=ValueError()):
"""
return self._del_component("units", default)

# try:
# return self._del_component("units")
# except ValueError:
# return self._default(
# default, "{!r} has no units".format(self.__class__.__name__)
# )

def get_calendar(self, default=ValueError()):
"""Return the calendar.
Expand Down Expand Up @@ -506,13 +477,6 @@ def get_calendar(self, default=ValueError()):
"""
return self._get_component("calendar", default)

# try:
# return self._get_component("calendar")
# except ValueError:
# return self._default(
# default, "{!r} has no calendar".format(self.__class__.__name__)
# )

def _get_Array(self, default=ValueError()):
"""Return the array object.
Expand All @@ -537,13 +501,6 @@ def _get_Array(self, default=ValueError()):
"""
return self._get_component("array", default)

# try:
# return self._get_component("array")
# except ValueError:
# return self._default(
# default, "{!r} has no array".format(self.__class__.__name__)
# )

def get_fill_value(self, default=ValueError()):
"""Return the missing data value.
Expand Down Expand Up @@ -587,14 +544,6 @@ def get_fill_value(self, default=ValueError()):
"""
return self._get_component("fill_value", default)

# try:
# return self._get_component("fill_value")
# except ValueError:
# return self._default(
# default,
# "{!r} has no fill value".format(self.__class__.__name__),
# )

def get_units(self, default=ValueError()):
"""Return the units.
Expand Down Expand Up @@ -633,13 +582,6 @@ def get_units(self, default=ValueError()):
"""
return self._get_component("units", default)

# try:
# return self._get_component("units")
# except ValueError:
# return self._default(
# default, "{!r} has no units".format(self.__class__.__name__)
# )

def has_units(self):
"""Whether units have been set.
Expand Down Expand Up @@ -834,9 +776,6 @@ def set_fill_value(self, value):
False
"""
# if value is None:
# self.del_fill_value(None)

self._set_component("fill_value", value, copy=False)

def set_units(self, value):
Expand Down
3 changes: 2 additions & 1 deletion cfdm/data/boundsfromnodesarray.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .abstract import MeshArray
from .mixin import CompressedArrayMixin
from .subarray import BoundsFromNodesSubarray


class BoundsFromNodesArray(MeshArray):
class BoundsFromNodesArray(CompressedArrayMixin, MeshArray):
"""An array of cell bounds defined by UGRID node coordinates.
The UGRID node coordinates contain the locations of the nodes of
Expand Down
3 changes: 2 additions & 1 deletion cfdm/data/cellconnectivityarray.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .abstract import MeshArray
from .mixin import CompressedArrayMixin
from .subarray import CellConnectivitySubarray


class CellConnectivityArray(MeshArray):
class CellConnectivityArray(CompressedArrayMixin, MeshArray):
"""A connectivity array derived from a UGRID connectivity variable.
A UGRID connectivity variable contains indices which map each cell
Expand Down
Loading

0 comments on commit 170ca84

Please sign in to comment.