Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
iainrussell authored Jul 19, 2024
2 parents 6f5e9ed + c9d7ef9 commit 2d06f22
Show file tree
Hide file tree
Showing 18 changed files with 221 additions and 26 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
Changelog for cfgrib
====================

0.9.13.0 (2024-06-27)
---------------------

- Allow users to pass a list of values to filter a key by.
See `#384 <https://github.com/ecmwf/cfgrib/pull/384>`_.

- Functionality to ignore keys when reading a grib file
See `#382 <https://github.com/ecmwf/cfgrib/pull/382>`_.

- Preserve coordinate encoding in cfgrib.open_datasets
See `#381 <https://github.com/ecmwf/cfgrib/pull/381>`_.

0.9.12.0 (2024-05-26)
---------------------

Expand Down
2 changes: 1 addition & 1 deletion cfgrib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.9.12.0"
__version__ = "0.9.13.0"

# cfgrib core API depends on the ECMWF ecCodes C-library only
from .abc import Field, Fieldset, Index, MappingFieldset
Expand Down
4 changes: 4 additions & 0 deletions cfgrib/cfmessage.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ def build_valid_time(time, step):
functools.partial(from_grib_date_time, date_key="indexingDate", time_key="indexingTime"),
functools.partial(to_grib_date_time, date_key="indexingDate", time_key="indexingTime"),
),
"valid_month": (
functools.partial(from_grib_date_time, date_key="monthlyVerificationDate", time_key="validityTime"),
functools.partial(to_grib_date_time, date_key="monthlyVerificationDate", time_key="validityTime"),
),
} # type: messages.ComputedKeysType


Expand Down
45 changes: 34 additions & 11 deletions cfgrib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@
"verifying_time",
"forecastMonth",
"indexing_time",
"valid_month",
]
SPECTRA_KEYS = ["directionNumber", "frequencyNumber"]

Expand Down Expand Up @@ -251,6 +252,12 @@
"standard_name": "time",
"long_name": "time",
},
"valid_month": {
"units": "seconds since 1970-01-01T00:00:00",
"calendar": "proleptic_gregorian",
"standard_name": "time",
"long_name": "time",
},
"verifying_time": {
"units": "seconds since 1970-01-01T00:00:00",
"calendar": "proleptic_gregorian",
Expand Down Expand Up @@ -333,9 +340,9 @@ def get_values_in_order(message, shape):
class OnDiskArray:
index: abc.Index[T.Any, abc.Field]
shape: T.Tuple[int, ...]
field_id_index: T.Dict[
T.Tuple[T.Any, ...], T.List[T.Union[int, T.Tuple[int, int]]]
] = attr.attrib(repr=False)
field_id_index: T.Dict[T.Tuple[T.Any, ...], T.List[T.Union[int, T.Tuple[int, int]]]] = (
attr.attrib(repr=False)
)
missing_value: float
geo_ndim: int = attr.attrib(default=1, repr=False)
dtype = np.dtype("float32")
Expand Down Expand Up @@ -458,10 +465,7 @@ def encode_cf_first(data_var_attrs, encode_cf=("parameter", "time"), time_dims=(
if "GRIB_units" in data_var_attrs:
data_var_attrs["units"] = data_var_attrs["GRIB_units"]
if "time" in encode_cf:
if set(time_dims).issubset(ALL_REF_TIME_KEYS):
coords_map.extend(time_dims)
else:
raise ValueError("time_dims %r not a subset of %r" % (time_dims, ALL_REF_TIME_KEYS))
coords_map.extend(time_dims)
else:
coords_map.extend(DATA_TIME_KEYS)
coords_map.extend(VERTICAL_KEYS)
Expand Down Expand Up @@ -491,6 +495,7 @@ def build_variable_components(
read_keys: T.Iterable[str] = (),
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
coords_as_attributes: T.Dict[str, str] = {},
cache_geo_coords: bool = True,
) -> T.Tuple[T.Dict[str, int], Variable, T.Dict[str, Variable]]:
data_var_attrs = enforce_unique_attributes(index, DATA_ATTRIBUTES_KEYS, filter_by_keys)
Expand All @@ -499,8 +504,9 @@ def build_variable_components(
first = index.first()
extra_attrs = read_data_var_attrs(first, extra_keys)
data_var_attrs.update(**extra_attrs)
coords_map = encode_cf_first(data_var_attrs, encode_cf, time_dims)

coords_map = encode_cf_first(
data_var_attrs, encode_cf, time_dims,
)
coord_name_key_map = {}
coord_vars = {}
for coord_key in coords_map:
Expand All @@ -516,6 +522,9 @@ def build_variable_components(
and "GRIB_typeOfLevel" in data_var_attrs
):
coord_name = data_var_attrs["GRIB_typeOfLevel"]
if coord_name in coords_as_attributes and len(values) == 1:
data_var_attrs[f"GRIB_{coord_name}"] = values
continue
coord_name_key_map[coord_name] = coord_key
attributes = {
"long_name": "original GRIB coordinate for key: %s(%s)" % (orig_name, coord_name),
Expand Down Expand Up @@ -662,12 +671,21 @@ def build_dataset_components(
read_keys: T.Iterable[str] = (),
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
coords_as_attributes: T.Dict[str, str] = {},
cache_geo_coords: bool = True,
) -> T.Tuple[T.Dict[str, int], T.Dict[str, Variable], T.Dict[str, T.Any], T.Dict[str, T.Any]]:
dimensions = {} # type: T.Dict[str, int]
variables = {} # type: T.Dict[str, Variable]
filter_by_keys = index.filter_by_keys

# Warn about time_dims here to prevent repeated messages in build_variable_components
if errors != "ignore" and not set(time_dims).issubset(ALL_REF_TIME_KEYS):
log.warning(
"Not all time_dimensions are recognised, those which are not in the following list will not "
" be decoded as datetime objects:\n"
f"{ALL_REF_TIME_KEYS}"
)

for param_id in index.get("paramId", []):
var_index = index.subindex(paramId=param_id)
try:
Expand All @@ -680,6 +698,7 @@ def build_dataset_components(
read_keys=read_keys,
time_dims=time_dims,
extra_coords=extra_coords,
coords_as_attributes=coords_as_attributes,
cache_geo_coords=cache_geo_coords,
)
except DatasetBuildError as ex:
Expand Down Expand Up @@ -752,6 +771,7 @@ def open_fieldset(
indexpath: T.Optional[str] = None,
filter_by_keys: T.Dict[str, T.Any] = {},
read_keys: T.Sequence[str] = (),
ignore_keys: T.Sequence[str] = [],
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
Expand All @@ -763,6 +783,7 @@ def open_fieldset(
log.warning(f"indexpath value {indexpath} is ignored")

index_keys = compute_index_keys(time_dims, extra_coords, filter_by_keys)
index_keys = [key for key in index_keys if key not in ignore_keys]
index = messages.FieldsetIndex.from_fieldset(fieldset, index_keys, computed_keys)
filtered_index = index.subindex(filter_by_keys)
return open_from_index(filtered_index, read_keys, time_dims, extra_coords, **kwargs)
Expand All @@ -772,10 +793,12 @@ def open_fileindex(
stream: messages.FileStream,
indexpath: str = messages.DEFAULT_INDEXPATH,
index_keys: T.Sequence[str] = INDEX_KEYS + ["time", "step"],
ignore_keys: T.Sequence[str] = [],
filter_by_keys: T.Dict[str, T.Any] = {},
computed_keys: messages.ComputedKeysType = cfmessage.COMPUTED_KEYS,
) -> messages.FileIndex:
index_keys = sorted(set(index_keys) | set(filter_by_keys))
index_keys = [key for key in index_keys if key not in ignore_keys]
index = messages.FileIndex.from_indexpath_or_filestream(
stream, index_keys, indexpath=indexpath, computed_keys=computed_keys
)
Expand All @@ -790,12 +813,12 @@ def open_file(
read_keys: T.Sequence[str] = (),
time_dims: T.Sequence[str] = ("time", "step"),
extra_coords: T.Dict[str, str] = {},
ignore_keys: T.Sequence[str] = [],
**kwargs: T.Any,
) -> Dataset:
"""Open a GRIB file as a ``cfgrib.Dataset``."""
path = os.fspath(path)
stream = messages.FileStream(path, errors=errors)
index_keys = compute_index_keys(time_dims, extra_coords)
index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)

index = open_fileindex(stream, indexpath, index_keys, ignore_keys=ignore_keys, filter_by_keys=filter_by_keys)
return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)
5 changes: 4 additions & 1 deletion cfgrib/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,10 @@ def subindex(self, filter_by_keys={}, **query):
field_ids_index = []
for header_values, field_ids_values in self.field_ids_index:
for idx, val in raw_query:
if header_values[idx] != val:
# Ensure that the value to be tested is a list or tuple
if not isinstance(val, (list, tuple)):
val = [val]
if header_values[idx] not in val:
break
else:
field_ids_index.append((header_values, field_ids_values))
Expand Down
4 changes: 4 additions & 0 deletions cfgrib/xarray_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,24 +99,28 @@ def open_dataset(
indexpath: str = messages.DEFAULT_INDEXPATH,
filter_by_keys: T.Dict[str, T.Any] = {},
read_keys: T.Iterable[str] = (),
ignore_keys: T.Iterable[str] = (),
encode_cf: T.Sequence[str] = ("parameter", "time", "geography", "vertical"),
squeeze: bool = True,
time_dims: T.Iterable[str] = ("time", "step"),
errors: str = "warn",
extra_coords: T.Dict[str, str] = {},
coords_as_attributes: T.Dict[str, str] = {},
cache_geo_coords: bool = True,
) -> xr.Dataset:
store = CfGribDataStore(
filename_or_obj,
indexpath=indexpath,
filter_by_keys=filter_by_keys,
read_keys=read_keys,
ignore_keys=ignore_keys,
encode_cf=encode_cf,
squeeze=squeeze,
time_dims=time_dims,
lock=lock,
errors=errors,
extra_coords=extra_coords,
coords_as_attributes=coords_as_attributes,
cache_geo_coords=cache_geo_coords,
)
with xr.core.utils.close_on_error(store):
Expand Down
12 changes: 6 additions & 6 deletions ci/requirements-docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@
alabaster==0.7.12 # via sphinx
attrs==19.3.0
babel==2.9.1 # via sphinx
certifi==2023.7.22 # via requests
certifi==2024.07.04 # via requests
cffi==1.14.0
chardet==3.0.4 # via requests
click==7.1.2
docutils==0.16 # via sphinx
idna==2.9 # via requests
idna==3.7 # via requests
imagesize==1.2.0 # via sphinx
jinja2==2.11.3 # via sphinx
jinja2==3.1.4 # via sphinx
markupsafe==1.1.1 # via jinja2
numpy==1.22.0
packaging==20.3 # via sphinx
pandas==1.0.3 # via xarray
pycparser==2.20 # via cffi
pygments==2.7.4 # via sphinx
pygments==2.15.0 # via sphinx
pyparsing==2.4.7 # via packaging
pytest-runner==5.2
python-dateutil==2.8.1 # via pandas
pytz==2020.1 # via babel, pandas
requests==2.31.0 # via sphinx
requests==2.32.0 # via sphinx
six==1.14.0 # via packaging, python-dateutil
snowballstemmer==2.0.0 # via sphinx
sphinx==3.0.3
Expand All @@ -35,5 +35,5 @@ sphinxcontrib-htmlhelp==1.0.3 # via sphinx
sphinxcontrib-jsmath==1.0.1 # via sphinx
sphinxcontrib-qthelp==1.0.3 # via sphinx
sphinxcontrib-serializinghtml==1.1.4 # via sphinx
urllib3==1.26.5 # via requests
urllib3==1.26.19 # via requests
xarray==0.15.1
5 changes: 2 additions & 3 deletions ci/requirements-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ packaging==20.3 # via pytest
pandas==1.0.3 # via xarray
pep8==1.7.1 # via pytest-pep8
pluggy==0.13.1 # via pytest
py==1.10.0 # via pytest
pycparser==2.20 # via cffi
pyflakes==2.2.0 # via pytest-flakes
pyparsing==2.4.7 # via packaging
Expand All @@ -29,12 +28,12 @@ pytest-flakes==4.0.0
pytest-mccabe==1.0
pytest-pep8==1.0.6
pytest-runner==5.2
pytest==5.4.2
pytest==7.2.0
python-dateutil==2.8.1 # via pandas
pytz==2020.1 # via pandas
scipy==1.8.0
six==1.14.0 # via packaging, python-dateutil
toolz==0.10.0 # via dask
wcwidth==0.1.9 # via pytest
xarray==0.15.1
zipp==3.1.0 # via importlib-metadata
zipp==3.19.1 # via importlib-metadata
2 changes: 1 addition & 1 deletion tests/environment-macos-3.8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ dependencies:
- python_abi=3.8
- pytz=2023.3
- readline=8.2
- requests=2.31.0
- requests=2.32.0
- scipy=1.10.1
- setuptools=68.0.0
- six=1.16.0
Expand Down
2 changes: 1 addition & 1 deletion tests/environment-ubuntu-3.7.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies:
- pip=21.2.2
- pluggy=1.0.0
- psutil=5.8.0
- py=1.10.0
- py=1.11.0
- pycparser=2.21
- pyparsing=3.0.4
- pytest=6.2.5
Expand Down
2 changes: 1 addition & 1 deletion tests/environment-ubuntu-3.8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ dependencies:
- python_abi=3.8
- pytz=2023.3
- readline=8.2
- requests=2.31.0
- requests=2.32.0
- s2n=1.3.48
- scipy=1.10.1
- setuptools=68.0.0
Expand Down
2 changes: 1 addition & 1 deletion tests/environment-windows-3.8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies:
- pip=21.2.2
- pluggy=0.13.1
- psutil=5.8.0
- py=1.10.0
- py=1.11.0
- pycparser=2.21
- pyparsing=3.0.4
- pytest=6.2.4
Expand Down
Binary file added tests/sample-data/cams-egg4-monthly.grib
Binary file not shown.
Binary file added tests/sample-data/era5-levels-members.nc
Binary file not shown.
Binary file added tests/sample-data/soil-surface-level-mix.grib
Binary file not shown.
Loading

0 comments on commit 2d06f22

Please sign in to comment.