You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
and I ran into the issue that the `group.create_dataset` call here cannot set new data to `z["time"]` because zarr expects another shape: exactly the shape that already exists in the parquet store `out`.
ValueError Traceback (most recent call last)
Cell In[21], line 12
1 MultiZarrToZarr.append(
2 dflist[11:13],
3 out,
4 coo_map={"time":"cf:time"},
5 concat_dims=['time'],
6 identical_dims=["lat","lon","plev","value"],
7 preprocess=None,
8 remote_protocol="file",
9 #remote_options=dict(lazy=True),
10 #coo_dtypes={"time": "int64"},
11 #postprocess=postprocess
---> 12 ).translate()
File ~/kerchunk/kerchunk/combine.py:604, in MultiZarrToZarr.translate(self, filename, storage_options)
602 self.first_pass()
603 if 2 not in self.done:
--> 604 self.store_coords()
605 if 3 not in self.done:
606 self.second_pass()
File ~/kerchunk/kerchunk/combine.py:417, in MultiZarrToZarr.store_coords(self)
413 elif k in z:
414 # Fall back to existing fill value
415 kw["fill_value"] = z[k].fill_value
--> 417 arr = group.create_dataset(
418 name=k,
419 data=data,
420 overwrite=True,
421 compressor=compression,
422 dtype=self.coo_dtypes.get(k, data.dtype),
423 **kw,
424 )
425 if k in z:
426 # copy attributes if values came from an original variable
427 arr.attrs.update(z[k].attrs)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/hierarchy.py:1094, in Group.create_dataset(self, name, **kwargs)
1035 """Create an array.
1036
1037 Arrays are known as "datasets" in HDF5 terminology. For compatibility
(...)
1090
1091 """
1092 assert "mode" not in kwargs
-> 1094 return self._write_op(self._create_dataset_nosync, name, **kwargs)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/hierarchy.py:935, in Group._write_op(self, f, *args, **kwargs)
932 lock = self._synchronizer[group_meta_key]
934 with lock:
--> 935 return f(*args, **kwargs)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/hierarchy.py:1110, in Group._create_dataset_nosync(self, name, data, **kwargs)
1107 a = create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs)
1109 else:
-> 1110 a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs)
1112 return a
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/creation.py:439, in array(data, **kwargs)
436 z = create(**kwargs)
438 # fill with data
--> 439 z[...] = data
441 # set read_only property afterwards
442 z.read_only = read_only
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/core.py:1497, in Array.__setitem__(self, selection, value)
1495 self.set_orthogonal_selection(pure_selection, value, fields=fields)
1496 else:
-> 1497 self.set_basic_selection(pure_selection, value, fields=fields)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/core.py:1593, in Array.set_basic_selection(self, selection, value, fields)
1591 return self._set_basic_selection_zd(selection, value, fields=fields)
1592 else:
-> 1593 return self._set_basic_selection_nd(selection, value, fields=fields)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/core.py:1983, in Array._set_basic_selection_nd(self, selection, value, fields)
1977 def _set_basic_selection_nd(self, selection, value, fields=None):
1978 # implementation of __setitem__ for array with at least one dimension
1979
1980 # setup indexer
1981 indexer = BasicIndexer(selection, self)
-> 1983 self._set_selection(indexer, value, fields=fields)
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/core.py:2011, in Array._set_selection(self, indexer, value, fields)
2009 if not hasattr(value, "shape"):
2010 value = np.asanyarray(value, like=self._meta_array)
-> 2011 check_array_shape("value", value, sel_shape)
2013 # iterate over chunks in range
2014 if (
2015 not hasattr(self.chunk_store, "setitems")
2016 or self._synchronizer is not None
2017 or any(map(lambda x: x == 0, self.shape))
2018 ):
2019 # iterative approach
File /work/bm0021/conda-envs/xesmf/lib/python3.10/site-packages/zarr/util.py:561, in check_array_shape(param, array, shape)
557 raise TypeError(
558 "parameter {!r}: expected an array-like object, got {!r}".format(param, type(array))
559 )
560 if array.shape != shape:
--> 561 raise ValueError(
562 "parameter {!r}: expected array with shape {!r}, got {!r}".format(
563 param, shape, array.shape
564 )
565 )
ValueError: parameter 'value': expected array with shape (87665,), got (105209,)
I was trying to find a workaround, but I do not know how to prevent zarr from checking this.
The text was updated successfully, but these errors were encountered:
overwrite=True is supposed to deal with this. Can you please show the set of arguments that create_dataset is being called with? Perhaps something got left behind in kwargs.
Hi,
I am trying to append JSON references to a parquet store with `MultiZarrToZarr.append`, and I ran into the issue that the `group.create_dataset` call here cannot set new data to `z["time"]` because zarr expects another shape: exactly the shape that already exists in the parquet store `out`.
I was trying to find a workaround, but I do not know how to prevent zarr from checking this.
The text was updated successfully, but these errors were encountered: