From e050dbbad65ea0546b7446f8ebf7de6b6044d600 Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Fri, 13 Oct 2023 09:10:33 +0000 Subject: [PATCH 1/6] Add tensor storage format abstraction Format abstraction is based on [https://doi.org/10.1145/3276493]. --- dace/data.py | 651 +++++++++++++++++++++++++++++++++ tests/sdfg/data/tensor_test.py | 129 +++++++ 2 files changed, 780 insertions(+) create mode 100644 tests/sdfg/data/tensor_test.py diff --git a/dace/data.py b/dace/data.py index 0a9858458b..5c2a67214b 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1,8 +1,10 @@ # Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. +import aenum import copy as cp import ctypes import functools +from abc import ABC, abstractmethod from collections import OrderedDict from numbers import Number from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union @@ -482,6 +484,655 @@ def __getitem__(self, s): if isinstance(s, list) or isinstance(s, tuple): return StructArray(self, tuple(s)) return StructArray(self, (s, )) + + +class TensorIterationTypes(aenum.AutoNumberEnum): + """ + Types of tensor iteration capabilities. + + Value (Coordinate Value Iteration) allows to directly iterate over + coordinates such as when using the Dense index type. + + Position (Coordinate Position Iteration) iterates over coordinate + positions, at which the actual coordinates lie. This is for example the case + with a compressed index, in which the pos array enables one to iterate over + the positions in the crd array that hold the actual coordinates. + """ + Value = () + Position = () + + +class TensorAssemblyType(aenum.AutoNumberEnum): + """ + Types of possible assembly strategies for the individual indices. + + NoAssembly: Assembly is not possible as such. + + Insert: index allows inserting elements at random (e.g. Dense) + + Append: index allows appending to a list of existing coordinates. Depending + on append order, this affects whether the index is ordered or not. 
This + could be changed by sorting the index after assembly + """ + NoAssembly = () + Insert = () + Append = () + + +class TensorIndex(ABC): + """ + Abstract base class for tensor index implementations. + """ + + @property + @abstractmethod + def iteration_type(self) -> TensorIterationTypes: + """ + Iteration capability supported by this index. + + See TensorIterationTypes for reference. + """ + pass + + @property + @abstractmethod + def locate(self) -> bool: + """ + True if the index supports locate (aka random access), False otw. + """ + pass + + @property + @abstractmethod + def assembly(self) -> TensorAssemblyType: + """ + What assembly type is supported by the index. + + See TensorAssemblyType for reference. + """ + pass + + @property + @abstractmethod + def full(self) -> bool: + """ + True if the level is full, False otw. + + A level is considered full if it encompasses all valid coordinates along + the corresponding tensor dimension. + """ + pass + + @property + @abstractmethod + def ordered(self) -> bool: + """ + True if the level is ordered, False otw. + + A level is ordered when all coordinates that share the same ancestor are + ordered by increasing value (e.g. in typical CSR). + """ + pass + + @property + @abstractmethod + def unique(sefl) -> bool: + """ + True if coordinate in the level are unique, False otw. + + A level is considered unique if no collection of coordinates that share + the same ancestor contains duplicates. In CSR this is True, in COO it is + not. + """ + pass + + @property + @abstractmethod + def branchless(sefl) -> bool: + """ + True if the level doesn't branch, false otw. + + A level is considered branchless if no coordinate has a sibling (another + coordinate with same ancestor) and all coordinates in parent level have + a child. In other words if there is a bijection between the coordinates + in this level and the parent level. An example of the is the Singelton + index level in the COO format. 
+ """ + pass + + @property + @abstractmethod + def compact(sefl) -> bool: + """ + True if the level is compact, false otw. + + A level is compact if no two coordinates are separated by an unlabeled + node that does not encode a coordinate. An example of a compact level + can be found in CSR, while the DIA format's range and offset levels are + not compact (they have entries that would correspond to entries outside + the tensor's index range, e.g. column -1). + """ + pass + + @abstractmethod + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + """ + Generates the fields needed for the index. + + :returns: an OrderedDict of fields that need to be present in the struct + """ + pass + + +class Dense(TensorIndex): + """ + Dense tensor index. + + Levels of this type encode the coordinate in the interval [0, N), where + N is the size of the corresponding dimension. This level doesn't need any + index structure beyond the corresponding dimension size. + """ + + _ordered: bool + _unique: bool + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Value + + @property + def locate(self) -> bool: + return True + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Insert + + @property + def full(self) -> bool: + return True + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(sefl) -> bool: + return False + + @property + def compact(sefl) -> bool: + return True + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + return {} + + def __repr__(self) -> str: + s = "Dense" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if 
len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +class Compressed(TensorIndex): + """ + Tensor level that stores coordinates in segmented array. + + Levels of this type are compressed using a segmented array. The pos array + holds the start and end positions of the segment in the crd (coordinate) + array that holds the child coordinates corresponding the parent. + """ + + _full: bool + _ordered: bool + _unique: bool + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Append + + @property + def full(self) -> bool: + return self._full + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(sefl) -> bool: + return False + + @property + def compact(sefl) -> bool: + return True + + def __init__(self, + full: bool = False, + ordered: bool = True, + unique: bool = True): + self._full = full + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + return { + f"idx{lvl}_pos": dtypes.int32[dummy_symbol], # TODO (later) choose better length + f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Compressed" + + non_defaults = [] + if self._full: + non_defaults.append("F") + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +class Singelton(TensorIndex): + """ + Tensor index that encodes a single coordinate per parent coordinate. + + Levels of this type hold exactly one coordinate for every coordinate in the + parent level. 
An example can be seen in the COO format, where every + coordinate but the first is encoded in this manner. + """ + + _full: bool + _ordered: bool + _unique: bool + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.Append + + @property + def full(self) -> bool: + return self._full + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(sefl) -> bool: + return True + + @property + def compact(sefl) -> bool: + return True + + def __init__(self, + full: bool = False, + ordered: bool = True, + unique: bool = True): + self._full = full + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + return { + f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Singelton" + + non_defaults = [] + if self._full: + non_defaults.append("F") + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +class Range(TensorIndex): + """ + Tensor index that encodes a interval of coordinates for every parent. + + The interval is computed from an offset for each parent together with the + tensor dimension size of this level (M) and the parent level (N) parents + corresponding tensor. Given the parent coordinate i, the level encodes the + range of coordinates between max(0, -offset[i]) and min(N, M - offset[i]). 
+ """ + + _ordered: bool + _unique: bool + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Value + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.NoAssembly + + @property + def full(self) -> bool: + return False + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(sefl) -> bool: + return False + + @property + def compact(sefl) -> bool: + return False + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + return { + f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Range" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +class Offset(TensorIndex): + """ + Tensor index that encodes the next coordinates as offset from parent. + + Given a parent coordinate i and an offset index k, the level encodes the + coordinate j = i + offset[k]. 
+ """ + + _ordered: bool + _unique: bool + + @property + def iteration_type(self) -> TensorIterationTypes: + return TensorIterationTypes.Position + + @property + def locate(self) -> bool: + return False + + @property + def assembly(self) -> TensorAssemblyType: + return TensorAssemblyType.NoAssembly + + @property + def full(self) -> bool: + return False + + @property + def ordered(self) -> bool: + return self._ordered + + @property + def unique(self) -> bool: + return self._unique + + @property + def branchless(sefl) -> bool: + return True + + @property + def compact(sefl) -> bool: + return False + + def __init__(self, ordered: bool = True, unique: bool = True): + self._ordered = ordered + self._unique = unique + + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + return { + f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length + } + + def __repr__(self) -> str: + s = "Offset" + + non_defaults = [] + if not self._ordered: + non_defaults.append("¬O") + if not self._unique: + non_defaults.append("¬U") + + if len(non_defaults) > 0: + s += f"({','.join(non_defaults)})" + + return s + + +@make_properties +class Tensor(Structure): + """ + Abstraction for Tensor storage format. + + This abstraction is based on [https://doi.org/10.1145/3276493]. 
+ """ + + value_dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses) + tensor_shape = ShapeProperty(default=[]) + indices = ListProperty(element_type=Union[Tuple[TensorIndex, int], Tuple[TensorIndex, symbolic.SymExpr]]) + value_count = SymbolicProperty + + def __init__( + self, + value_dtype: dtypes.Typeclasses, + tensor_shape, + indices: List[Union[Tuple[TensorIndex, int], Tuple[TensorIndex, symbolic.SymExpr]]], + value_count: symbolic.SymExpr, + name: str, + transient: bool = False, + storage: dtypes.StorageType = dtypes.StorageType.Default, + location: Dict[str, str] = None, + lifetime: dtypes.AllocationLifetime = dtypes.AllocationLifetime.Scope, + debuginfo: dtypes.DebugInfo = None): + """ + Constructor for Tensor storage format. + + Below are examples of common matrix storage formats: + + .. code-block:: python + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + + csr = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + nnz, + "CSR_Matrix", + ) + + csc = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 1), (dace.data.Compressed(), 0)], + nnz, + "CSC_Matrix", + ) + + coo = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.Compressed(unique=False), 0), + (dace.data.Singelton(), 1), + ], + nnz, + "CSC_Matrix", + ) + + num_diags = dace.symbol('num_diags') # number of diagonals stored + + diag = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.Dense(), num_diags), + (dace.data.Range(), 0), + (dace.data.Offset(), 1), + ], + nnz, + "DIA_Matrix", + ) + + Below you can find examples of common 3rd order tensor storage formats: + + .. 
code-block:: python + + I, J, K, nnz = (dace.symbol(s) for s in ('I', 'J', 'K', 'nnz')) + + coo = dace.data.Tensor( + dace.float32, + (I, J, K), + [ + (dace.data.Compressed(unique=False), 0), + (dace.data.Singelton(unique=False), 1), + (dace.data.Singelton(), 2), + ], + nnz, + "COO_3D_Tensor", + ) + + csf = dace.data.Tensor( + dace.float32, + (I, J, K), + [ + (dace.data.Compressed(), 0), + (dace.data.Compressed(), 1), + (dace.data.Compressed(), 2), + ], + nnz, + "CSF_3D_Tensor", + ) + + :param value_dtype: data type of the explicitly stored values. + :param tensor_shape: logical shape of tensor (#rows, #cols, etc...) + :param indices: + a list of tuples, each tuple represents a level in the tensor + storage hierarchy, specifying the level's tensor index type, and the + corresponding dimension this level encodes (as index of the + tensor_shape tuple above). The order of the dimensions may differ + from the logical shape of the tensor, e.g. as seen in the CSC + format. If an index's dimension is unrelated to the tensor shape + (e.g. in diagonal format where the first index's dimension is the + number of diagonals stored), a symbol can be specified instead. + :param value_count: number of explicitly stored values. + :param name: name of resulting struct. + :param others: See Structure class for remaining arguments + """ + + self.value_dtype = value_dtype + self.tensor_shape = tensor_shape + self.indices = indices + self.value_count = value_count + + num_dims = len(tensor_shape) + dimension_order = [idx[1] for idx in indices if isinstance(idx[1], int)] + + # all tensor dimensions must occure exactly once in indices + if not sorted(dimension_order) == list(range(num_dims)): + raise TypeError(( + f"All tensor dimensions must be refferenced exactly once in " + f"tensor indices. 
(referenced dimensions: {dimension_order}; " + f"tensor dimensions: {list(range(num_dims))})" + )) + + # assembling permanent and index specific fields + fields = dict( + order=Scalar(dtypes.int32), + dim_sizes=dtypes.int32[num_dims], + value_count=value_count, + values=dtypes.float32[value_count], + ) + + indices_pure = [idx[0] for idx in indices]; + for (lvl, index) in enumerate(indices_pure): + fields |= index.fields(lvl, value_count) + + abbreviation = ''.join(str(idx)[0] for idx in indices_pure) + + super(Tensor, self).__init__(fields, name, transient, storage, location, + lifetime, debuginfo) + + def __repr__(self): + return f"{self.name} (dtype: {self.value_dtype}, shape: {list(self.tensor_shape)}, indices: {self.indices})" @make_properties diff --git a/tests/sdfg/data/tensor_test.py b/tests/sdfg/data/tensor_test.py new file mode 100644 index 0000000000..6057441b3e --- /dev/null +++ b/tests/sdfg/data/tensor_test.py @@ -0,0 +1,129 @@ +# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved. 
+import dace +import numpy as np +import pytest + +from scipy import sparse + + +def test_read_csr_tensor(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + csr_obj = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + nnz, + "CSR_Tensor") + + sdfg = dace.SDFG('tensor_csr_to_dense') + + sdfg.add_datadesc('A', csr_obj) + sdfg.add_array('B', [M, N], dace.float32) + + sdfg.add_view('vindptr', csr_obj.members['idx1_pos'].shape, csr_obj.members['idx1_pos'].dtype) + sdfg.add_view('vindices', csr_obj.members['idx1_crd'].shape, csr_obj.members['idx1_crd'].dtype) + sdfg.add_view('vdata', csr_obj.members['values'].shape, csr_obj.members['values'].dtype) + + state = sdfg.add_state() + + A = state.add_access('A') + B = state.add_access('B') + + indptr = state.add_access('vindptr') + indices = state.add_access('vindices') + data = state.add_access('vdata') + + state.add_edge(A, None, indptr, 'views', dace.Memlet.from_array('A.idx1_pos', csr_obj.members['idx1_pos'])) + state.add_edge(A, None, indices, 'views', dace.Memlet.from_array('A.idx1_crd', csr_obj.members['idx1_crd'])) + state.add_edge(A, None, data, 'views', dace.Memlet.from_array('A.values', csr_obj.members['values'])) + + ime, imx = state.add_map('i', dict(i='0:M')) + jme, jmx = state.add_map('idx', dict(idx='start:stop')) + jme.add_in_connector('start') + jme.add_in_connector('stop') + t = state.add_tasklet('indirection', {'j', '__val'}, {'__out'}, '__out[i, j] = __val') + + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i'), dst_conn='start') + state.add_memlet_path(indptr, ime, jme, memlet=dace.Memlet(data='vindptr', subset='i+1'), dst_conn='stop') + state.add_memlet_path(indices, ime, jme, t, memlet=dace.Memlet(data='vindices', subset='idx'), dst_conn='j') + state.add_memlet_path(data, ime, jme, t, memlet=dace.Memlet(data='vdata', subset='idx'), dst_conn='__val') + state.add_memlet_path(t, jmx, imx, B, 
memlet=dace.Memlet(data='B', subset='0:M, 0:N', volume=1), src_conn='__out') + + func = sdfg.compile() + + rng = np.random.default_rng(42) + A = sparse.random(20, 20, density=0.1, format='csr', dtype=np.float32, random_state=rng) + B = np.zeros((20, 20), dtype=np.float32) + + inpA = csr_obj.dtype._typeclass.as_ctypes()(idx1_pos=A.indptr.__array_interface__['data'][0], + idx1_crd=A.indices.__array_interface__['data'][0], + values=A.data.__array_interface__['data'][0]) + + func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) + ref = A.toarray() + + assert np.allclose(B, ref) + + +def test_csr_fields(): + + M, N, nnz = (dace.symbol(s) for s in ('M', 'N', 'nnz')) + + csr = dace.data.Tensor( + dace.float32, + (M, N), + [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + nnz, + "CSR_Matrix", + ) + + expected_fields = ["idx1_pos", "idx1_crd"] + assert all(key in csr.members.keys() for key in expected_fields) + + +def test_dia_fields(): + + M, N, nnz, num_diags = (dace.symbol(s) for s in ('M', 'N', 'nnz', 'num_diags')) + + diag = dace.data.Tensor( + dace.float32, + (M, N), + [ + (dace.data.Dense(), num_diags), + (dace.data.Range(), 0), + (dace.data.Offset(), 1), + ], + nnz, + "DIA_Matrix", + ) + + expected_fields = ["idx1_offset", "idx2_offset"] + assert all(key in diag.members.keys() for key in expected_fields) + + +def test_coo_fields(): + + I, J, K, nnz = (dace.symbol(s) for s in ('I', 'J', 'K', 'nnz')) + + coo = dace.data.Tensor( + dace.float32, + (I, J, K), + [ + (dace.data.Compressed(unique=False), 0), + (dace.data.Singelton(unique=False), 1), + (dace.data.Singelton(), 2), + ], + nnz, + "COO_3D_Tensor", + ) + + expected_fields = ["idx0_pos", "idx0_crd", "idx1_crd", "idx2_crd"] + assert all(key in coo.members.keys() for key in expected_fields) + + +if __name__ == "__main__": + test_read_csr_tensor() + test_csr_fields() + test_dia_fields() + test_coo_fields() From ee5946bbb6e02e5da95c3405dd6190414189e91c Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 19 
Oct 2023 08:14:35 +0000 Subject: [PATCH 2/6] Fix type signature from OrderedDict to Dict --- dace/data.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dace/data.py b/dace/data.py index 5c2a67214b..4bef5a9218 100644 --- a/dace/data.py +++ b/dace/data.py @@ -615,7 +615,7 @@ def compact(sefl) -> bool: pass @abstractmethod - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: """ Generates the fields needed for the index. @@ -672,8 +672,8 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._ordered = ordered self._unique = unique - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: - return {} + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return OrderedDict() def __repr__(self) -> str: s = "Dense" @@ -743,11 +743,11 @@ def __init__(self, self._ordered = ordered self._unique = unique - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: - return { + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return OrderedDict({ f"idx{lvl}_pos": dtypes.int32[dummy_symbol], # TODO (later) choose better length f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length - } + }) def __repr__(self) -> str: s = "Compressed" @@ -819,10 +819,10 @@ def __init__(self, self._ordered = ordered self._unique = unique - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: - return { + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return OrderedDict({ f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length - } + }) def __repr__(self) -> str: s = "Singelton" @@ -890,10 +890,10 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._ordered = ordered 
self._unique = unique - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: - return { + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return OrderedDict({ f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length - } + }) def __repr__(self) -> str: s = "Range" @@ -957,10 +957,10 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._ordered = ordered self._unique = unique - def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> OrderedDict[str, Data]: - return { + def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: + return OrderedDict({ f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length - } + }) def __repr__(self) -> str: s = "Offset" From a4b94bbe2750f864493b4259c2944e93942d6cba Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 19 Oct 2023 08:17:26 +0000 Subject: [PATCH 3/6] Fix typos sefl and Singelton --- dace/data.py | 38 +++++++++++++++++----------------- tests/sdfg/data/tensor_test.py | 4 ++-- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/dace/data.py b/dace/data.py index 4bef5a9218..77a184cd12 100644 --- a/dace/data.py +++ b/dace/data.py @@ -576,7 +576,7 @@ def ordered(self) -> bool: @property @abstractmethod - def unique(sefl) -> bool: + def unique(self) -> bool: """ True if coordinate in the level are unique, False otw. @@ -588,21 +588,21 @@ def unique(sefl) -> bool: @property @abstractmethod - def branchless(sefl) -> bool: + def branchless(self) -> bool: """ True if the level doesn't branch, false otw. A level is considered branchless if no coordinate has a sibling (another coordinate with same ancestor) and all coordinates in parent level have a child. In other words if there is a bijection between the coordinates - in this level and the parent level. An example of the is the Singelton + in this level and the parent level. 
An example of this is the Singleton index level in the COO format. """ pass @property @abstractmethod - def compact(sefl) -> bool: + def compact(self) -> bool: """ True if the level is compact, false otw. @@ -661,11 +661,11 @@ def unique(self) -> bool: return self._unique @property - def branchless(sefl) -> bool: + def branchless(self) -> bool: return False @property - def compact(sefl) -> bool: + def compact(self) -> bool: return True def __init__(self, ordered: bool = True, unique: bool = True): @@ -728,11 +728,11 @@ def unique(self) -> bool: return self._unique @property - def branchless(sefl) -> bool: + def branchless(self) -> bool: return False @property - def compact(sefl) -> bool: + def compact(self) -> bool: return True def __init__(self, @@ -766,7 +766,7 @@ def __repr__(self) -> str: return s -class Singelton(TensorIndex): +class Singleton(TensorIndex): """ Tensor index that encodes a single coordinate per parent coordinate. @@ -804,11 +804,11 @@ def unique(self) -> bool: return self._unique @property - def branchless(sefl) -> bool: + def branchless(self) -> bool: return True @property - def compact(sefl) -> bool: + def compact(self) -> bool: return True def __init__(self, @@ -825,7 +825,7 @@ def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: }) def __repr__(self) -> str: - s = "Singelton" + s = "Singleton" non_defaults = [] if self._full: @@ -879,11 +879,11 @@ def unique(self) -> bool: return self._unique @property - def branchless(sefl) -> bool: + def branchless(self) -> bool: return False @property - def compact(sefl) -> bool: + def compact(self) -> bool: return False def __init__(self, ordered: bool = True, unique: bool = True): @@ -946,11 +946,11 @@ def unique(self) -> bool: return self._unique @property - def branchless(sefl) -> bool: + def branchless(self) -> bool: return True @property - def compact(sefl) -> bool: + def compact(self) -> bool: return False def __init__(self, ordered: bool = True, unique: bool = True): @@ 
-1032,7 +1032,7 @@ def __init__( (M, N), [ (dace.data.Compressed(unique=False), 0), - (dace.data.Singelton(), 1), + (dace.data.Singleton(), 1), ], nnz, "CSC_Matrix", @@ -1063,8 +1063,8 @@ def __init__( (I, J, K), [ (dace.data.Compressed(unique=False), 0), - (dace.data.Singelton(unique=False), 1), - (dace.data.Singelton(), 2), + (dace.data.Singleton(unique=False), 1), + (dace.data.Singleton(), 2), ], nnz, "COO_3D_Tensor", diff --git a/tests/sdfg/data/tensor_test.py b/tests/sdfg/data/tensor_test.py index 6057441b3e..e6e564bde7 100644 --- a/tests/sdfg/data/tensor_test.py +++ b/tests/sdfg/data/tensor_test.py @@ -111,8 +111,8 @@ def test_coo_fields(): (I, J, K), [ (dace.data.Compressed(unique=False), 0), - (dace.data.Singelton(unique=False), 1), - (dace.data.Singelton(), 2), + (dace.data.Singleton(unique=False), 1), + (dace.data.Singleton(), 2), ], nnz, "COO_3D_Tensor", From 0b696cfa783844b338fb7e08e7f9e1b10a86fe71 Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 19 Oct 2023 09:27:50 +0000 Subject: [PATCH 4/6] Remove OrderedDict in favor of Dict --- dace/data.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dace/data.py b/dace/data.py index 77a184cd12..ee9d841eaf 100644 --- a/dace/data.py +++ b/dace/data.py @@ -619,7 +619,7 @@ def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: """ Generates the fields needed for the index. 
- :returns: an OrderedDict of fields that need to be present in the struct + :returns: a Dict of fields that need to be present in the struct """ pass @@ -673,7 +673,7 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._unique = unique def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: - return OrderedDict() + return {} def __repr__(self) -> str: s = "Dense" @@ -744,10 +744,10 @@ def __init__(self, self._unique = unique def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: - return OrderedDict({ + return { f"idx{lvl}_pos": dtypes.int32[dummy_symbol], # TODO (later) choose better length f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length - }) + } def __repr__(self) -> str: s = "Compressed" @@ -820,9 +820,9 @@ def __init__(self, self._unique = unique def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: - return OrderedDict({ + return { f"idx{lvl}_crd": dtypes.int32[dummy_symbol], # TODO (later) choose better length - }) + } def __repr__(self) -> str: s = "Singleton" @@ -891,9 +891,9 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._unique = unique def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: - return OrderedDict({ + return { f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length - }) + } def __repr__(self) -> str: s = "Range" @@ -958,9 +958,9 @@ def __init__(self, ordered: bool = True, unique: bool = True): self._unique = unique def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: - return OrderedDict({ + return { f"idx{lvl}_offset": dtypes.int32[dummy_symbol], # TODO (later) choose better length - }) + } def __repr__(self) -> str: s = "Offset" From 33aa5b3b2b70fa7827a428ad3dc990764e85a47f Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 19 Oct 2023 10:36:42 +0000 Subject: [PATCH 5/6] Replace |= with .update() for backwards 
compatibility --- dace/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace/data.py b/dace/data.py index ee9d841eaf..5cffa3fb7c 100644 --- a/dace/data.py +++ b/dace/data.py @@ -1124,7 +1124,7 @@ def __init__( indices_pure = [idx[0] for idx in indices]; for (lvl, index) in enumerate(indices_pure): - fields |= index.fields(lvl, value_count) + fields.update(index.fields(lvl, value_count)) abbreviation = ''.join(str(idx)[0] for idx in indices_pure) From 7586113b5bf4b3045da2abb2b8890609531180c2 Mon Sep 17 00:00:00 2001 From: Jan Kleine Date: Thu, 26 Oct 2023 12:12:16 +0000 Subject: [PATCH 6/6] Fix serialization issues --- dace/data.py | 98 +++++++++++++++++++++++++--------- tests/sdfg/data/tensor_test.py | 18 ++++--- 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/dace/data.py b/dace/data.py index 5cffa3fb7c..199e7dabd4 100644 --- a/dace/data.py +++ b/dace/data.py @@ -624,7 +624,38 @@ def fields(self, lvl: int, dummy_symbol: symbolic.SymExpr) -> Dict[str, Data]: pass -class Dense(TensorIndex): + def to_json(self): + attrs = serialize.all_properties_to_json(self) + + retdict = {"type": type(self).__name__, "attributes": attrs} + + return retdict + + + @classmethod + def from_json(cls, json_obj, context=None): + + # Selecting proper subclass + if json_obj['type'] == "TensorIndexDense": + self = TensorIndexDense.__new__(TensorIndexDense) + elif json_obj['type'] == "TensorIndexCompressed": + self = TensorIndexCompressed.__new__(TensorIndexCompressed) + elif json_obj['type'] == "TensorIndexSingleton": + self = TensorIndexSingleton.__new__(TensorIndexSingleton) + elif json_obj['type'] == "TensorIndexRange": + self = TensorIndexRange.__new__(TensorIndexRange) + elif json_obj['type'] == "TensorIndexOffset": + self = TensorIndexOffset.__new__(TensorIndexOffset) + else: + raise TypeError(f"Invalid data type, got: {json_obj['type']}") + + serialize.set_properties_from_json(self, json_obj['attributes'], context=context) + + return self + + 
+@make_properties +class TensorIndexDense(TensorIndex): """ Dense tensor index. @@ -633,8 +664,8 @@ class Dense(TensorIndex): index structure beyond the corresponding dimension size. """ - _ordered: bool - _unique: bool + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool) @property def iteration_type(self) -> TensorIterationTypes: @@ -690,7 +721,8 @@ def __repr__(self) -> str: return s -class Compressed(TensorIndex): +@make_properties +class TensorIndexCompressed(TensorIndex): """ Tensor level that stores coordinates in segmented array. @@ -699,9 +731,9 @@ class Compressed(TensorIndex): array that holds the child coordinates corresponding the parent. """ - _full: bool - _ordered: bool - _unique: bool + _full = Property(dtype=bool, default=False) + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) @property def iteration_type(self) -> TensorIterationTypes: @@ -766,7 +798,8 @@ def __repr__(self) -> str: return s -class Singleton(TensorIndex): +@make_properties +class TensorIndexSingleton(TensorIndex): """ Tensor index that encodes a single coordinate per parent coordinate. @@ -775,9 +808,9 @@ class Singleton(TensorIndex): coordinate but the first is encoded in this manner. """ - _full: bool - _ordered: bool - _unique: bool + _full = Property(dtype=bool, default=False) + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) @property def iteration_type(self) -> TensorIterationTypes: @@ -841,7 +874,8 @@ def __repr__(self) -> str: return s -class Range(TensorIndex): +@make_properties +class TensorIndexRange(TensorIndex): """ Tensor index that encodes a interval of coordinates for every parent. @@ -851,8 +885,8 @@ class Range(TensorIndex): range of coordinates between max(0, -offset[i]) and min(N, M - offset[i]). 
""" - _ordered: bool - _unique: bool + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) @property def iteration_type(self) -> TensorIterationTypes: @@ -910,7 +944,8 @@ def __repr__(self) -> str: return s -class Offset(TensorIndex): +@make_properties +class TensorIndexOffset(TensorIndex): """ Tensor index that encodes the next coordinates as offset from parent. @@ -918,8 +953,8 @@ class Offset(TensorIndex): coordinate j = i + offset[k]. """ - _ordered: bool - _unique: bool + _ordered = Property(dtype=bool, default=False) + _unique = Property(dtype=bool, default=False) @property def iteration_type(self) -> TensorIterationTypes: @@ -987,14 +1022,15 @@ class Tensor(Structure): value_dtype = TypeClassProperty(default=dtypes.int32, choices=dtypes.Typeclasses) tensor_shape = ShapeProperty(default=[]) - indices = ListProperty(element_type=Union[Tuple[TensorIndex, int], Tuple[TensorIndex, symbolic.SymExpr]]) - value_count = SymbolicProperty + indices = ListProperty(element_type=TensorIndex) + index_ordering = ListProperty(element_type=symbolic.SymExpr) + value_count = SymbolicProperty(default=0) def __init__( self, value_dtype: dtypes.Typeclasses, tensor_shape, - indices: List[Union[Tuple[TensorIndex, int], Tuple[TensorIndex, symbolic.SymExpr]]], + indices: List[Tuple[TensorIndex, Union[int, symbolic.SymExpr]]], value_count: symbolic.SymExpr, name: str, transient: bool = False, @@ -1100,11 +1136,13 @@ def __init__( self.value_dtype = value_dtype self.tensor_shape = tensor_shape - self.indices = indices self.value_count = value_count + indices, index_ordering = zip(*indices) + self.indices, self.index_ordering = list(indices), list(index_ordering) + num_dims = len(tensor_shape) - dimension_order = [idx[1] for idx in indices if isinstance(idx[1], int)] + dimension_order = [idx for idx in self.index_ordering if isinstance(idx, int)] # all tensor dimensions must occure exactly once in indices if not sorted(dimension_order) == 
list(range(num_dims)): @@ -1122,11 +1160,8 @@ def __init__( values=dtypes.float32[value_count], ) - indices_pure = [idx[0] for idx in indices]; - for (lvl, index) in enumerate(indices_pure): + for (lvl, index) in enumerate(indices): fields.update(index.fields(lvl, value_count)) - - abbreviation = ''.join(str(idx)[0] for idx in indices_pure) super(Tensor, self).__init__(fields, name, transient, storage, location, lifetime, debuginfo) @@ -1134,6 +1169,17 @@ def __init__( def __repr__(self): return f"{self.name} (dtype: {self.value_dtype}, shape: {list(self.tensor_shape)}, indices: {self.indices})" + @staticmethod + def from_json(json_obj, context=None): + if json_obj['type'] != 'Tensor': + raise TypeError("Invalid data type") + + # Create dummy object + tensor = Tensor.__new__(Tensor) + serialize.set_properties_from_json(tensor, json_obj, context=context) + + return tensor + @make_properties class StructureView(Structure): diff --git a/tests/sdfg/data/tensor_test.py b/tests/sdfg/data/tensor_test.py index e6e564bde7..06d3363a8b 100644 --- a/tests/sdfg/data/tensor_test.py +++ b/tests/sdfg/data/tensor_test.py @@ -12,7 +12,7 @@ def test_read_csr_tensor(): csr_obj = dace.data.Tensor( dace.float32, (M, N), - [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + [(dace.data.TensorIndexDense(), 0), (dace.data.TensorIndexCompressed(), 1)], nnz, "CSR_Tensor") @@ -63,6 +63,8 @@ def test_read_csr_tensor(): func(A=inpA, B=B, M=A.shape[0], N=A.shape[1], nnz=A.nnz) ref = A.toarray() + sdfg.save("./tensor.json") + assert np.allclose(B, ref) @@ -73,7 +75,7 @@ def test_csr_fields(): csr = dace.data.Tensor( dace.float32, (M, N), - [(dace.data.Dense(), 0), (dace.data.Compressed(), 1)], + [(dace.data.TensorIndexDense(), 0), (dace.data.TensorIndexCompressed(), 1)], nnz, "CSR_Matrix", ) @@ -90,9 +92,9 @@ def test_dia_fields(): dace.float32, (M, N), [ - (dace.data.Dense(), num_diags), - (dace.data.Range(), 0), - (dace.data.Offset(), 1), + (dace.data.TensorIndexDense(), num_diags), + 
(dace.data.TensorIndexRange(), 0), + (dace.data.TensorIndexOffset(), 1), ], nnz, "DIA_Matrix", @@ -110,9 +112,9 @@ def test_coo_fields(): dace.float32, (I, J, K), [ - (dace.data.Compressed(unique=False), 0), - (dace.data.Singleton(unique=False), 1), - (dace.data.Singleton(), 2), + (dace.data.TensorIndexCompressed(unique=False), 0), + (dace.data.TensorIndexSingleton(unique=False), 1), + (dace.data.TensorIndexSingleton(), 2), ], nnz, "COO_3D_Tensor",