From a3e8c81ff5c7c841955523d2be7032962a6a3468 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Fri, 17 Nov 2023 15:20:20 +0100 Subject: [PATCH 1/3] [bug] Missing symbols for data access on inter-state edge After uplift to dace v0.15, one SDFG which was working before started to show compilation errors. The latest DaCe is moving a data access to an inter-state edge. For the data-access, the symbols that define array strides are needed for code generation. The SDFG was validated, before and after the simplify pass, but it did not compile for CPU. When skipping the simplify pass, the compilation did work. The problem has been narrowed down to the scalar-to-symbol promotion, which is moving a data access to an inter-state edge. Then, the method _used_symbols_internal needs to be update to account for data containers, including symbolic shape and strides. This commit contains a unit test to reproduce the issue and verify the proposed fix. --- dace/sdfg/state.py | 4 + tests/passes/scalar_to_symbol_test.py | 167 +++++++++++++++++++++++++- 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/dace/sdfg/state.py b/dace/sdfg/state.py index 097365fbc3..5602bfed2c 100644 --- a/dace/sdfg/state.py +++ b/dace/sdfg/state.py @@ -2449,6 +2449,10 @@ def _used_symbols_internal(self, # subracting the (true) free symbols from the edge's assignment keys. This way we can correctly # compute the symbols that are used before being assigned. efsyms = e.data.used_symbols(all_symbols) + # collect symbols representing data containers + dsyms = {sym for sym in efsyms if sym in self.arrays} + for d in dsyms: + efsyms |= {str(sym) for sym in self.arrays[d].used_symbols(all_symbols)} defined_syms |= set(e.data.assignments.keys()) - (efsyms | state_symbols) used_before_assignment.update(efsyms - defined_syms) free_syms |= efsyms diff --git a/tests/passes/scalar_to_symbol_test.py b/tests/passes/scalar_to_symbol_test.py index 02cc57a204..d65b7e9493 100644 --- a/tests/passes/scalar_to_symbol_test.py +++ b/tests/passes/scalar_to_symbol_test.py @@ -1,7 +1,7 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. """ Tests the scalar to symbol promotion functionality. """ import dace -from dace.transformation.passes import scalar_to_symbol +from dace.transformation.passes import constant_propagation, fusion_inline, scalar_to_symbol from dace.sdfg.state import SDFGState from dace.transformation import transformation as xf, interstate as isxf from dace.transformation.interstate import loop_detection as ld @@ -692,6 +692,170 @@ def test_ternary_expression(compile_time_evaluatable): sdfg.compile() +def test_indirection_with_tasklet(): + dtype = dace.float64 + + sdfg = dace.SDFG('top') + N_EDGES, N_VERTICES, N_E2V_NEIGHBORS = (dace.symbol(s) for s in ['N_EDGES', 'N_VERTICES', 'N_E2V_NEIGHBORS']) + sdfg.add_array('EDGES', (N_EDGES,), dtype) + sdfg.add_array('VERTICES', (N_VERTICES,), dtype) + sdfg.add_array('E2V_TABLE', (N_EDGES, N_E2V_NEIGHBORS), dace.int32) + + + nsdfg = dace.SDFG('nsdfg') + [nsdfg.add_symbol(s, dace.int32) for s in ['___sym_indirect_idx', '___sym_offset']] + nsdfg.add_array('_field', sdfg.arrays['VERTICES'].shape, dtype) + nsdfg.add_array('_table', sdfg.arrays['E2V_TABLE'].shape, dace.int32) + nsdfg.add_scalar('_result', dtype) + nstate = nsdfg.add_state() + nsdfg.add_scalar('_shift_idx', dace.int32, transient=True) + shift_idx_node = nstate.add_access('_shift_idx') + nsdfg.add_scalar('_shift_offset', dace.int32, transient=True) + shift_offset_node = nstate.add_access('_shift_offset') + nsdfg.add_scalar('_field_idx', dace.int32, transient=True) + field_idx_node = nstate.add_access('_field_idx') + shift_node = nstate.add_tasklet('shift', {'_inp', '_idx', '_offset'}, {'_out'}, '_out = _inp[_idx, _offset]') + deref_node = nstate.add_tasklet('deref', {'_inp', '_idx'}, {'_out'}, '_out = _inp[_idx]') + nstate.add_edge( + nstate.add_access('_table'), + None, + shift_node, + '_inp', + dace.Memlet.from_array('_table', nsdfg.arrays['_table']) + ) + nstate.add_edge( + nstate.add_tasklet('get_shift_index', {}, {'_out'}, '_out = ___sym_indirect_idx'), + '_out', + shift_idx_node, + None, + dace.Memlet.simple('_shift_idx', '0') + ) + nstate.add_edge( + shift_idx_node, + None, + shift_node, + '_idx', + dace.Memlet.simple('_shift_idx', '0') + ) + nstate.add_edge( + nstate.add_tasklet('get_shift_offset', {}, {'_out'}, "_out = ___sym_offset"), + '_out', + shift_offset_node, + None, + dace.Memlet.simple('_shift_offset', '0') + ) + nstate.add_edge( + shift_offset_node, + None, + shift_node, + '_offset', + dace.Memlet.simple('_shift_offset', '0') + ) + nstate.add_edge( + shift_node, + '_out', + field_idx_node, + None, + dace.Memlet.simple('_field_idx', '0') + ) + nstate.add_edge( + nstate.add_access('_field'), + None, + deref_node, + '_inp', + dace.Memlet.from_array('_field', nsdfg.arrays['_field']) + ) + nstate.add_edge( + field_idx_node, + None, + deref_node, + '_idx', + dace.Memlet.simple('_field_idx', '0') + ) + nstate.add_edge( + deref_node, + '_out', + nstate.add_access('_result'), + None, + dace.Memlet.simple('_result', '0', wcr_str="lambda x, y: x + y") + ) + + state = sdfg.add_state('main') + me, mx = state.add_map('closure', dict(_edge_idx="0:N_EDGES", _neighbor_idx=f"0:N_E2V_NEIGHBORS")) + nsdfg_node = state.add_nested_sdfg( + nsdfg, + sdfg, + inputs={'_field', '_table'}, + outputs={'_result'}, + symbol_mapping={ + '___sym_indirect_idx': '_edge_idx', + '___sym_offset': '_neighbor_idx', + } + ) + state.add_memlet_path( + state.add_access('VERTICES'), + me, + nsdfg_node, + dst_conn='_field', + memlet=dace.Memlet.from_array('VERTICES', sdfg.arrays['VERTICES']) + ) + state.add_memlet_path( + state.add_access('E2V_TABLE'), + me, + nsdfg_node, + dst_conn='_table', + memlet=dace.Memlet.from_array('E2V_TABLE', sdfg.arrays['E2V_TABLE']) + ) + state.add_memlet_path( + nsdfg_node, + mx, + state.add_access('EDGES'), + src_conn='_result', + memlet=dace.Memlet.simple('EDGES', '_edge_idx') + ) + + + N_EDGES = np.int32(5) + N_VERTICES = np.int32(4) + N_E2V_NEIGHBORS = np.int32(2) + + from numpy.random import default_rng + rng = default_rng(42) + + vertices = rng.random((N_VERTICES, )) + edges = rng.random((N_EDGES, )) + e2v_table = np.random.randint(0, N_VERTICES, (N_EDGES, N_E2V_NEIGHBORS), np.int32) + + reference_edges = np.asarray([np.sum(vertices[e2v_table[idx, :]], initial=edges[idx]) for idx in range(N_EDGES)]) + + assert sdfg.is_valid() + # apply scalar-to-symbol promotion + assert scalar_to_symbol.find_promotable_scalars(nsdfg) == {'_shift_idx', '_shift_offset'} + scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(nsdfg, {}) + assert scalar_to_symbol.find_promotable_scalars(nsdfg) == {'_field_idx'} + scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(nsdfg, {}) + # some cleanup + constant_propagation.ConstantPropagation().apply_pass(nsdfg, {}) + assert fusion_inline.fuse_states(nsdfg) == 2 + + # check that the table access has become an inter-state assignment + assert len(nsdfg.out_edges(nsdfg.start_state)) == 1 + assert nsdfg.out_edges(nsdfg.start_state)[0].data.assignments == { + '_field_idx':'_table[___sym_indirect_idx, ___sym_offset]' + } + + sdfg( + EDGES=edges, + VERTICES=vertices, + E2V_TABLE=e2v_table, + N_EDGES=N_EDGES, + N_VERTICES=N_VERTICES, + N_E2V_NEIGHBORS=N_E2V_NEIGHBORS, + ) + + assert np.allclose(edges, reference_edges) + + if __name__ == '__main__': test_find_promotable() test_promote_simple() @@ -715,3 +879,4 @@ def test_ternary_expression(compile_time_evaluatable): test_dynamic_mapind() test_ternary_expression(False) test_ternary_expression(True) + test_indirection_with_tasklet() From 977117f14c7cdbe9c7bf8ab5c1a76bbf36d9ba12 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Thu, 30 Nov 2023 09:40:59 +0100 Subject: [PATCH 2/3] [test] Improve unit-test (review comment) Unit-test reduced to reproduce the origin of this issue. Also, unit-test moved to codegen test file. --- tests/codegen/codegen_used_symbols_test.py | 42 ++++++ tests/passes/scalar_to_symbol_test.py | 167 +-------------------- 2 files changed, 43 insertions(+), 166 deletions(-) diff --git a/tests/codegen/codegen_used_symbols_test.py b/tests/codegen/codegen_used_symbols_test.py index afa0ca0a05..a42404d591 100644 --- a/tests/codegen/codegen_used_symbols_test.py +++ b/tests/codegen/codegen_used_symbols_test.py @@ -88,8 +88,50 @@ def test_codegen_used_symbols_gpu(): pass +def test_codegen_edge_assignment_with_indirection(): + rng = numpy.random.default_rng(42) + (M, N, K) = (dace.symbol(x, dace.int32) for x in ['M', 'N', 'K']) + + sdfg = dace.SDFG('edge_assignment_with_indirection') + [sdfg.add_symbol(x, dace.int32) for x in {'__indirect_idx', '__neighbor_idx'}] + sdfg.add_array('_field', (M,), dace.float64) + sdfg.add_array('_table', (N,K), dace.int32) + sdfg.add_array('_out', (N,), dace.float64) + + state0 = sdfg.add_state(is_start_block=True) + state1 = sdfg.add_state() + sdfg.add_edge(state0, state1, dace.InterstateEdge( + assignments={'_field_idx': '_table[__indirect_idx, __neighbor_idx]'} + )) + state1.add_memlet_path( + state1.add_access('_field'), + state1.add_access('_out'), + memlet=dace.Memlet(data='_out', subset='__indirect_idx', other_subset='_field_idx', wcr='lambda x, y: x + y') + ) + + M, N, K = (5, 4, 2) + field = rng.random((M,)) + out = rng.random((N,)) + table = numpy.random.randint(0, M, (N, K), numpy.int32) + + TEST_INDIRECT_IDX = numpy.random.randint(0, N) + TEST_NEIGHBOR_IDX = numpy.random.randint(0, K) + + reference = numpy.asarray([out[i] + field[table[i, TEST_NEIGHBOR_IDX]] if i == TEST_INDIRECT_IDX else out[i] for i in range(N)]) + + sdfg( + _field=field, _table=table, _out=out, M=M, N=N, K=K, + __indirect_idx=TEST_INDIRECT_IDX, + __neighbor_idx=TEST_NEIGHBOR_IDX + ) + + assert numpy.allclose(out, reference) + + if __name__ == "__main__": test_codegen_used_symbols_cpu() test_codegen_used_symbols_cpu_2() test_codegen_used_symbols_gpu() + test_codegen_edge_assignment_with_indirection() + diff --git a/tests/passes/scalar_to_symbol_test.py b/tests/passes/scalar_to_symbol_test.py index d65b7e9493..02cc57a204 100644 --- a/tests/passes/scalar_to_symbol_test.py +++ b/tests/passes/scalar_to_symbol_test.py @@ -1,7 +1,7 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. """ Tests the scalar to symbol promotion functionality. """ import dace -from dace.transformation.passes import constant_propagation, fusion_inline, scalar_to_symbol +from dace.transformation.passes import scalar_to_symbol from dace.sdfg.state import SDFGState from dace.transformation import transformation as xf, interstate as isxf from dace.transformation.interstate import loop_detection as ld @@ -692,170 +692,6 @@ def test_ternary_expression(compile_time_evaluatable): sdfg.compile() -def test_indirection_with_tasklet(): - dtype = dace.float64 - - sdfg = dace.SDFG('top') - N_EDGES, N_VERTICES, N_E2V_NEIGHBORS = (dace.symbol(s) for s in ['N_EDGES', 'N_VERTICES', 'N_E2V_NEIGHBORS']) - sdfg.add_array('EDGES', (N_EDGES,), dtype) - sdfg.add_array('VERTICES', (N_VERTICES,), dtype) - sdfg.add_array('E2V_TABLE', (N_EDGES, N_E2V_NEIGHBORS), dace.int32) - - - nsdfg = dace.SDFG('nsdfg') - [nsdfg.add_symbol(s, dace.int32) for s in ['___sym_indirect_idx', '___sym_offset']] - nsdfg.add_array('_field', sdfg.arrays['VERTICES'].shape, dtype) - nsdfg.add_array('_table', sdfg.arrays['E2V_TABLE'].shape, dace.int32) - nsdfg.add_scalar('_result', dtype) - nstate = nsdfg.add_state() - nsdfg.add_scalar('_shift_idx', dace.int32, transient=True) - shift_idx_node = nstate.add_access('_shift_idx') - nsdfg.add_scalar('_shift_offset', dace.int32, transient=True) - shift_offset_node = nstate.add_access('_shift_offset') - nsdfg.add_scalar('_field_idx', dace.int32, transient=True) - field_idx_node = nstate.add_access('_field_idx') - shift_node = nstate.add_tasklet('shift', {'_inp', '_idx', '_offset'}, {'_out'}, '_out = _inp[_idx, _offset]') - deref_node = nstate.add_tasklet('deref', {'_inp', '_idx'}, {'_out'}, '_out = _inp[_idx]') - nstate.add_edge( - nstate.add_access('_table'), - None, - shift_node, - '_inp', - dace.Memlet.from_array('_table', nsdfg.arrays['_table']) - ) - nstate.add_edge( - nstate.add_tasklet('get_shift_index', {}, {'_out'}, '_out = ___sym_indirect_idx'), - '_out', - shift_idx_node, - None, - dace.Memlet.simple('_shift_idx', '0') - ) - nstate.add_edge( - shift_idx_node, - None, - shift_node, - '_idx', - dace.Memlet.simple('_shift_idx', '0') - ) - nstate.add_edge( - nstate.add_tasklet('get_shift_offset', {}, {'_out'}, "_out = ___sym_offset"), - '_out', - shift_offset_node, - None, - dace.Memlet.simple('_shift_offset', '0') - ) - nstate.add_edge( - shift_offset_node, - None, - shift_node, - '_offset', - dace.Memlet.simple('_shift_offset', '0') - ) - nstate.add_edge( - shift_node, - '_out', - field_idx_node, - None, - dace.Memlet.simple('_field_idx', '0') - ) - nstate.add_edge( - nstate.add_access('_field'), - None, - deref_node, - '_inp', - dace.Memlet.from_array('_field', nsdfg.arrays['_field']) - ) - nstate.add_edge( - field_idx_node, - None, - deref_node, - '_idx', - dace.Memlet.simple('_field_idx', '0') - ) - nstate.add_edge( - deref_node, - '_out', - nstate.add_access('_result'), - None, - dace.Memlet.simple('_result', '0', wcr_str="lambda x, y: x + y") - ) - - state = sdfg.add_state('main') - me, mx = state.add_map('closure', dict(_edge_idx="0:N_EDGES", _neighbor_idx=f"0:N_E2V_NEIGHBORS")) - nsdfg_node = state.add_nested_sdfg( - nsdfg, - sdfg, - inputs={'_field', '_table'}, - outputs={'_result'}, - symbol_mapping={ - '___sym_indirect_idx': '_edge_idx', - '___sym_offset': '_neighbor_idx', - } - ) - state.add_memlet_path( - state.add_access('VERTICES'), - me, - nsdfg_node, - dst_conn='_field', - memlet=dace.Memlet.from_array('VERTICES', sdfg.arrays['VERTICES']) - ) - state.add_memlet_path( - state.add_access('E2V_TABLE'), - me, - nsdfg_node, - dst_conn='_table', - memlet=dace.Memlet.from_array('E2V_TABLE', sdfg.arrays['E2V_TABLE']) - ) - state.add_memlet_path( - nsdfg_node, - mx, - state.add_access('EDGES'), - src_conn='_result', - memlet=dace.Memlet.simple('EDGES', '_edge_idx') - ) - - - N_EDGES = np.int32(5) - N_VERTICES = np.int32(4) - N_E2V_NEIGHBORS = np.int32(2) - - from numpy.random import default_rng - rng = default_rng(42) - - vertices = rng.random((N_VERTICES, )) - edges = rng.random((N_EDGES, )) - e2v_table = np.random.randint(0, N_VERTICES, (N_EDGES, N_E2V_NEIGHBORS), np.int32) - - reference_edges = np.asarray([np.sum(vertices[e2v_table[idx, :]], initial=edges[idx]) for idx in range(N_EDGES)]) - - assert sdfg.is_valid() - # apply scalar-to-symbol promotion - assert scalar_to_symbol.find_promotable_scalars(nsdfg) == {'_shift_idx', '_shift_offset'} - scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(nsdfg, {}) - assert scalar_to_symbol.find_promotable_scalars(nsdfg) == {'_field_idx'} - scalar_to_symbol.ScalarToSymbolPromotion().apply_pass(nsdfg, {}) - # some cleanup - constant_propagation.ConstantPropagation().apply_pass(nsdfg, {}) - assert fusion_inline.fuse_states(nsdfg) == 2 - - # check that the table access has become an inter-state assignment - assert len(nsdfg.out_edges(nsdfg.start_state)) == 1 - assert nsdfg.out_edges(nsdfg.start_state)[0].data.assignments == { - '_field_idx':'_table[___sym_indirect_idx, ___sym_offset]' - } - - sdfg( - EDGES=edges, - VERTICES=vertices, - E2V_TABLE=e2v_table, - N_EDGES=N_EDGES, - N_VERTICES=N_VERTICES, - N_E2V_NEIGHBORS=N_E2V_NEIGHBORS, - ) - - assert np.allclose(edges, reference_edges) - - if __name__ == '__main__': test_find_promotable() test_promote_simple() @@ -879,4 +715,3 @@ def test_indirection_with_tasklet(): test_dynamic_mapind() test_ternary_expression(False) test_ternary_expression(True) - test_indirection_with_tasklet() From a96d3e53dc262b23549d8ffe99a72ab4ceac13f2 Mon Sep 17 00:00:00 2001 From: Edoardo Paone Date: Thu, 30 Nov 2023 09:51:09 +0100 Subject: [PATCH 3/3] Code formatting --- tests/codegen/codegen_used_symbols_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/codegen/codegen_used_symbols_test.py b/tests/codegen/codegen_used_symbols_test.py index a42404d591..1e216e9508 100644 --- a/tests/codegen/codegen_used_symbols_test.py +++ b/tests/codegen/codegen_used_symbols_test.py @@ -117,7 +117,12 @@ def test_codegen_edge_assignment_with_indirection(): TEST_INDIRECT_IDX = numpy.random.randint(0, N) TEST_NEIGHBOR_IDX = numpy.random.randint(0, K) - reference = numpy.asarray([out[i] + field[table[i, TEST_NEIGHBOR_IDX]] if i == TEST_INDIRECT_IDX else out[i] for i in range(N)]) + reference = numpy.asarray( + [ + out[i] + field[table[i, TEST_NEIGHBOR_IDX]] if i == TEST_INDIRECT_IDX else out[i] + for i in range(N) + ] + ) sdfg( _field=field, _table=table, _out=out, M=M, N=N, K=K,