Adjust tests
adam-smnk committed Sep 5, 2024
1 parent 97f05a1 commit 669872d
Showing 5 changed files with 67 additions and 77 deletions.
53 changes: 3 additions & 50 deletions test/Passes/DefaultPipeline/default-tpp-passes.mlir
@@ -8,15 +8,15 @@ func.func @matmul(%A: tensor<4x8xf32>,
%B: tensor<8x4xf32>, %C: tensor<4x4xf32>) -> tensor<4x4xf32> {
// CHECK: %[[C0:.+]] = arith.constant 0 : index
// CHECK: call @xsmm_gemm_dispatch
// CHECK: %[[ptr0:.*]] = memref.extract_aligned_pointer_as_index %[[ARG0]]
// CHECK: %[[ptr0:.*]] = memref.extract_aligned_pointer_as_index
// CHECK-NEXT: %[[cast_ptr0:.*]] = arith.index_cast %[[ptr0]] : index to i64
// CHECK-NEXT: %[[llvm_ptr0:.*]] = llvm.inttoptr %[[cast_ptr0]] : i64 to !llvm.ptr

// CHECK: %[[ptr1:.*]] = memref.extract_aligned_pointer_as_index %[[ARG1]]
// CHECK: %[[ptr1:.*]] = memref.extract_aligned_pointer_as_index
// CHECK-NEXT: %[[cast_ptr1:.*]] = arith.index_cast %[[ptr1]] : index to i64
// CHECK-NEXT: %[[llvm_ptr1:.*]] = llvm.inttoptr %[[cast_ptr1]] : i64 to !llvm.ptr

// CHECK: %[[ptr2:.*]] = memref.extract_aligned_pointer_as_index %[[ARG2]]
// CHECK: %[[ptr2:.*]] = memref.extract_aligned_pointer_as_index
// CHECK-NEXT: %[[cast_ptr2:.*]] = arith.index_cast %[[ptr2]] : index to i64
// CHECK-NEXT: %[[llvm_ptr2:.*]] = llvm.inttoptr %[[cast_ptr2]] : i64 to !llvm.ptr

@@ -90,53 +90,6 @@ func.func @conv2d_1x1(

// -----

#map = affine_map<(d0, d1) -> (d0 + d1)>

// CHECK-LABEL: @conv2d_1x1_decomposed(
// CHECK-SAME: %[[arg:.*]]: memref<1x7x7x2048xf32>) -> memref<1x7x7x512xf32> {
func.func @conv2d_1x1_decomposed(
%arg0 : tensor<1x7x7x2048xf32>) -> tensor<1x7x7x512xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c7 = arith.constant 7 : index

// Conv2D weights
%cst = arith.constant dense<0.00332225906> : tensor<2048x512xf32>

// 1x1 Conv2D
// CHECK: call @xsmm_gemm_dispatch
// CHECK: scf.for
// CHECK: %[[ptr0:.*]] = llvm.inttoptr %{{.+}} : i64 to !llvm.ptr
// CHECK: %[[ptr1:.*]] = llvm.inttoptr %{{.+}} : i64 to !llvm.ptr
// CHECK: %[[ptr2:.*]] = llvm.inttoptr %{{.+}} : i64 to !llvm.ptr
// CHECK: call @xsmm_gemm_invoke({{.*}}%[[ptr0]], %{{.+}}, %[[ptr1]], %{{.+}}, %[[ptr2]], %{{.+}}
%cst_0 = arith.constant 0.000000e+00 : f32
%0 = tensor.empty() : tensor<1x7x7x512xf32>
%1 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<1x7x7x512xf32>) -> tensor<1x7x7x512xf32>
%2 = scf.for %arg1 = %c0 to %c1 step %c1 iter_args(%arg2 = %1) -> (tensor<1x7x7x512xf32>) {
%3 = scf.for %arg3 = %c0 to %c7 step %c1 iter_args(%arg4 = %arg2) -> (tensor<1x7x7x512xf32>) {
%4 = scf.for %arg5 = %c0 to %c1 step %c1 iter_args(%arg6 = %arg4) -> (tensor<1x7x7x512xf32>) {
%5 = scf.for %arg7 = %c0 to %c1 step %c1 iter_args(%arg8 = %arg6) -> (tensor<1x7x7x512xf32>) {
%6 = affine.apply #map(%arg3, %arg5)
%extracted_slice = tensor.extract_slice %arg0[%arg1, %6, %arg7, 0] [1, 1, 7, 2048] [1, 1, 1, 1] : tensor<1x7x7x2048xf32> to tensor<7x2048xf32>
%extracted_slice_1 = tensor.extract_slice %arg8[%arg1, %arg3, 0, 0] [1, 1, 7, 512] [1, 1, 1, 1] : tensor<1x7x7x512xf32> to tensor<7x512xf32>
%7 = linalg.matmul ins(%extracted_slice, %cst : tensor<7x2048xf32>, tensor<2048x512xf32>) outs(%extracted_slice_1 : tensor<7x512xf32>) -> tensor<7x512xf32>
%inserted_slice = tensor.insert_slice %7 into %arg8[%arg1, %arg3, 0, 0] [1, 1, 7, 512] [1, 1, 1, 1] : tensor<7x512xf32> into tensor<1x7x7x512xf32>
scf.yield %inserted_slice : tensor<1x7x7x512xf32>
}
scf.yield %5 : tensor<1x7x7x512xf32>
}
scf.yield %4 : tensor<1x7x7x512xf32>
}
scf.yield %3 : tensor<1x7x7x512xf32>
}

// CHECK: return {{.*}} : memref<1x7x7x512xf32>
return %2 : tensor<1x7x7x512xf32>
}

// -----

#map0 = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d2)>
7 changes: 3 additions & 4 deletions test/Passes/DefaultPipeline/linalg-to-xsmm.mlir
@@ -50,15 +50,14 @@ func.func @gemm_with_zero(%arg0: tensor<3x3xf32>, %arg1: tensor<3x3xf32>) -> ten
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : i64
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : i64
// CHECK-NOT: xsmm_unary_dispatch
// CHECK: %[[ALLOC:.+]] = memref.alloc() {alignment = 64 : i64} : memref<3x3xf32>
// CHECK: %[[DIS:.+]] = call @xsmm_gemm_dispatch(%[[C1]], %[[C3]], %[[C3]], %[[C3]], %[[C3]], %[[C3]], %[[C3]], %[[C4]])
// CHECK: %[[INT_PTR_ARG0:.+]] = memref.extract_aligned_pointer_as_index %[[ARG0]] : memref<3x3xf32> -> index
// CHECK: %[[INT_PTR_ARG0:.+]] = memref.extract_aligned_pointer_as_index
// CHECK: %[[CAST_ARG0:.+]] = arith.index_cast %[[INT_PTR_ARG0]] : index to i64
// CHECK: %[[LLVM_PTR_ARG0:.+]] = llvm.inttoptr %[[CAST_ARG0]] : i64 to !llvm.ptr
// CHECK: %[[INT_PTR_ARG1:.+]] = memref.extract_aligned_pointer_as_index %[[ARG1]] : memref<3x3xf32> -> index
// CHECK: %[[INT_PTR_ARG1:.+]] = memref.extract_aligned_pointer_as_index
// CHECK: %[[CAST_ARG1:.+]] = arith.index_cast %[[INT_PTR_ARG1]] : index to i64
// CHECK: %[[LLVM_PTR_ARG1:.+]] = llvm.inttoptr %[[CAST_ARG1]] : i64 to !llvm.ptr
// CHECK: %[[INT_PTR_ALLOC:.+]] = memref.extract_aligned_pointer_as_index %[[ALLOC]] : memref<3x3xf32> -> index
// CHECK: %[[INT_PTR_ALLOC:.+]] = memref.extract_aligned_pointer_as_index
// CHECK: %[[CAST_ALLOC:.+]] = arith.index_cast %[[INT_PTR_ALLOC]] : index to i64
// CHECK: %[[LLVM_PTR_ALLOC:.+]] = llvm.inttoptr %[[CAST_ALLOC]] : i64 to !llvm.ptr
// CHECK: call @xsmm_gemm_invoke(%[[C1]], %[[DIS]], %[[LLVM_PTR_ARG0]], %[[C0]], %[[LLVM_PTR_ARG1]], %[[C0]], %[[LLVM_PTR_ALLOC]], %[[C0]])
36 changes: 36 additions & 0 deletions test/Passes/pass-matmul-blocking-default.mlir
@@ -84,3 +84,39 @@ func.func @block_linalg_matmul_transpose_b(
// CHECK: %[[VAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]], #[[MAP5]]], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%[[PACK0]], %[[PACK1]] : tensor<4x4x32x32xf32>, tensor<4x4x32x32xf32>) outs(%[[PACK2]] : tensor<4x4x32x32xf32>)
// CHECK: %[[OUT:.+]] = tensor.unpack %[[VAL]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[ARG2]] : tensor<4x4x32x32xf32> -> tensor<128x128xf32>
// CHECK: return %[[OUT]] : tensor<128x128xf32>

// -----

func.func @block_linalg_matmul_dynamic(
%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
-> tensor<?x?xf32> {
%0 = linalg.matmul ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}

// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>

// CHECK-LABEL: func @block_linalg_matmul_dynamic(
// CHECK-SAME: %[[ARG0:[0-9a-z]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor<?x?xf32>
// CHECK-SAME: %[[ARG2:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK-DAG: %[[PAD:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[PACK0:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PAD]] : f32)
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1]
// CHECK-SAME: inner_tiles = [32, 32] into {{.*}} : tensor<?x?xf32> -> tensor<?x?x32x32xf32>
// CHECK: %[[PACK1:.+]] = tensor.pack %[[ARG1]] padding_value(%[[PAD]] : f32)
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
// CHECK-SAME: inner_tiles = [32, 32] into {{.*}} : tensor<?x?xf32> -> tensor<?x?x32x32xf32>
// CHECK: %[[PACK2:.+]] = tensor.pack %[[ARG2]] padding_value(%[[PAD]] : f32)
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 32]
// CHECK-SAME: into {{.*}} : tensor<?x?xf32> -> tensor<?x?x32x32xf32>
// CHECK: %[[VAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]], #[[MAP5]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
// CHECK-SAME: ins(%[[PACK0]], %[[PACK1]] : tensor<?x?x32x32xf32>, tensor<?x?x32x32xf32>) outs(%[[PACK2]] : tensor<?x?x32x32xf32>)
// CHECK: %[[OUT:.+]] = tensor.unpack %[[VAL]] inner_dims_pos = [0, 1] inner_tiles = [32, 32]
// CHECK-SAME: into %[[ARG2]] : tensor<?x?x32x32xf32> -> tensor<?x?xf32>
// CHECK: return %[[OUT]] : tensor<?x?xf32>
43 changes: 22 additions & 21 deletions test/Passes/pass-matmul-blocking.mlir
@@ -60,8 +60,10 @@ func.func @block_dims_equal_to_factors(

// -----

// We don't expect to block as the blocking factor do not create full tiles.
func.func @block_linalg_matmul(
// Adapt tile sizes to small dimensions.
// Assume that there is a separate cost function that controls
// if packing should take place at all.
func.func @block_small_dims_matmul(
%arg0: tensor<5x6xf32>, %arg1: tensor<6x5xf32>, %arg2: tensor<5x5xf32>)
-> tensor<5x5xf32> {
%0 = linalg.matmul ins(%arg0, %arg1: tensor<5x6xf32>, tensor<6x5xf32>)
@@ -70,13 +72,24 @@ func.func @block_linalg_matmul(
return %0 : tensor<5x5xf32>
}

// CHECK-LABEL: func.func @block_linalg_matmul(
// CHECK-SAME: %[[ARG0:[0-9a-z]+]]: tensor<5x6xf32>,
// CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor<6x5xf32>,
// CHECK-SAME: %[[ARG2:[0-9a-z]+]]: tensor<5x5xf32>) -> tensor<5x5xf32> {
// CHECK: %{{.+}} = linalg.matmul
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[ARG2]]
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>

// CHECK-LABEL: func @block_small_dims_matmul(
// CHECK-SAME: %[[ARG0:[0-9a-z]+]]: tensor<5x6xf32>
// CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor<6x5xf32>
// CHECK-SAME: %[[ARG2:[0-9a-z]+]]: tensor<5x5xf32>) -> tensor<5x5xf32> {
// CHECK: %[[BUF0:.+]] = tensor.empty() : tensor<1x1x5x6xf32>
// CHECK: %[[PACK0:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [5, 6] into %[[BUF0]] : tensor<5x6xf32> -> tensor<1x1x5x6xf32>
// CHECK: %[[BUF1:.*]] = tensor.empty() : tensor<1x1x6x5xf32>
// CHECK: %[[PACK1:.+]] = tensor.pack %[[ARG1]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [6, 5] into %[[BUF1]] : tensor<6x5xf32> -> tensor<1x1x6x5xf32>
// CHECK: %[[BUF2:.+]] = tensor.empty() : tensor<1x1x5x5xf32>
// CHECK: %[[PACK2:.+]] = tensor.pack %[[ARG2]] inner_dims_pos = [0, 1] inner_tiles = [5, 5] into %[[BUF2]] : tensor<5x5xf32> -> tensor<1x1x5x5xf32>
// CHECK: %[[VAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]], #[[MAP5]]], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%[[PACK0]], %[[PACK1]] : tensor<1x1x5x6xf32>, tensor<1x1x6x5xf32>) outs(%[[PACK2]] : tensor<1x1x5x5xf32>)
// CHECK: %[[OUT:.+]] = tensor.unpack %[[VAL]] inner_dims_pos = [0, 1] inner_tiles = [5, 5] into %[[ARG2]] : tensor<1x1x5x5xf32> -> tensor<5x5xf32>
// CHECK: return %[[OUT]] : tensor<5x5xf32>
// CHECK: }

// -----

@@ -183,15 +196,3 @@ func.func @batch_matmul_rewrite(%arg0: tensor<512x64x128xf32>, %arg1: tensor<512
// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[GEN]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 32]
// CHECK-SAME: into %[[OUT]] : tensor<512x2x2x32x32xf32> -> tensor<512x64x64xf32>

// -----

// CHECK-LABEL: batch_matmul_invalid_tiles
func.func @batch_matmul_invalid_tiles(%arg0: tensor<5x5x5xf32>, %arg1: tensor<5x5x5xf32>) -> tensor<5x5x5xf32> {
%0 = tensor.empty() : tensor<5x5x5xf32>
// CHECK: linalg.batch_matmul
// CHECK-NOT: linalg.generic
%1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<5x5x5xf32>, tensor<5x5x5xf32>)
outs(%0 : tensor<5x5x5xf32>) -> tensor<5x5x5xf32>
return %1 : tensor<5x5x5xf32>
}
5 changes: 3 additions & 2 deletions test/Passes/tpp-mapping.mlir
@@ -15,9 +15,10 @@ func.func @conv_to_matmul(%img: tensor<1x5x5x3xf32>, %filter: tensor<3x3x3x8xf32
// CHECK: scf.for
// CHECK: tensor.extract_slice{{[^:]+}}: tensor<1x5x5x3xf32> to tensor<3x3xf32>
// CHECK: tensor.extract_slice{{[^:]+}}: tensor<3x3x3x8xf32> to tensor<3x8xf32>
// CHECK: tensor.extract_slice{{[^:]+}}: tensor<1x3x3x8xf32> to tensor<3x8xf32>
// CHECK: tensor.extract_slice{{[^:]+}}: tensor<1x1x3x8xf32> to tensor<3x8xf32>
// CHECK: linalg.matmul{{.*}} -> tensor<3x8xf32>
// CHECK: tensor.insert_slice{{[^:]+}}: tensor<3x8xf32> into tensor<1x3x3x8xf32>
// CHECK: tensor.insert_slice{{[^:]+}}: tensor<3x8xf32> into tensor<1x1x3x8xf32>
// CHECK: tensor.insert_slice{{[^:]+}}: tensor<1x1x3x8xf32> into tensor<1x3x3x8xf32>
// CHECK: }

// -----