Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Refactor convolution test suite #24

Merged
merged 16 commits into from
Oct 30, 2024
Merged
74 changes: 74 additions & 0 deletions linalg_ops/convolution/CMakeLists.txt
erman-gurses marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,77 @@ foreach(_DTYPE IN LISTS _DTYPES)
)
endforeach()
endforeach()

###############################################################################
#
# GPU - ROCm/HIP, default flags.
#
###############################################################################

# The set of dtype/layout combinations to test depends on the GPU
# architecture: CDNA (gfx9) exercises more combinations than RDNA3 (gfx11).
# Only the list selection is branched; the test registration loop below is
# shared, avoiding the previous copy/paste duplication of the whole loop.
set(_SIZES)
list(APPEND _SIZES "large")

set(_DTYPES_AND_LAYOUTS)
if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx9")
  list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")
  list(APPEND _DTYPES_AND_LAYOUTS "f16_nchw_f16_fchw_f32")
  list(APPEND _DTYPES_AND_LAYOUTS "i8_nhwc_i8_hwcf_i32")
elseif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx11")
  list(APPEND _DTYPES_AND_LAYOUTS "f16_nhwc_f16_hwcf_f32")
endif()

# Register one runner test per (dtype/layout, size) pair. If the target chip
# matches neither pattern above, the list is empty and no tests are added —
# the same behavior as the previous branched version.
foreach(_DTYPE_AND_LAYOUT IN LISTS _DTYPES_AND_LAYOUTS)
  foreach(_SIZE IN LISTS _SIZES)
    iree_test_suites_runner_test(
      NAME
        conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}
      TESTS_SRC
        "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}.mlir"
      CALLS_SRC
        "generated/${_DTYPE_AND_LAYOUT}/conv2d_${_DTYPE_AND_LAYOUT}_${_SIZE}_calls.mlir"
      TEST_RUNNER
        iree-test-suites_iree-e2e-conv2d-test
      TARGET_BACKEND
        "rocm"
      DRIVER
        "hip"
      COMPILER_FLAGS
        "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}"
      RUNNER_FLAGS
      LABELS
    )
  endforeach()
endforeach()
35 changes: 35 additions & 0 deletions linalg_ops/convolution/generate_test_mlir_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,38 @@ for type_combination in ${type_combinations[@]}; do
--shapes=${shape}
done
done

shapes=(
  "large"
)

# input_type;input_layout;kernel_type;kernel_layout;acc_type
type_and_layout_combinations=(
  "f16;nhwc;f16;hwcf;f32"
  "f16;nchw;f16;fchw;f32"
  "i8;nhwc;i8;hwcf;i32"
)

# For each type/layout combination, generate one test module and its matching
# calls module per shape bucket under ${generated_dir_root}/<combination>/.
for type_and_layout_combination in "${type_and_layout_combinations[@]}"; do
  # Split the ";"-separated record into its five fields.
  IFS=";" read -r -a combination <<< "${type_and_layout_combination}"
  input_type="${combination[0]}"
  input_layout="${combination[1]}"
  kernel_type="${combination[2]}"
  kernel_layout="${combination[3]}"
  acc_type="${combination[4]}"
  # Directory/file stem, e.g. "f16_nhwc_f16_hwcf_f32".
  type_layout_name="${input_type}_${input_layout}_${kernel_type}_${kernel_layout}_${acc_type}"
  type_combination_dir="${generated_dir_root}/${type_layout_name}"
  mkdir -p "${type_combination_dir}"
  for shape in "${shapes[@]}"; do
    echo "Generating conv2d test files for ${type_layout_name}_${shape}"
    name="conv2d_${type_layout_name}_${shape}"
    python "${this_dir}/generate_e2e_conv2d_tests.py" \
      --output_conv2d_mlir="${type_combination_dir}/${name}.mlir" \
      --output_calls_mlir="${type_combination_dir}/${name}_calls.mlir" \
      --input_type="${input_type}" \
      --input_layout="${input_layout}" \
      --kernel_type="${kernel_type}" \
      --kernel_layout="${kernel_layout}" \
      --acc_type="${acc_type}" \
      --shapes="${shape}"
  done
done
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Generated conv2d accumulate test functions: NCHW input, FCHW kernel,
// f16 operands accumulating into an f32 output. With a 3x3 kernel and unit
// stride/dilation, the 128x128 spatial dims reduce to 126x126.
func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x4x128x128xf16>, %rhs: tensor<8x4x3x3xf16>, %acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x4x128x128xf16>, tensor<8x4x3x3xf16>) outs(%acc: tensor<2x8x126x126xf32>) -> tensor<2x8x126x126xf32>
return %result: tensor<2x8x126x126xf32>
}
// Same op with 3 input channels and 12 output filters.
func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x3x128x128xf16>, %rhs: tensor<12x3x3x3xf16>, %acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32> {
%result = linalg.conv_2d_nchw_fchw {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x3x128x128xf16>, tensor<12x3x3x3xf16>) outs(%acc: tensor<2x12x126x126xf32>) -> tensor<2x12x126x126xf32>
return %result: tensor<2x12x126x126xf32>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Generated calls module for the NCHW/FCHW f16->f32 conv2d tests. Each test
// function builds seeded pseudo-random inputs, invokes the conv2d function
// under test, and hands the result to the runner's checker.
builtin.module @calls attributes {

} {

// Runner-provided: deterministic pseudo-random NxCxHxW (or NxHxWxC) tensor
// from (dims, element type, seed).
func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
// Runner-provided: recomputes the conv2d from the raw operands and compares
// against %actual_result.
func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
// Functions under test, defined in the companion conv2d test module.
func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view

func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Input: 2x4x128x128 f16, seed 2.
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 4 : i64
%input_dim2 = arith.constant 128 : i64
%input_dim3 = arith.constant 128 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 2 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Kernel: 8x4x3x3 f16 (FCHW), seed 3.
%kernel_dim0 = arith.constant 8 : i64
%kernel_dim1 = arith.constant 4 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 3 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Accumulator: 2x8x126x126 f32, seed 4.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 8 : i64
%acc_dim2 = arith.constant 126 : i64
%acc_dim3 = arith.constant 126 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 4 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// %acc_copy is generated with the same dims/type/seed as %acc, so it holds
// identical contents; the copy is handed to the kernel (which may consume or
// alias it) while %acc is kept pristine for the reference check below.
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 8 : i64
%acc_copy_dim2 = arith.constant 126 : i64
%acc_copy_dim3 = arith.constant 126 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 4 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
// Problem dims and conv parameters for the host-side reference check.
%n = arith.constant 2 : i64
%c = arith.constant 4 : i64
%h = arith.constant 128 : i64
%w = arith.constant 128 : i64
%f = arith.constant 8 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
// NOTE(review): layout code 0 presumably selects NCHW/FCHW (the NHWC calls
// module uses 1) — confirm against the test runner's layout enum.
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Second test case: 3 input channels, 12 filters; seeds advance (5, 6, 7).
func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Input: 2x3x128x128 f16, seed 5.
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 3 : i64
%input_dim2 = arith.constant 128 : i64
%input_dim3 = arith.constant 128 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 5 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Kernel: 12x3x3x3 f16 (FCHW), seed 6.
%kernel_dim0 = arith.constant 12 : i64
%kernel_dim1 = arith.constant 3 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 3 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 6 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Accumulator: 2x12x126x126 f32, seed 7.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 12 : i64
%acc_dim2 = arith.constant 126 : i64
%acc_dim3 = arith.constant 126 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 7 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Same-seed copy of the accumulator; see the note in the first test case.
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 12 : i64
%acc_copy_dim2 = arith.constant 126 : i64
%acc_copy_dim3 = arith.constant 126 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 7 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 3 : i64
%h = arith.constant 128 : i64
%w = arith.constant 128 : i64
%f = arith.constant 12 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 0 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Generated conv2d accumulate test functions: NHWC input, HWCF kernel,
// f16 operands accumulating into an f32 output. With a 3x3 kernel and unit
// stride/dilation, the 128x128 spatial dims reduce to 126x126.
func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%lhs: tensor<2x128x128x4xf16>, %rhs: tensor<3x3x4x8xf16>, %acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32> {
%result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x4xf16>, tensor<3x3x4x8xf16>) outs(%acc: tensor<2x126x126x8xf32>) -> tensor<2x126x126x8xf32>
return %result: tensor<2x126x126x8xf32>
}
// Same op with 3 input channels and 12 output filters.
func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%lhs: tensor<2x128x128x3xf16>, %rhs: tensor<3x3x3x12xf16>, %acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32> {
%result = linalg.conv_2d_nhwc_hwcf {dilations = dense<[1, 1]> : tensor<2xi64>, strides = dense<[1, 1]> : tensor<2xi64>} ins(%lhs, %rhs: tensor<2x128x128x3xf16>, tensor<3x3x3x12xf16>) outs(%acc: tensor<2x126x126x12xf32>) -> tensor<2x126x126x12xf32>
return %result: tensor<2x126x126x12xf32>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Generated calls module for the NHWC/HWCF f16->f32 conv2d tests. Each test
// function builds seeded pseudo-random inputs, invokes the conv2d function
// under test, and hands the result to the runner's checker.
builtin.module @calls attributes {

} {

// Runner-provided: deterministic pseudo-random tensor from
// (dims, element type, seed).
func.func private @conv2d_test.generate_random_tensor(%device: !hal.device, %dim0: i64, %dim1: i64, %dim2: i64, %dim3: i64, %element_type: i32, %seed: i32) -> !hal.buffer_view
// Runner-provided: recomputes the conv2d from the raw operands and compares
// against %actual_result.
func.func private @conv2d_test.check_conv2d_results(%device: !hal.device, %n: i64, %c: i64, %h: i64, %w: i64, %f:i64, %kh:i64, %kw:i64, %layout:i64, %sh:i64, %sw:i64, %dh:i64, %dw:i64, %input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view, %actual_result: !hal.buffer_view)
// Functions under test, defined in the companion conv2d test module.
func.func private @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view
func.func private @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input: !hal.buffer_view, %kernel: !hal.buffer_view, %acc: !hal.buffer_view) -> !hal.buffer_view

func.func @conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32_2_4_128_128_8_3_3_acc_0() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x4x128x128x8x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Input: 2x128x128x4 f16 (NHWC), seed 2.
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 128 : i64
%input_dim2 = arith.constant 128 : i64
%input_dim3 = arith.constant 4 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 2 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Kernel: 3x3x4x8 f16 (HWCF), seed 3.
%kernel_dim0 = arith.constant 3 : i64
%kernel_dim1 = arith.constant 3 : i64
%kernel_dim2 = arith.constant 4 : i64
%kernel_dim3 = arith.constant 8 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 3 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Accumulator: 2x126x126x8 f32, seed 4.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 126 : i64
%acc_dim2 = arith.constant 126 : i64
%acc_dim3 = arith.constant 8 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 4 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// %acc_copy is generated with the same dims/type/seed as %acc, so it holds
// identical contents; the copy is handed to the kernel while %acc is kept
// pristine for the reference check below.
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 126 : i64
%acc_copy_dim2 = arith.constant 126 : i64
%acc_copy_dim3 = arith.constant 8 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 4 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_4_128_128_times_3_3_8_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
// Problem dims and conv parameters for the host-side reference check.
%n = arith.constant 2 : i64
%c = arith.constant 4 : i64
%h = arith.constant 128 : i64
%w = arith.constant 128 : i64
%f = arith.constant 8 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
// NOTE(review): layout code 1 presumably selects NHWC/HWCF (the NCHW calls
// module uses 0) — confirm against the test runner's layout enum.
%layout = arith.constant 1 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
// Second test case: 3 input channels, 12 filters; seeds advance (5, 6, 7).
func.func @conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32_2_3_128_128_12_3_3_acc_1() attributes {
iree.reflection = {description = "Conv2d shape (NxCxHxWxFxKHxKW): 2x3x128x128x12x3x3"}
} {
%device_index = arith.constant 0 : index
%device = hal.devices.get %device_index : !hal.device
// Input: 2x128x128x3 f16 (NHWC), seed 5.
%input_dim0 = arith.constant 2 : i64
%input_dim1 = arith.constant 128 : i64
%input_dim2 = arith.constant 128 : i64
%input_dim3 = arith.constant 3 : i64
%input_element_type = hal.element_type<f16> : i32
%input_seed = arith.constant 5 : i32
%input = call @conv2d_test.generate_random_tensor(%device, %input_dim0, %input_dim1, %input_dim2, %input_dim3, %input_element_type, %input_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Kernel: 3x3x3x12 f16 (HWCF), seed 6.
%kernel_dim0 = arith.constant 3 : i64
%kernel_dim1 = arith.constant 3 : i64
%kernel_dim2 = arith.constant 3 : i64
%kernel_dim3 = arith.constant 12 : i64
%kernel_element_type = hal.element_type<f16> : i32
%kernel_seed = arith.constant 6 : i32
%kernel = call @conv2d_test.generate_random_tensor(%device, %kernel_dim0, %kernel_dim1, %kernel_dim2, %kernel_dim3, %kernel_element_type, %kernel_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Accumulator: 2x126x126x12 f32, seed 7.
%acc_dim0 = arith.constant 2 : i64
%acc_dim1 = arith.constant 126 : i64
%acc_dim2 = arith.constant 126 : i64
%acc_dim3 = arith.constant 12 : i64
%acc_element_type = hal.element_type<f32> : i32
%acc_seed = arith.constant 7 : i32
%acc = call @conv2d_test.generate_random_tensor(%device, %acc_dim0, %acc_dim1, %acc_dim2, %acc_dim3, %acc_element_type, %acc_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
// Same-seed copy of the accumulator; see the note in the first test case.
%acc_copy_dim0 = arith.constant 2 : i64
%acc_copy_dim1 = arith.constant 126 : i64
%acc_copy_dim2 = arith.constant 126 : i64
%acc_copy_dim3 = arith.constant 12 : i64
%acc_copy_element_type = hal.element_type<f32> : i32
%acc_copy_seed = arith.constant 7 : i32
%acc_copy = call @conv2d_test.generate_random_tensor(%device, %acc_copy_dim0, %acc_copy_dim1, %acc_copy_dim2, %acc_copy_dim3, %acc_copy_element_type, %acc_copy_seed) : (!hal.device, i64, i64, i64, i64, i32, i32) -> !hal.buffer_view
%result = call @module.conv2d_accumulate_2_3_128_128_times_3_3_12_dtype_f16_f16_f32(%input, %kernel, %acc_copy) : (!hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> !hal.buffer_view
%n = arith.constant 2 : i64
%c = arith.constant 3 : i64
%h = arith.constant 128 : i64
%w = arith.constant 128 : i64
%f = arith.constant 12 : i64
%kh = arith.constant 3 : i64
%kw = arith.constant 3 : i64
%layout = arith.constant 1 : i64
%sh = arith.constant 1 : i64
%sw = arith.constant 1 : i64
%dh = arith.constant 1 : i64
%dw = arith.constant 1 : i64
call @conv2d_test.check_conv2d_results(%device, %n, %c, %h, %w, %f, %kh, %kw, %layout, %sh, %sw, %dh, %dw, %input, %kernel, %acc, %result) : (!hal.device, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view, !hal.buffer_view) -> ()
return
}
}
Loading