From 17f1f8b816efb1085f4a64e5a4820a6e13a8f88a Mon Sep 17 00:00:00 2001
From: Jakub Kuderski
Date: Wed, 2 Oct 2024 17:19:10 -0400
Subject: [PATCH] Drop mlir files (#6)

These files are generated by the benchmark frameworks, so there is no need to keep them stored in git. (A sketch of the corresponding .gitignore change follows the file list below.)
---
 .gitignore | 6 +- .../attention_128x1024x128x128x1024xf16.mlir | 26 ---- ...tion_128x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_128x1024x64x64x1024xf16.mlir | 26 ---- ...ention_128x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- ...attention_128x16384x128x128x16384xf16.mlir | 26 ---- ...on_128x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_128x16384x64x64x16384xf16.mlir | 26 ---- ...tion_128x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_128x2048x128x128x2048xf16.mlir | 26 ---- ...tion_128x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_128x2048x64x64x2048xf16.mlir | 26 ---- ...ention_128x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_128x4096x128x128x4096xf16.mlir | 26 ---- ...tion_128x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_128x4096x64x64x4096xf16.mlir | 26 ---- ...ention_128x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_128x8192x128x128x8192xf16.mlir | 26 ---- ...tion_128x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_128x8192x64x64x8192xf16.mlir | 26 ---- ...ention_128x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_12x384x64x64x384xf16.mlir | 26 ---- ...attention_12x384x64x64x384xf8E4M3FNUZ.mlir | 25 --- .../attention_16x1024x128x128x1024xf16.mlir | 26 ---- ...ntion_16x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_16x1024x64x64x1024xf16.mlir | 26 ---- ...tention_16x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_16x16384x128x128x16384xf16.mlir | 26 ---- ...ion_16x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_16x16384x64x64x16384xf16.mlir | 26 ---- ...ntion_16x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_16x2048x128x128x2048xf16.mlir | 26 ---- ...ntion_16x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_16x2048x64x64x2048xf16.mlir | 26 ---- ...tention_16x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_16x4096x128x128x4096xf16.mlir | 26 ---- ...ntion_16x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_16x4096x64x64x4096xf16.mlir | 26 ---- ...tention_16x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_16x8192x128x128x8192xf16.mlir | 26 ---- ...ntion_16x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_16x8192x64x64x8192xf16.mlir | 26 ---- ...tention_16x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_192x1024x128x128x1024xf16.mlir | 26 ---- ...tion_192x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_192x1024x64x64x1024xf16.mlir | 26 ---- ...ention_192x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- ...attention_192x16384x128x128x16384xf16.mlir | 26 ---- ...on_192x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_192x16384x64x64x16384xf16.mlir | 26 ---- ...tion_192x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_192x2048x128x128x2048xf16.mlir | 26 ---- ...tion_192x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_192x2048x64x64x2048xf16.mlir | 26 ---- ...ention_192x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_192x4096x128x128x4096xf16.mlir | 26 ---- ...tion_192x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_192x4096x64x64x4096xf16.mlir | 26 ---- ...ention_192x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_192x8192x128x128x8192xf16.mlir | 26 ---- ...tion_192x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 ---
.../attention_192x8192x64x64x8192xf16.mlir | 26 ---- ...ention_192x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_1x1024x128x128x1024xf16.mlir | 26 ---- ...ention_1x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_1x1024x64x64x1024xf16.mlir | 26 ---- ...ttention_1x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_1x16384x128x128x16384xf16.mlir | 26 ---- ...tion_1x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_1x16384x64x64x16384xf16.mlir | 26 ---- ...ention_1x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_1x2048x128x128x2048xf16.mlir | 26 ---- ...ention_1x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_1x2048x64x64x2048xf16.mlir | 26 ---- ...ttention_1x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_1x4096x128x128x4096xf16.mlir | 26 ---- ...ention_1x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_1x4096x64x64x4096xf16.mlir | 26 ---- ...ttention_1x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_1x4096x64x64x64xf16.mlir | 26 ---- .../attention_1x4096x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_1x8192x128x128x8192xf16.mlir | 26 ---- ...ention_1x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_1x8192x64x64x8192xf16.mlir | 26 ---- ...ttention_1x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_20x4096x64x64x4096xf16.mlir | 26 ---- ...tention_20x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_20x4096x64x64x64xf16.mlir | 26 ---- ...attention_20x4096x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_2x1024x128x128x1024xf16.mlir | 26 ---- ...ention_2x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_2x1024x64x64x1024xf16.mlir | 26 ---- ...ttention_2x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_2x1024x64x64x64xf16.mlir | 26 ---- .../attention_2x1024x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_2x16384x128x128x16384xf16.mlir | 26 ---- ...tion_2x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_2x16384x64x64x16384xf16.mlir | 26 ---- ...ention_2x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_2x2048x128x128x2048xf16.mlir | 26 ---- ...ention_2x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_2x2048x64x64x2048xf16.mlir | 26 ---- ...ttention_2x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_2x4096x128x128x4096xf16.mlir | 26 ---- ...ention_2x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_2x4096x64x64x4096xf16.mlir | 26 ---- ...ttention_2x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_2x8192x128x128x8192xf16.mlir | 26 ---- ...ention_2x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_2x8192x64x64x8192xf16.mlir | 26 ---- ...ttention_2x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_32x1024x128x128x1024xf16.mlir | 26 ---- ...ntion_32x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_32x1024x64x64x1024xf16.mlir | 26 ---- ...tention_32x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_32x16384x128x128x16384xf16.mlir | 26 ---- ...ion_32x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_32x16384x64x64x16384xf16.mlir | 26 ---- ...ntion_32x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_32x2048x128x128x2048xf16.mlir | 26 ---- ...ntion_32x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_32x2048x64x64x2048xf16.mlir | 26 ---- ...tention_32x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_32x4096x128x128x4096xf16.mlir | 26 ---- ...ntion_32x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_32x4096x64x64x4096xf16.mlir | 26 ---- 
...tention_32x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_32x8192x128x128x8192xf16.mlir | 26 ---- ...ntion_32x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_32x8192x64x64x8192xf16.mlir | 26 ---- ...tention_32x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_40x1024x64x64x1024xf16.mlir | 26 ---- ...tention_40x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_40x1024x64x64x64xf16.mlir | 26 ---- ...attention_40x1024x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_48x1024x128x128x1024xf16.mlir | 26 ---- ...ntion_48x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_48x1024x64x64x1024xf16.mlir | 26 ---- ...tention_48x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_48x16384x128x128x16384xf16.mlir | 26 ---- ...ion_48x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_48x16384x64x64x16384xf16.mlir | 26 ---- ...ntion_48x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_48x2048x128x128x2048xf16.mlir | 26 ---- ...ntion_48x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_48x2048x64x64x2048xf16.mlir | 26 ---- ...tention_48x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_48x4096x128x128x4096xf16.mlir | 26 ---- ...ntion_48x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_48x4096x64x64x4096xf16.mlir | 26 ---- ...tention_48x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_48x8192x128x128x8192xf16.mlir | 26 ---- ...ntion_48x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_48x8192x64x64x8192xf16.mlir | 26 ---- ...tention_48x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_4x1024x128x128x1024xf16.mlir | 26 ---- ...ention_4x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_4x1024x64x64x1024xf16.mlir | 26 ---- ...ttention_4x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_4x16384x128x128x16384xf16.mlir | 26 ---- ...tion_4x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_4x16384x64x64x16384xf16.mlir | 26 ---- ...ention_4x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_4x2048x128x128x2048xf16.mlir | 26 ---- ...ention_4x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_4x2048x64x64x2048xf16.mlir | 26 ---- ...ttention_4x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_4x4096x128x128x4096xf16.mlir | 26 ---- ...ention_4x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_4x4096x64x64x4096xf16.mlir | 26 ---- ...ttention_4x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_4x4096x64x64x64xf16.mlir | 26 ---- .../attention_4x4096x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_4x8192x128x128x8192xf16.mlir | 26 ---- ...ention_4x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_4x8192x64x64x8192xf16.mlir | 26 ---- ...ttention_4x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_64x1024x128x128x1024xf16.mlir | 26 ---- ...ntion_64x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_64x1024x64x64x1024xf16.mlir | 26 ---- ...tention_64x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_64x16384x128x128x16384xf16.mlir | 26 ---- ...ion_64x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_64x16384x64x64x16384xf16.mlir | 26 ---- ...ntion_64x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_64x2048x128x128x2048xf16.mlir | 26 ---- ...ntion_64x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_64x2048x64x64x2048xf16.mlir | 26 ---- ...tention_64x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_64x4096x128x128x4096xf16.mlir | 26 ---- ...ntion_64x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- 
.../attention_64x4096x64x64x4096xf16.mlir | 26 ---- ...tention_64x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_64x8192x128x128x8192xf16.mlir | 26 ---- ...ntion_64x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_64x8192x64x64x8192xf16.mlir | 26 ---- ...tention_64x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_768x4096x64x64x64xf16.mlir | 26 ---- ...ttention_768x4096x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_8x1024x128x128x1024xf16.mlir | 26 ---- ...ention_8x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_8x1024x64x64x1024xf16.mlir | 26 ---- ...ttention_8x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_8x1024x64x64x64xf16.mlir | 26 ---- .../attention_8x1024x64x64x64xf8E4M3FNUZ.mlir | 25 --- .../attention_8x16384x128x128x16384xf16.mlir | 26 ---- ...tion_8x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_8x16384x64x64x16384xf16.mlir | 26 ---- ...ention_8x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_8x2048x128x128x2048xf16.mlir | 26 ---- ...ention_8x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_8x2048x64x64x2048xf16.mlir | 26 ---- ...ttention_8x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_8x4096x128x128x4096xf16.mlir | 26 ---- ...ention_8x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_8x4096x64x64x4096xf16.mlir | 26 ---- ...ttention_8x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_8x8192x128x128x8192xf16.mlir | 26 ---- ...ention_8x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../mlir/attention_8x8192x64x64x8192xf16.mlir | 26 ---- ...ttention_8x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_96x1024x128x128x1024xf16.mlir | 26 ---- ...ntion_96x1024x128x128x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_96x1024x64x64x1024xf16.mlir | 26 ---- ...tention_96x1024x64x64x1024xf8E4M3FNUZ.mlir | 25 --- .../attention_96x16384x128x128x16384xf16.mlir | 26 ---- ...ion_96x16384x128x128x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_96x16384x64x64x16384xf16.mlir | 26 ---- ...ntion_96x16384x64x64x16384xf8E4M3FNUZ.mlir | 25 --- .../attention_96x2048x128x128x2048xf16.mlir | 26 ---- ...ntion_96x2048x128x128x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_96x2048x64x64x2048xf16.mlir | 26 ---- ...tention_96x2048x64x64x2048xf8E4M3FNUZ.mlir | 25 --- .../attention_96x4096x128x128x4096xf16.mlir | 26 ---- ...ntion_96x4096x128x128x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_96x4096x64x64x4096xf16.mlir | 26 ---- ...tention_96x4096x64x64x4096xf8E4M3FNUZ.mlir | 25 --- .../attention_96x8192x128x128x8192xf16.mlir | 26 ---- ...ntion_96x8192x128x128x8192xf8E4M3FNUZ.mlir | 25 --- .../attention_96x8192x64x64x8192xf16.mlir | 26 ---- ...tention_96x8192x64x64x8192xf8E4M3FNUZ.mlir | 25 --- ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...6x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...6x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...6x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - 
...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...1x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ..._1x56x56x64x7x7x3_f32xf32xf32_stride1.mlir | 8 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...1x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...1x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...4x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...4x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...4x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...x112x112x64x7x7x3_f32xf32xf32_stride2.mlir | 7 - ...4x14x1024x1x1x512_f32xf32xf32_stride2.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride1.mlir | 7 - ...14x14x256x3x3x256_f32xf32xf32_stride2.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride1.mlir | 7 - ...28x28x128x3x3x128_f32xf32xf32_stride2.mlir | 7 - ...28x28x512x1x1x256_f32xf32xf32_stride2.mlir | 7 - ...8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir | 7 - ...7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir | 7 - ...8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir | 7 - ...8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir | 7 - ...16x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...6x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...6x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...6x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...6x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - 
...6x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ..._16x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...6x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ..._16x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ..._16x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ..._1x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...1x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...1x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...1x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...1x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...1x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ...q_1x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...1x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ...q_1x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ...q_1x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ..._2x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ...q_2x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ...q_2x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ...q_2x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ...32x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ..._32x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ..._32x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ..._32x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ...48x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ..._48x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ..._48x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ..._48x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ..._4x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...4x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...4x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...4x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...4x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...4x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ...q_4x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - ...4x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ...q_4x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ...q_4x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - ..._8x112x112x64x7x7x3_i8xi8xi32_stride2.mlir | 8 - ...x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir | 8 - ...8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir | 8 - ...8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir | 8 - ...8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir | 8 - ...8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir | 8 - ...8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir | 8 - ...q_8x56x56x64x3x3x64_i8xi8xi32_stride1.mlir | 8 - 
...8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir | 8 - ...q_8x7x7x512x3x3x512_i8xi8xi32_stride1.mlir | 8 - ...q_8x7x7x512x3x3x512_i8xi8xi32_stride2.mlir | 8 - gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_10240_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1024_5120_640_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1280_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_128_1280_2048_bf16.mlir | 9 -- gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir | 10 -- gemm/mlir/gemm_128_1280_2048_f16.mlir | 9 -- gemm/mlir/gemm_128_1280_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_128_1280_2048_f16_tB.mlir | 144 ----------------- gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_13824_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_14336_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_15360_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_16_5120_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_16000_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_1920_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_2048_10240_1280_bf16.mlir | 9 -- gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir | 10 -- gemm/mlir/gemm_2048_10240_1280_f16.mlir | 9 -- gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_2048_1280_1280_bf16.mlir | 9 -- gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir | 10 -- gemm/mlir/gemm_2048_1280_1280_f16.mlir | 9 -- gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_2048_1280_5120_bf16.mlir | 9 -- gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir | 10 -- gemm/mlir/gemm_2048_1280_5120_f16.mlir | 9 -- gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_2048_2048_1024_f16.mlir | 9 -- gemm/mlir/gemm_2048_2048_65536_f16.mlir | 9 -- gemm/mlir/gemm_2048_2048_8192_f16.mlir | 9 -- gemm/mlir/gemm_2048_8192_1024_f16.mlir | 9 -- gemm/mlir/gemm_2048_8192_65536_f16.mlir | 9 -- gemm/mlir/gemm_2048_8192_8192_f16.mlir | 9 -- gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_2560_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_16_5120_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_27648_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_28672_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir | 10 -- gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir | 10 -- gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir | 10 -- gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_32000_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3456_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_3840_8_5120_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4000_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4096_20480_2560_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_4096_4096_8192_bf16.mlir | 9 -- gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir | 10 -- gemm/mlir/gemm_4096_4096_8192_f16.mlir | 9 -- gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir | 10 -- gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_1728_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_1728_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_1728_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_1728_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_1728_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_1280_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_13824_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_1728_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_2560_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_3456_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_6912_f16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_5120_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_32_8192_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_57344_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_6912_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7168_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_7680_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_16_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_1_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_2_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_32_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_4_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_8_5120_f16_tA.mlir | 10 -- gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8000_8_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_3584_f16_tA.mlir | 10 -- 
gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_16_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_3584_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_1_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2048_1024_f16.mlir | 9 -- gemm/mlir/gemm_8192_2048_65536_f16.mlir | 9 -- gemm/mlir/gemm_8192_2048_8192_f16.mlir | 9 -- gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_3584_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_2_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_3584_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_32_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_3584_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_4_8192_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_5120_640_bf16.mlir | 9 -- 
gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir | 10 -- gemm/mlir/gemm_8192_5120_640_f16.mlir | 9 -- gemm/mlir/gemm_8192_5120_640_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_5120_640_f16_tB.mlir | 145 ------------------ gemm/mlir/gemm_8192_8192_1024_f16.mlir | 9 -- gemm/mlir/gemm_8192_8192_65536_f16.mlir | 9 -- gemm/mlir/gemm_8192_8192_8192_f16.mlir | 9 -- gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_1024_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_14336_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_2048_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_28672_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_3584_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_4096_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_7168_f16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir | 10 -- gemm/mlir/gemm_8192_8_8192_f16_tA.mlir | 10 -- 929 files changed, 3 insertions(+), 13536 deletions(-) delete mode 100644 attention/mlir/attention_128x1024x128x128x1024xf16.mlir delete mode 100644 attention/mlir/attention_128x1024x128x128x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x1024x64x64x1024xf16.mlir delete mode 100644 attention/mlir/attention_128x1024x64x64x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x16384x128x128x16384xf16.mlir delete mode 100644 attention/mlir/attention_128x16384x128x128x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x16384x64x64x16384xf16.mlir delete mode 100644 attention/mlir/attention_128x16384x64x64x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x2048x128x128x2048xf16.mlir delete mode 100644 attention/mlir/attention_128x2048x128x128x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x2048x64x64x2048xf16.mlir delete mode 100644 attention/mlir/attention_128x2048x64x64x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x4096x128x128x4096xf16.mlir delete mode 100644 attention/mlir/attention_128x4096x128x128x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x4096x64x64x4096xf16.mlir delete mode 100644 attention/mlir/attention_128x4096x64x64x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x8192x128x128x8192xf16.mlir delete mode 100644 attention/mlir/attention_128x8192x128x128x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_128x8192x64x64x8192xf16.mlir delete mode 100644 attention/mlir/attention_128x8192x64x64x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_12x384x64x64x384xf16.mlir delete mode 100644 attention/mlir/attention_12x384x64x64x384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x1024x128x128x1024xf16.mlir delete mode 100644 attention/mlir/attention_16x1024x128x128x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x1024x64x64x1024xf16.mlir delete mode 100644 attention/mlir/attention_16x1024x64x64x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x16384x128x128x16384xf16.mlir delete mode 100644 attention/mlir/attention_16x16384x128x128x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x16384x64x64x16384xf16.mlir delete mode 100644 attention/mlir/attention_16x16384x64x64x16384xf8E4M3FNUZ.mlir delete mode 
100644 attention/mlir/attention_16x2048x128x128x2048xf16.mlir delete mode 100644 attention/mlir/attention_16x2048x128x128x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x2048x64x64x2048xf16.mlir delete mode 100644 attention/mlir/attention_16x2048x64x64x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x4096x128x128x4096xf16.mlir delete mode 100644 attention/mlir/attention_16x4096x128x128x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x4096x64x64x4096xf16.mlir delete mode 100644 attention/mlir/attention_16x4096x64x64x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x8192x128x128x8192xf16.mlir delete mode 100644 attention/mlir/attention_16x8192x128x128x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_16x8192x64x64x8192xf16.mlir delete mode 100644 attention/mlir/attention_16x8192x64x64x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x1024x128x128x1024xf16.mlir delete mode 100644 attention/mlir/attention_192x1024x128x128x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x1024x64x64x1024xf16.mlir delete mode 100644 attention/mlir/attention_192x1024x64x64x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x16384x128x128x16384xf16.mlir delete mode 100644 attention/mlir/attention_192x16384x128x128x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x16384x64x64x16384xf16.mlir delete mode 100644 attention/mlir/attention_192x16384x64x64x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x2048x128x128x2048xf16.mlir delete mode 100644 attention/mlir/attention_192x2048x128x128x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x2048x64x64x2048xf16.mlir delete mode 100644 attention/mlir/attention_192x2048x64x64x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x4096x128x128x4096xf16.mlir delete mode 100644 attention/mlir/attention_192x4096x128x128x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x4096x64x64x4096xf16.mlir delete mode 100644 attention/mlir/attention_192x4096x64x64x4096xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x8192x128x128x8192xf16.mlir delete mode 100644 attention/mlir/attention_192x8192x128x128x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_192x8192x64x64x8192xf16.mlir delete mode 100644 attention/mlir/attention_192x8192x64x64x8192xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x1024x128x128x1024xf16.mlir delete mode 100644 attention/mlir/attention_1x1024x128x128x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x1024x64x64x1024xf16.mlir delete mode 100644 attention/mlir/attention_1x1024x64x64x1024xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x16384x128x128x16384xf16.mlir delete mode 100644 attention/mlir/attention_1x16384x128x128x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x16384x64x64x16384xf16.mlir delete mode 100644 attention/mlir/attention_1x16384x64x64x16384xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x2048x128x128x2048xf16.mlir delete mode 100644 attention/mlir/attention_1x2048x128x128x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x2048x64x64x2048xf16.mlir delete mode 100644 attention/mlir/attention_1x2048x64x64x2048xf8E4M3FNUZ.mlir delete mode 100644 attention/mlir/attention_1x4096x128x128x4096xf16.mlir delete mode 100644 attention/mlir/attention_1x4096x128x128x4096xf8E4M3FNUZ.mlir delete mode 100644 
attention/mlir/attention_1x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_1x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_1x4096x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_1x4096x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_1x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_1x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_1x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_1x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_20x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_20x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_20x4096x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_20x4096x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_2x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_2x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x1024x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_2x1024x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_2x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_2x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_2x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_2x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_2x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_2x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_2x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_2x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_2x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_32x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_32x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_32x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_32x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_32x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_32x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_32x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_32x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_32x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_32x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_32x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_40x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_40x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_40x1024x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_40x1024x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_48x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_48x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_48x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_48x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_48x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_48x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_48x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_48x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_48x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_48x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_48x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_4x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_4x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_4x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_4x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_4x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_4x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_4x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_4x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x4096x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_4x4096x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_4x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_4x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_4x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_64x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_64x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_64x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_64x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_64x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_64x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_64x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_64x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_64x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_64x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_64x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_768x4096x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_768x4096x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_8x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_8x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x1024x64x64x64xf16.mlir
 delete mode 100644 attention/mlir/attention_8x1024x64x64x64xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_8x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_8x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_8x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_8x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_8x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_8x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_8x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_8x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_8x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x1024x128x128x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_96x1024x128x128x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x1024x64x64x1024xf16.mlir
 delete mode 100644 attention/mlir/attention_96x1024x64x64x1024xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x16384x128x128x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_96x16384x128x128x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x16384x64x64x16384xf16.mlir
 delete mode 100644 attention/mlir/attention_96x16384x64x64x16384xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x2048x128x128x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_96x2048x128x128x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x2048x64x64x2048xf16.mlir
 delete mode 100644 attention/mlir/attention_96x2048x64x64x2048xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x4096x128x128x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_96x4096x128x128x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x4096x64x64x4096xf16.mlir
 delete mode 100644 attention/mlir/attention_96x4096x64x64x4096xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x8192x128x128x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_96x8192x128x128x8192xf8E4M3FNUZ.mlir
 delete mode 100644 attention/mlir/attention_96x8192x64x64x8192xf16.mlir
 delete mode 100644 attention/mlir/attention_96x8192x64x64x8192xf8E4M3FNUZ.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x56x56x64x7x7x3_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
 delete mode 100644 conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
 delete mode 100644 gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_10240_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1024_5120_640_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1280_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_f16.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_128_1280_2048_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_13824_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_14336_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_15360_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_16000_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_1920_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2048_2048_1024_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_2048_65536_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_2048_8192_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_8192_1024_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_8192_65536_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2048_8192_8192_f16.mlir
 delete mode 100644 gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2560_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_27648_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_28672_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_32000_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3456_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_3840_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4000_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4096_20480_2560_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_f16.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_1280_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_13824_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_1728_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_2560_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_3456_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_6912_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_5120_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_57344_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_6912_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7168_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_7680_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_16_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_1_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_2_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_32_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_4_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_8_5120_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8000_8_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_16_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_1_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2048_1024_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2048_65536_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2048_8192_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_2_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_32_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_4_8192_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_bf16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8192_1024_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8192_65536_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8192_8192_f16.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
 delete mode 100644 gemm/mlir/gemm_8192_8_8192_f16_tA.mlir

diff --git a/.gitignore b/.gitignore
index b42c298..5c0612d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,7 @@ wheelhouse
 bench_venv/
 
 # Bench Artifacts
-gemm/vmfb/
-attention/vmfb/
-conv/vmfb/
+attention/
+conv/
+gemm/
 results/
diff --git a/attention/mlir/attention_128x1024x128x128x1024xf16.mlir b/attention/mlir/attention_128x1024x128x128x1024xf16.mlir
deleted file mode 100644
index f4cf768..0000000
--- a/attention/mlir/attention_128x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x1024x128xf16>
-!K = tensor<128x1024x128xf16>
-!V = tensor<128x1024x128xf16>
-!O = tensor<128x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x1024x128x128x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 1e07e22..0000000
--- a/attention/mlir/attention_128x1024x128x128x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x1024x128xf8E4M3FNUZ>
-!K = tensor<128x1024x128xf8E4M3FNUZ>
-!V = tensor<128x1024x128xf8E4M3FNUZ>
-!O = tensor<128x1024x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x1024x64x64x1024xf16.mlir b/attention/mlir/attention_128x1024x64x64x1024xf16.mlir
deleted file mode 100644
index 7b8f32e..0000000
--- a/attention/mlir/attention_128x1024x64x64x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x1024x64xf16>
-!K = tensor<128x1024x64xf16>
-!V = tensor<128x1024x64xf16>
-!O = tensor<128x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x1024x64x64x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 743e8bc..0000000
--- a/attention/mlir/attention_128x1024x64x64x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x1024x64xf8E4M3FNUZ>
-!K = tensor<128x1024x64xf8E4M3FNUZ>
-!V = tensor<128x1024x64xf8E4M3FNUZ>
-!O = tensor<128x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x16384x128x128x16384xf16.mlir b/attention/mlir/attention_128x16384x128x128x16384xf16.mlir
deleted file mode 100644
index 9b26573..0000000
--- a/attention/mlir/attention_128x16384x128x128x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x16384x128xf16>
-!K = tensor<128x16384x128xf16>
-!V = tensor<128x16384x128xf16>
-!O = tensor<128x16384x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x16384x128x128x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index cf96125..0000000
--- a/attention/mlir/attention_128x16384x128x128x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x16384x128xf8E4M3FNUZ>
-!K = tensor<128x16384x128xf8E4M3FNUZ>
-!V = tensor<128x16384x128xf8E4M3FNUZ>
-!O = tensor<128x16384x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x16384x64x64x16384xf16.mlir b/attention/mlir/attention_128x16384x64x64x16384xf16.mlir
deleted file mode 100644
index 1961900..0000000
--- a/attention/mlir/attention_128x16384x64x64x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x16384x64xf16>
-!K = tensor<128x16384x64xf16>
-!V = tensor<128x16384x64xf16>
-!O = tensor<128x16384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index be49c2e..0000000
--- a/attention/mlir/attention_128x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x16384x64xf8E4M3FNUZ>
-!K = tensor<128x16384x64xf8E4M3FNUZ>
-!V = tensor<128x16384x64xf8E4M3FNUZ>
-!O = tensor<128x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x2048x128x128x2048xf16.mlir b/attention/mlir/attention_128x2048x128x128x2048xf16.mlir
deleted file mode 100644
index 0d60d01..0000000
--- a/attention/mlir/attention_128x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x2048x128xf16>
-!K = tensor<128x2048x128xf16>
-!V = tensor<128x2048x128xf16>
-!O = tensor<128x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index 68d7e5f..0000000
--- a/attention/mlir/attention_128x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x2048x128xf8E4M3FNUZ>
-!K = tensor<128x2048x128xf8E4M3FNUZ>
-!V = tensor<128x2048x128xf8E4M3FNUZ>
-!O = tensor<128x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x2048x64x64x2048xf16.mlir b/attention/mlir/attention_128x2048x64x64x2048xf16.mlir
deleted file mode 100644
index e4ac23f..0000000
--- a/attention/mlir/attention_128x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x2048x64xf16>
-!K = tensor<128x2048x64xf16>
-!V = tensor<128x2048x64xf16>
-!O = tensor<128x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x2048x64x64x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index 57c6543..0000000
--- a/attention/mlir/attention_128x2048x64x64x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x2048x64xf8E4M3FNUZ>
-!K = tensor<128x2048x64xf8E4M3FNUZ>
-!V = tensor<128x2048x64xf8E4M3FNUZ>
-!O = tensor<128x2048x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x4096x128x128x4096xf16.mlir b/attention/mlir/attention_128x4096x128x128x4096xf16.mlir
deleted file mode 100644
index a3af00a..0000000
--- a/attention/mlir/attention_128x4096x128x128x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x4096x128xf16>
-!K = tensor<128x4096x128xf16>
-!V = tensor<128x4096x128xf16>
-!O = tensor<128x4096x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x4096x128x128x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index c4a680d..0000000
--- a/attention/mlir/attention_128x4096x128x128x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x4096x128xf8E4M3FNUZ>
-!K = tensor<128x4096x128xf8E4M3FNUZ>
-!V = tensor<128x4096x128xf8E4M3FNUZ>
-!O = tensor<128x4096x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x4096x64x64x4096xf16.mlir b/attention/mlir/attention_128x4096x64x64x4096xf16.mlir
deleted file mode 100644
index 4c7aa7b..0000000
--- a/attention/mlir/attention_128x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x4096x64xf16>
-!K = tensor<128x4096x64xf16>
-!V = tensor<128x4096x64xf16>
-!O = tensor<128x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 2e612dc..0000000
--- a/attention/mlir/attention_128x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x4096x64xf8E4M3FNUZ>
-!K = tensor<128x4096x64xf8E4M3FNUZ>
-!V = tensor<128x4096x64xf8E4M3FNUZ>
-!O = tensor<128x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x8192x128x128x8192xf16.mlir b/attention/mlir/attention_128x8192x128x128x8192xf16.mlir
deleted file mode 100644
index 1dda46d..0000000
--- a/attention/mlir/attention_128x8192x128x128x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x8192x128xf16>
-!K = tensor<128x8192x128xf16>
-!V = tensor<128x8192x128xf16>
-!O = tensor<128x8192x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x8192x128x128x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index fa1b3e0..0000000
--- a/attention/mlir/attention_128x8192x128x128x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x8192x128xf8E4M3FNUZ>
-!K = tensor<128x8192x128xf8E4M3FNUZ>
-!V = tensor<128x8192x128xf8E4M3FNUZ>
-!O = tensor<128x8192x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x8192x64x64x8192xf16.mlir b/attention/mlir/attention_128x8192x64x64x8192xf16.mlir
deleted file mode 100644
index 2158d02..0000000
--- a/attention/mlir/attention_128x8192x64x64x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<128x8192x64xf16>
-!K = tensor<128x8192x64xf16>
-!V = tensor<128x8192x64xf16>
-!O = tensor<128x8192x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_128x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_128x8192x64x64x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 54339bd..0000000
--- a/attention/mlir/attention_128x8192x64x64x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<128x8192x64xf8E4M3FNUZ>
-!K = tensor<128x8192x64xf8E4M3FNUZ>
-!V = tensor<128x8192x64xf8E4M3FNUZ>
-!O = tensor<128x8192x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_12x384x64x64x384xf16.mlir b/attention/mlir/attention_12x384x64x64x384xf16.mlir
deleted file mode 100644
index b005a07..0000000
--- a/attention/mlir/attention_12x384x64x64x384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<12x384x64xf16>
-!K = tensor<12x384x64xf16>
-!V = tensor<12x384x64xf16>
-!O = tensor<12x384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_12x384x64x64x384xf8E4M3FNUZ.mlir b/attention/mlir/attention_12x384x64x64x384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 377da3e..0000000
--- a/attention/mlir/attention_12x384x64x64x384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<12x384x64xf8E4M3FNUZ>
-!K = tensor<12x384x64xf8E4M3FNUZ>
-!V = tensor<12x384x64xf8E4M3FNUZ>
-!O = tensor<12x384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x1024x128x128x1024xf16.mlir b/attention/mlir/attention_16x1024x128x128x1024xf16.mlir
deleted file mode 100644
index 4623470..0000000
--- a/attention/mlir/attention_16x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x1024x128xf16>
-!K = tensor<16x1024x128xf16>
-!V = tensor<16x1024x128xf16>
-!O = tensor<16x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x1024x128x128x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 0b6bded..0000000
--- a/attention/mlir/attention_16x1024x128x128x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x1024x128xf8E4M3FNUZ>
-!K = tensor<16x1024x128xf8E4M3FNUZ>
-!V = tensor<16x1024x128xf8E4M3FNUZ>
-!O = tensor<16x1024x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x1024x64x64x1024xf16.mlir b/attention/mlir/attention_16x1024x64x64x1024xf16.mlir
deleted file mode 100644
index 50c9a78..0000000
--- a/attention/mlir/attention_16x1024x64x64x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x1024x64xf16>
-!K = tensor<16x1024x64xf16>
-!V = tensor<16x1024x64xf16>
-!O = tensor<16x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x1024x64x64x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 86ef7f7..0000000
--- a/attention/mlir/attention_16x1024x64x64x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x1024x64xf8E4M3FNUZ>
-!K = tensor<16x1024x64xf8E4M3FNUZ>
-!V = tensor<16x1024x64xf8E4M3FNUZ>
-!O = tensor<16x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x16384x128x128x16384xf16.mlir b/attention/mlir/attention_16x16384x128x128x16384xf16.mlir
deleted file mode 100644
index f8ade10..0000000
--- a/attention/mlir/attention_16x16384x128x128x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x16384x128xf16>
-!K = tensor<16x16384x128xf16>
-!V = tensor<16x16384x128xf16>
-!O = tensor<16x16384x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x16384x128x128x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 9e46f03..0000000
--- a/attention/mlir/attention_16x16384x128x128x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x16384x128xf8E4M3FNUZ>
-!K = tensor<16x16384x128xf8E4M3FNUZ>
-!V = tensor<16x16384x128xf8E4M3FNUZ>
-!O = tensor<16x16384x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x16384x64x64x16384xf16.mlir b/attention/mlir/attention_16x16384x64x64x16384xf16.mlir
deleted file mode 100644
index 047fb68..0000000
--- a/attention/mlir/attention_16x16384x64x64x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x16384x64xf16>
-!K = tensor<16x16384x64xf16>
-!V = tensor<16x16384x64xf16>
-!O = tensor<16x16384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 6c876c0..0000000
--- a/attention/mlir/attention_16x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x16384x64xf8E4M3FNUZ>
-!K = tensor<16x16384x64xf8E4M3FNUZ>
-!V = tensor<16x16384x64xf8E4M3FNUZ>
-!O = tensor<16x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x2048x128x128x2048xf16.mlir b/attention/mlir/attention_16x2048x128x128x2048xf16.mlir
deleted file mode 100644
index ea249c7..0000000
--- a/attention/mlir/attention_16x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x2048x128xf16>
-!K = tensor<16x2048x128xf16>
-!V = tensor<16x2048x128xf16>
-!O = tensor<16x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index 12d772f..0000000
--- a/attention/mlir/attention_16x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x2048x128xf8E4M3FNUZ>
-!K = tensor<16x2048x128xf8E4M3FNUZ>
-!V = tensor<16x2048x128xf8E4M3FNUZ>
-!O = tensor<16x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x2048x64x64x2048xf16.mlir b/attention/mlir/attention_16x2048x64x64x2048xf16.mlir
deleted file mode 100644
index b73d4a8..0000000
--- a/attention/mlir/attention_16x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x2048x64xf16>
-!K = tensor<16x2048x64xf16>
-!V = tensor<16x2048x64xf16>
-!O = tensor<16x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x2048x64x64x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index a965fae..0000000
--- a/attention/mlir/attention_16x2048x64x64x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x2048x64xf8E4M3FNUZ>
-!K = tensor<16x2048x64xf8E4M3FNUZ>
-!V = tensor<16x2048x64xf8E4M3FNUZ>
-!O = tensor<16x2048x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x4096x128x128x4096xf16.mlir b/attention/mlir/attention_16x4096x128x128x4096xf16.mlir
deleted file mode 100644
index 754a68d..0000000
--- a/attention/mlir/attention_16x4096x128x128x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x4096x128xf16>
-!K = tensor<16x4096x128xf16>
-!V = tensor<16x4096x128xf16>
-!O = tensor<16x4096x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x4096x128x128x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 2fc1642..0000000
--- a/attention/mlir/attention_16x4096x128x128x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x4096x128xf8E4M3FNUZ>
-!K = tensor<16x4096x128xf8E4M3FNUZ>
-!V = tensor<16x4096x128xf8E4M3FNUZ>
-!O = tensor<16x4096x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x4096x64x64x4096xf16.mlir b/attention/mlir/attention_16x4096x64x64x4096xf16.mlir
deleted file mode 100644
index c673c2c..0000000
--- a/attention/mlir/attention_16x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x4096x64xf16>
-!K = tensor<16x4096x64xf16>
-!V = tensor<16x4096x64xf16>
-!O = tensor<16x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 7a71898..0000000
--- a/attention/mlir/attention_16x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x4096x64xf8E4M3FNUZ>
-!K = tensor<16x4096x64xf8E4M3FNUZ>
-!V = tensor<16x4096x64xf8E4M3FNUZ>
-!O = tensor<16x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x8192x128x128x8192xf16.mlir b/attention/mlir/attention_16x8192x128x128x8192xf16.mlir
deleted file mode 100644
index e9642ad..0000000
--- a/attention/mlir/attention_16x8192x128x128x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x8192x128xf16>
-!K = tensor<16x8192x128xf16>
-!V = tensor<16x8192x128xf16>
-!O = tensor<16x8192x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x8192x128x128x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 773130f..0000000
--- a/attention/mlir/attention_16x8192x128x128x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x8192x128xf8E4M3FNUZ>
-!K = tensor<16x8192x128xf8E4M3FNUZ>
-!V = tensor<16x8192x128xf8E4M3FNUZ>
-!O = tensor<16x8192x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x8192x64x64x8192xf16.mlir b/attention/mlir/attention_16x8192x64x64x8192xf16.mlir
deleted file mode 100644
index b7134c4..0000000
--- a/attention/mlir/attention_16x8192x64x64x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<16x8192x64xf16>
-!K = tensor<16x8192x64xf16>
-!V = tensor<16x8192x64xf16>
-!O = tensor<16x8192x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_16x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_16x8192x64x64x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 385f513..0000000
--- a/attention/mlir/attention_16x8192x64x64x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<16x8192x64xf8E4M3FNUZ>
-!K = tensor<16x8192x64xf8E4M3FNUZ>
-!V = tensor<16x8192x64xf8E4M3FNUZ>
-!O = tensor<16x8192x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x1024x128x128x1024xf16.mlir b/attention/mlir/attention_192x1024x128x128x1024xf16.mlir
deleted file mode 100644
index d568d56..0000000
--- a/attention/mlir/attention_192x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x1024x128xf16>
-!K = tensor<192x1024x128xf16>
-!V = tensor<192x1024x128xf16>
-!O = tensor<192x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x1024x128x128x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 198e2a9..0000000
--- a/attention/mlir/attention_192x1024x128x128x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x1024x128xf8E4M3FNUZ>
-!K = tensor<192x1024x128xf8E4M3FNUZ>
-!V = tensor<192x1024x128xf8E4M3FNUZ>
-!O = tensor<192x1024x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x1024x64x64x1024xf16.mlir b/attention/mlir/attention_192x1024x64x64x1024xf16.mlir
deleted file mode 100644
index 0ae8348..0000000
--- a/attention/mlir/attention_192x1024x64x64x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x1024x64xf16>
-!K = tensor<192x1024x64xf16>
-!V = tensor<192x1024x64xf16>
-!O = tensor<192x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x1024x64x64x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 5167cf9..0000000
--- a/attention/mlir/attention_192x1024x64x64x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x1024x64xf8E4M3FNUZ>
-!K = tensor<192x1024x64xf8E4M3FNUZ>
-!V = tensor<192x1024x64xf8E4M3FNUZ>
-!O = tensor<192x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x16384x128x128x16384xf16.mlir b/attention/mlir/attention_192x16384x128x128x16384xf16.mlir
deleted file mode 100644
index 5a0016f..0000000
--- a/attention/mlir/attention_192x16384x128x128x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x16384x128xf16>
-!K = tensor<192x16384x128xf16>
-!V = tensor<192x16384x128xf16>
-!O = tensor<192x16384x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x16384x128x128x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 4932489..0000000
--- a/attention/mlir/attention_192x16384x128x128x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x16384x128xf8E4M3FNUZ>
-!K = tensor<192x16384x128xf8E4M3FNUZ>
-!V = tensor<192x16384x128xf8E4M3FNUZ>
-!O = tensor<192x16384x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x16384x64x64x16384xf16.mlir b/attention/mlir/attention_192x16384x64x64x16384xf16.mlir
deleted file mode 100644
index e8d0bed..0000000
--- a/attention/mlir/attention_192x16384x64x64x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x16384x64xf16>
-!K = tensor<192x16384x64xf16>
-!V = tensor<192x16384x64xf16>
-!O = tensor<192x16384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index aa089b4..0000000
--- a/attention/mlir/attention_192x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x16384x64xf8E4M3FNUZ>
-!K = tensor<192x16384x64xf8E4M3FNUZ>
-!V = tensor<192x16384x64xf8E4M3FNUZ>
-!O = tensor<192x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x2048x128x128x2048xf16.mlir b/attention/mlir/attention_192x2048x128x128x2048xf16.mlir
deleted file mode 100644
index 3d248d7..0000000
--- a/attention/mlir/attention_192x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x2048x128xf16>
-!K = tensor<192x2048x128xf16>
-!V = tensor<192x2048x128xf16>
-!O = tensor<192x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index ced5bb1..0000000
--- a/attention/mlir/attention_192x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x2048x128xf8E4M3FNUZ>
-!K = tensor<192x2048x128xf8E4M3FNUZ>
-!V = tensor<192x2048x128xf8E4M3FNUZ>
-!O = tensor<192x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x2048x64x64x2048xf16.mlir b/attention/mlir/attention_192x2048x64x64x2048xf16.mlir
deleted file mode 100644
index 95391b8..0000000
--- a/attention/mlir/attention_192x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x2048x64xf16>
-!K = tensor<192x2048x64xf16>
-!V = tensor<192x2048x64xf16>
-!O = tensor<192x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x2048x64x64x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index cd6006d..0000000
--- a/attention/mlir/attention_192x2048x64x64x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x2048x64xf8E4M3FNUZ>
-!K = tensor<192x2048x64xf8E4M3FNUZ>
-!V = tensor<192x2048x64xf8E4M3FNUZ>
-!O = tensor<192x2048x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x4096x128x128x4096xf16.mlir b/attention/mlir/attention_192x4096x128x128x4096xf16.mlir
deleted file mode 100644
index b33089f..0000000
--- a/attention/mlir/attention_192x4096x128x128x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x4096x128xf16>
-!K = tensor<192x4096x128xf16>
-!V = tensor<192x4096x128xf16>
-!O = tensor<192x4096x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x4096x128x128x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index c42edaa..0000000
--- a/attention/mlir/attention_192x4096x128x128x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x4096x128xf8E4M3FNUZ>
-!K = tensor<192x4096x128xf8E4M3FNUZ>
-!V = tensor<192x4096x128xf8E4M3FNUZ>
-!O = tensor<192x4096x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x4096x64x64x4096xf16.mlir b/attention/mlir/attention_192x4096x64x64x4096xf16.mlir
deleted file mode 100644
index 0a4df89..0000000
--- a/attention/mlir/attention_192x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x4096x64xf16>
-!K = tensor<192x4096x64xf16>
-!V = tensor<192x4096x64xf16>
-!O = tensor<192x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 7ba5e9a..0000000
--- a/attention/mlir/attention_192x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x4096x64xf8E4M3FNUZ>
-!K = tensor<192x4096x64xf8E4M3FNUZ>
-!V = tensor<192x4096x64xf8E4M3FNUZ>
-!O = tensor<192x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x8192x128x128x8192xf16.mlir b/attention/mlir/attention_192x8192x128x128x8192xf16.mlir
deleted file mode 100644
index 9f5cacd..0000000
--- a/attention/mlir/attention_192x8192x128x128x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x8192x128xf16>
-!K = tensor<192x8192x128xf16>
-!V = tensor<192x8192x128xf16>
-!O = tensor<192x8192x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x8192x128x128x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index f65c32f..0000000
--- a/attention/mlir/attention_192x8192x128x128x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x8192x128xf8E4M3FNUZ>
-!K = tensor<192x8192x128xf8E4M3FNUZ>
-!V = tensor<192x8192x128xf8E4M3FNUZ>
-!O = tensor<192x8192x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x8192x64x64x8192xf16.mlir b/attention/mlir/attention_192x8192x64x64x8192xf16.mlir
deleted file mode 100644
index dd75314..0000000
--- a/attention/mlir/attention_192x8192x64x64x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<192x8192x64xf16>
-!K = tensor<192x8192x64xf16>
-!V = tensor<192x8192x64xf16>
-!O = tensor<192x8192x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
- ,compilation_info = #tuning
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_192x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_192x8192x64x64x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 6e02f7f..0000000
--- a/attention/mlir/attention_192x8192x64x64x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<192x8192x64xf8E4M3FNUZ>
-!K = tensor<192x8192x64xf8E4M3FNUZ>
-!V = tensor<192x8192x64xf8E4M3FNUZ>
-!O = tensor<192x8192x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
- %scale = arith.constant 1.0 : !dtype
- %empty = tensor.empty() : !O
- %O = iree_linalg_ext.attention
- { indexing_maps = [#Q, #K, #V, #S, #O]
-
- }
- ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
- outs(%empty : !O) -> !O
- return %O : !O
-}
diff --git a/attention/mlir/attention_1x1024x128x128x1024xf16.mlir b/attention/mlir/attention_1x1024x128x128x1024xf16.mlir
deleted file mode 100644
index d0033c3..0000000
--- a/attention/mlir/attention_1x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x1024x128xf16>
-!K = tensor<1x1024x128xf16>
-!V = tensor<1x1024x128xf16>
-!O = tensor<1x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
#iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 6f4262a..0000000 --- a/attention/mlir/attention_1x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<1x1024x128xf8E4M3FNUZ> -!K = tensor<1x1024x128xf8E4M3FNUZ> -!V = tensor<1x1024x128xf8E4M3FNUZ> -!O = tensor<1x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x1024x64x64x1024xf16.mlir b/attention/mlir/attention_1x1024x64x64x1024xf16.mlir deleted file mode 100644 index 801f7a1..0000000 --- a/attention/mlir/attention_1x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<1x1024x64xf16> -!K = tensor<1x1024x64xf16> -!V = tensor<1x1024x64xf16> -!O = tensor<1x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index ff0265b..0000000 --- a/attention/mlir/attention_1x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<1x1024x64xf8E4M3FNUZ> -!K = tensor<1x1024x64xf8E4M3FNUZ> -!V = tensor<1x1024x64xf8E4M3FNUZ> -!O = tensor<1x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, 
k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x16384x128x128x16384xf16.mlir b/attention/mlir/attention_1x16384x128x128x16384xf16.mlir deleted file mode 100644 index 9de41f4..0000000 --- a/attention/mlir/attention_1x16384x128x128x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<1x16384x128xf16> -!K = tensor<1x16384x128xf16> -!V = tensor<1x16384x128xf16> -!O = tensor<1x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index e2a3332..0000000 --- a/attention/mlir/attention_1x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<1x16384x128xf8E4M3FNUZ> -!K = tensor<1x16384x128xf8E4M3FNUZ> -!V = tensor<1x16384x128xf8E4M3FNUZ> -!O = tensor<1x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_1x16384x64x64x16384xf16.mlir b/attention/mlir/attention_1x16384x64x64x16384xf16.mlir deleted file mode 100644 index c6c641c..0000000 --- a/attention/mlir/attention_1x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<1x16384x64xf16> -!K = tensor<1x16384x64xf16> -!V = tensor<1x16384x64xf16> -!O = tensor<1x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - 
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index b1243c4..0000000
--- a/attention/mlir/attention_1x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x16384x64xf8E4M3FNUZ>
-!K = tensor<1x16384x64xf8E4M3FNUZ>
-!V = tensor<1x16384x64xf8E4M3FNUZ>
-!O = tensor<1x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x2048x128x128x2048xf16.mlir b/attention/mlir/attention_1x2048x128x128x2048xf16.mlir
deleted file mode 100644
index 33118e6..0000000
--- a/attention/mlir/attention_1x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x2048x128xf16>
-!K = tensor<1x2048x128xf16>
-!V = tensor<1x2048x128xf16>
-!O = tensor<1x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index edc2893..0000000
--- a/attention/mlir/attention_1x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x2048x128xf8E4M3FNUZ>
-!K = tensor<1x2048x128xf8E4M3FNUZ>
-!V = tensor<1x2048x128xf8E4M3FNUZ>
-!O = tensor<1x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x2048x64x64x2048xf16.mlir b/attention/mlir/attention_1x2048x64x64x2048xf16.mlir
deleted file mode 100644
index af1fe5e..0000000
--- a/attention/mlir/attention_1x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x2048x64xf16>
-!K = tensor<1x2048x64xf16>
-!V = tensor<1x2048x64xf16>
-!O = tensor<1x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x2048x64x64x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index fc712b1..0000000
--- a/attention/mlir/attention_1x2048x64x64x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x2048x64xf8E4M3FNUZ>
-!K = tensor<1x2048x64xf8E4M3FNUZ>
-!V = tensor<1x2048x64xf8E4M3FNUZ>
-!O = tensor<1x2048x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x128x128x4096xf16.mlir b/attention/mlir/attention_1x4096x128x128x4096xf16.mlir
deleted file mode 100644
index c65d072..0000000
--- a/attention/mlir/attention_1x4096x128x128x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x4096x128xf16>
-!K = tensor<1x4096x128xf16>
-!V = tensor<1x4096x128xf16>
-!O = tensor<1x4096x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x4096x128x128x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 11503c2..0000000
--- a/attention/mlir/attention_1x4096x128x128x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x4096x128xf8E4M3FNUZ>
-!K = tensor<1x4096x128xf8E4M3FNUZ>
-!V = tensor<1x4096x128xf8E4M3FNUZ>
-!O = tensor<1x4096x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x64x64x4096xf16.mlir b/attention/mlir/attention_1x4096x64x64x4096xf16.mlir
deleted file mode 100644
index ffb81b9..0000000
--- a/attention/mlir/attention_1x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x4096x64xf16>
-!K = tensor<1x4096x64xf16>
-!V = tensor<1x4096x64xf16>
-!O = tensor<1x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index c06f231..0000000
--- a/attention/mlir/attention_1x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x4096x64xf8E4M3FNUZ>
-!K = tensor<1x4096x64xf8E4M3FNUZ>
-!V = tensor<1x4096x64xf8E4M3FNUZ>
-!O = tensor<1x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x64x64x64xf16.mlir b/attention/mlir/attention_1x4096x64x64x64xf16.mlir
deleted file mode 100644
index f514c5f..0000000
--- a/attention/mlir/attention_1x4096x64x64x64xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x4096x64xf16>
-!K = tensor<1x64x64xf16>
-!V = tensor<1x64x64xf16>
-!O = tensor<1x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x4096x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x4096x64x64x64xf8E4M3FNUZ.mlir
deleted file mode 100644
index 277080c..0000000
--- a/attention/mlir/attention_1x4096x64x64x64xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x4096x64xf8E4M3FNUZ>
-!K = tensor<1x64x64xf8E4M3FNUZ>
-!V = tensor<1x64x64xf8E4M3FNUZ>
-!O = tensor<1x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x8192x128x128x8192xf16.mlir b/attention/mlir/attention_1x8192x128x128x8192xf16.mlir
deleted file mode 100644
index 8f95a6e..0000000
--- a/attention/mlir/attention_1x8192x128x128x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x8192x128xf16>
-!K = tensor<1x8192x128xf16>
-!V = tensor<1x8192x128xf16>
-!O = tensor<1x8192x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x8192x128x128x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 296ea87..0000000
--- a/attention/mlir/attention_1x8192x128x128x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x8192x128xf8E4M3FNUZ>
-!K = tensor<1x8192x128xf8E4M3FNUZ>
-!V = tensor<1x8192x128xf8E4M3FNUZ>
-!O = tensor<1x8192x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x8192x64x64x8192xf16.mlir b/attention/mlir/attention_1x8192x64x64x8192xf16.mlir
deleted file mode 100644
index 8164497..0000000
--- a/attention/mlir/attention_1x8192x64x64x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<1x8192x64xf16>
-!K = tensor<1x8192x64xf16>
-!V = tensor<1x8192x64xf16>
-!O = tensor<1x8192x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_1x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_1x8192x64x64x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index ca010f7..0000000
--- a/attention/mlir/attention_1x8192x64x64x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<1x8192x64xf8E4M3FNUZ>
-!K = tensor<1x8192x64xf8E4M3FNUZ>
-!V = tensor<1x8192x64xf8E4M3FNUZ>
-!O = tensor<1x8192x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_20x4096x64x64x4096xf16.mlir b/attention/mlir/attention_20x4096x64x64x4096xf16.mlir
deleted file mode 100644
index 5902946..0000000
--- a/attention/mlir/attention_20x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<20x4096x64xf16>
-!K = tensor<20x4096x64xf16>
-!V = tensor<20x4096x64xf16>
-!O = tensor<20x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_20x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_20x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index b1049ac..0000000
--- a/attention/mlir/attention_20x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<20x4096x64xf8E4M3FNUZ>
-!K = tensor<20x4096x64xf8E4M3FNUZ>
-!V = tensor<20x4096x64xf8E4M3FNUZ>
-!O = tensor<20x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_20x4096x64x64x64xf16.mlir b/attention/mlir/attention_20x4096x64x64x64xf16.mlir
deleted file mode 100644
index 125de49..0000000
--- a/attention/mlir/attention_20x4096x64x64x64xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<20x4096x64xf16>
-!K = tensor<20x64x64xf16>
-!V = tensor<20x64x64xf16>
-!O = tensor<20x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_20x4096x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_20x4096x64x64x64xf8E4M3FNUZ.mlir
deleted file mode 100644
index 9a419fa..0000000
--- a/attention/mlir/attention_20x4096x64x64x64xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<20x4096x64xf8E4M3FNUZ>
-!K = tensor<20x64x64xf8E4M3FNUZ>
-!V = tensor<20x64x64xf8E4M3FNUZ>
-!O = tensor<20x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x128x128x1024xf16.mlir b/attention/mlir/attention_2x1024x128x128x1024xf16.mlir
deleted file mode 100644
index 74b26d4..0000000
--- a/attention/mlir/attention_2x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x1024x128xf16>
-!K = tensor<2x1024x128xf16>
-!V = tensor<2x1024x128xf16>
-!O = tensor<2x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x1024x128x128x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index af9abef..0000000
--- a/attention/mlir/attention_2x1024x128x128x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x1024x128xf8E4M3FNUZ>
-!K = tensor<2x1024x128xf8E4M3FNUZ>
-!V = tensor<2x1024x128xf8E4M3FNUZ>
-!O = tensor<2x1024x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x64x64x1024xf16.mlir b/attention/mlir/attention_2x1024x64x64x1024xf16.mlir
deleted file mode 100644
index 88d5950..0000000
--- a/attention/mlir/attention_2x1024x64x64x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x1024x64xf16>
-!K = tensor<2x1024x64xf16>
-!V = tensor<2x1024x64xf16>
-!O = tensor<2x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x1024x64x64x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index e5c0053..0000000
--- a/attention/mlir/attention_2x1024x64x64x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x1024x64xf8E4M3FNUZ>
-!K = tensor<2x1024x64xf8E4M3FNUZ>
-!V = tensor<2x1024x64xf8E4M3FNUZ>
-!O = tensor<2x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x64x64x64xf16.mlir b/attention/mlir/attention_2x1024x64x64x64xf16.mlir
deleted file mode 100644
index 2bfed9b..0000000
--- a/attention/mlir/attention_2x1024x64x64x64xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x1024x64xf16>
-!K = tensor<2x64x64xf16>
-!V = tensor<2x64x64xf16>
-!O = tensor<2x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x1024x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x1024x64x64x64xf8E4M3FNUZ.mlir
deleted file mode 100644
index 33facf8..0000000
--- a/attention/mlir/attention_2x1024x64x64x64xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x1024x64xf8E4M3FNUZ>
-!K = tensor<2x64x64xf8E4M3FNUZ>
-!V = tensor<2x64x64xf8E4M3FNUZ>
-!O = tensor<2x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x16384x128x128x16384xf16.mlir b/attention/mlir/attention_2x16384x128x128x16384xf16.mlir
deleted file mode 100644
index 19d2a0b..0000000
--- a/attention/mlir/attention_2x16384x128x128x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x16384x128xf16>
-!K = tensor<2x16384x128xf16>
-!V = tensor<2x16384x128xf16>
-!O = tensor<2x16384x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x16384x128x128x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 7183642..0000000
--- a/attention/mlir/attention_2x16384x128x128x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x16384x128xf8E4M3FNUZ>
-!K = tensor<2x16384x128xf8E4M3FNUZ>
-!V = tensor<2x16384x128xf8E4M3FNUZ>
-!O = tensor<2x16384x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x16384x64x64x16384xf16.mlir b/attention/mlir/attention_2x16384x64x64x16384xf16.mlir
deleted file mode 100644
index ce2a318..0000000
--- a/attention/mlir/attention_2x16384x64x64x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x16384x64xf16>
-!K = tensor<2x16384x64xf16>
-!V = tensor<2x16384x64xf16>
-!O = tensor<2x16384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 4205d23..0000000
--- a/attention/mlir/attention_2x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x16384x64xf8E4M3FNUZ>
-!K = tensor<2x16384x64xf8E4M3FNUZ>
-!V = tensor<2x16384x64xf8E4M3FNUZ>
-!O = tensor<2x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x2048x128x128x2048xf16.mlir b/attention/mlir/attention_2x2048x128x128x2048xf16.mlir
deleted file mode 100644
index a9ee68d..0000000
--- a/attention/mlir/attention_2x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x2048x128xf16>
-!K = tensor<2x2048x128xf16>
-!V = tensor<2x2048x128xf16>
-!O = tensor<2x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index b39d931..0000000
--- a/attention/mlir/attention_2x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x2048x128xf8E4M3FNUZ>
-!K = tensor<2x2048x128xf8E4M3FNUZ>
-!V = tensor<2x2048x128xf8E4M3FNUZ>
-!O = tensor<2x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x2048x64x64x2048xf16.mlir b/attention/mlir/attention_2x2048x64x64x2048xf16.mlir
deleted file mode 100644
index d5d1fff..0000000
--- a/attention/mlir/attention_2x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x2048x64xf16>
-!K = tensor<2x2048x64xf16>
-!V = tensor<2x2048x64xf16>
-!O = tensor<2x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x2048x64x64x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index 3b8d113..0000000
--- a/attention/mlir/attention_2x2048x64x64x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x2048x64xf8E4M3FNUZ>
-!K = tensor<2x2048x64xf8E4M3FNUZ>
-!V = tensor<2x2048x64xf8E4M3FNUZ>
-!O = tensor<2x2048x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x4096x128x128x4096xf16.mlir b/attention/mlir/attention_2x4096x128x128x4096xf16.mlir
deleted file mode 100644
index d150478..0000000
--- a/attention/mlir/attention_2x4096x128x128x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x4096x128xf16>
-!K = tensor<2x4096x128xf16>
-!V = tensor<2x4096x128xf16>
-!O = tensor<2x4096x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x4096x128x128x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 838f730..0000000
--- a/attention/mlir/attention_2x4096x128x128x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x4096x128xf8E4M3FNUZ>
-!K = tensor<2x4096x128xf8E4M3FNUZ>
-!V = tensor<2x4096x128xf8E4M3FNUZ>
-!O = tensor<2x4096x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x4096x64x64x4096xf16.mlir b/attention/mlir/attention_2x4096x64x64x4096xf16.mlir
deleted file mode 100644
index f36c84d..0000000
--- a/attention/mlir/attention_2x4096x64x64x4096xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x4096x64xf16>
-!K = tensor<2x4096x64xf16>
-!V = tensor<2x4096x64xf16>
-!O = tensor<2x4096x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x4096x64x64x4096xf8E4M3FNUZ.mlir
deleted file mode 100644
index 6b96d01..0000000
--- a/attention/mlir/attention_2x4096x64x64x4096xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x4096x64xf8E4M3FNUZ>
-!K = tensor<2x4096x64xf8E4M3FNUZ>
-!V = tensor<2x4096x64xf8E4M3FNUZ>
-!O = tensor<2x4096x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x8192x128x128x8192xf16.mlir b/attention/mlir/attention_2x8192x128x128x8192xf16.mlir
deleted file mode 100644
index 91c7a35..0000000
--- a/attention/mlir/attention_2x8192x128x128x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x8192x128xf16>
-!K = tensor<2x8192x128xf16>
-!V = tensor<2x8192x128xf16>
-!O = tensor<2x8192x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x8192x128x128x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 49022f9..0000000
--- a/attention/mlir/attention_2x8192x128x128x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x8192x128xf8E4M3FNUZ>
-!K = tensor<2x8192x128xf8E4M3FNUZ>
-!V = tensor<2x8192x128xf8E4M3FNUZ>
-!O = tensor<2x8192x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x8192x64x64x8192xf16.mlir b/attention/mlir/attention_2x8192x64x64x8192xf16.mlir
deleted file mode 100644
index 4a6854b..0000000
--- a/attention/mlir/attention_2x8192x64x64x8192xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<2x8192x64xf16>
-!K = tensor<2x8192x64xf16>
-!V = tensor<2x8192x64xf16>
-!O = tensor<2x8192x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_2x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_2x8192x64x64x8192xf8E4M3FNUZ.mlir
deleted file mode 100644
index 6c1be4a..0000000
--- a/attention/mlir/attention_2x8192x64x64x8192xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<2x8192x64xf8E4M3FNUZ>
-!K = tensor<2x8192x64xf8E4M3FNUZ>
-!V = tensor<2x8192x64xf8E4M3FNUZ>
-!O = tensor<2x8192x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x1024x128x128x1024xf16.mlir b/attention/mlir/attention_32x1024x128x128x1024xf16.mlir
deleted file mode 100644
index b49b18d..0000000
--- a/attention/mlir/attention_32x1024x128x128x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x1024x128xf16>
-!K = tensor<32x1024x128xf16>
-!V = tensor<32x1024x128xf16>
-!O = tensor<32x1024x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x1024x128x128x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 42307f6..0000000
--- a/attention/mlir/attention_32x1024x128x128x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<32x1024x128xf8E4M3FNUZ>
-!K = tensor<32x1024x128xf8E4M3FNUZ>
-!V = tensor<32x1024x128xf8E4M3FNUZ>
-!O = tensor<32x1024x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x1024x64x64x1024xf16.mlir b/attention/mlir/attention_32x1024x64x64x1024xf16.mlir
deleted file mode 100644
index 368bdb0..0000000
--- a/attention/mlir/attention_32x1024x64x64x1024xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x1024x64xf16>
-!K = tensor<32x1024x64xf16>
-!V = tensor<32x1024x64xf16>
-!O = tensor<32x1024x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x1024x64x64x1024xf8E4M3FNUZ.mlir
deleted file mode 100644
index 2749162..0000000
--- a/attention/mlir/attention_32x1024x64x64x1024xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<32x1024x64xf8E4M3FNUZ>
-!K = tensor<32x1024x64xf8E4M3FNUZ>
-!V = tensor<32x1024x64xf8E4M3FNUZ>
-!O = tensor<32x1024x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x16384x128x128x16384xf16.mlir b/attention/mlir/attention_32x16384x128x128x16384xf16.mlir
deleted file mode 100644
index e8e0305..0000000
--- a/attention/mlir/attention_32x16384x128x128x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x16384x128xf16>
-!K = tensor<32x16384x128xf16>
-!V = tensor<32x16384x128xf16>
-!O = tensor<32x16384x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x16384x128x128x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index 678a5f4..0000000
--- a/attention/mlir/attention_32x16384x128x128x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<32x16384x128xf8E4M3FNUZ>
-!K = tensor<32x16384x128xf8E4M3FNUZ>
-!V = tensor<32x16384x128xf8E4M3FNUZ>
-!O = tensor<32x16384x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x16384x64x64x16384xf16.mlir b/attention/mlir/attention_32x16384x64x64x16384xf16.mlir
deleted file mode 100644
index f9a8903..0000000
--- a/attention/mlir/attention_32x16384x64x64x16384xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x16384x64xf16>
-!K = tensor<32x16384x64xf16>
-!V = tensor<32x16384x64xf16>
-!O = tensor<32x16384x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x16384x64x64x16384xf8E4M3FNUZ.mlir
deleted file mode 100644
index be3a447..0000000
--- a/attention/mlir/attention_32x16384x64x64x16384xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<32x16384x64xf8E4M3FNUZ>
-!K = tensor<32x16384x64xf8E4M3FNUZ>
-!V = tensor<32x16384x64xf8E4M3FNUZ>
-!O = tensor<32x16384x64xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x2048x128x128x2048xf16.mlir b/attention/mlir/attention_32x2048x128x128x2048xf16.mlir
deleted file mode 100644
index da02867..0000000
--- a/attention/mlir/attention_32x2048x128x128x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x2048x128xf16>
-!K = tensor<32x2048x128xf16>
-!V = tensor<32x2048x128xf16>
-!O = tensor<32x2048x128xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-     ,compilation_info = #tuning
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x2048x128x128x2048xf8E4M3FNUZ.mlir
deleted file mode 100644
index 3ab545a..0000000
--- a/attention/mlir/attention_32x2048x128x128x2048xf8E4M3FNUZ.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-!dtype = f8E4M3FNUZ
-!Q = tensor<32x2048x128xf8E4M3FNUZ>
-!K = tensor<32x2048x128xf8E4M3FNUZ>
-!V = tensor<32x2048x128xf8E4M3FNUZ>
-!O = tensor<32x2048x128xf8E4M3FNUZ>
-
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)>
-#S = affine_map<(b, m, n, k1, k2) -> ()>
-#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)>
-
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O {
-  %scale = arith.constant 1.0 : !dtype
-  %empty = tensor.empty() : !O
-  %O = iree_linalg_ext.attention
-    { indexing_maps = [#Q, #K, #V, #S, #O]
-
-    }
-    ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype)
-    outs(%empty : !O) -> !O
-  return %O : !O
-}
diff --git a/attention/mlir/attention_32x2048x64x64x2048xf16.mlir b/attention/mlir/attention_32x2048x64x64x2048xf16.mlir
deleted file mode 100644
index 8b4ec4f..0000000
--- a/attention/mlir/attention_32x2048x64x64x2048xf16.mlir
+++ /dev/null
@@ -1,26 +0,0 @@
-!dtype = f16
-!Q = tensor<32x2048x64xf16>
-!K = tensor<32x2048x64xf16>
-!V = tensor<32x2048x64xf16>
-!O = tensor<32x2048x64xf16>
-
-#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>>
-
-
-#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)>
-#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)>
-#V = 
affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index d256708..0000000 --- a/attention/mlir/attention_32x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<32x2048x64xf8E4M3FNUZ> -!K = tensor<32x2048x64xf8E4M3FNUZ> -!V = tensor<32x2048x64xf8E4M3FNUZ> -!O = tensor<32x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x4096x128x128x4096xf16.mlir b/attention/mlir/attention_32x4096x128x128x4096xf16.mlir deleted file mode 100644 index b12bd54..0000000 --- a/attention/mlir/attention_32x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<32x4096x128xf16> -!K = tensor<32x4096x128xf16> -!V = tensor<32x4096x128xf16> -!O = tensor<32x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index b9f6c07..0000000 --- a/attention/mlir/attention_32x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<32x4096x128xf8E4M3FNUZ> -!K = tensor<32x4096x128xf8E4M3FNUZ> -!V = tensor<32x4096x128xf8E4M3FNUZ> -!O = tensor<32x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, 
#O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x4096x64x64x4096xf16.mlir b/attention/mlir/attention_32x4096x64x64x4096xf16.mlir deleted file mode 100644 index e7e4b2f..0000000 --- a/attention/mlir/attention_32x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<32x4096x64xf16> -!K = tensor<32x4096x64xf16> -!V = tensor<32x4096x64xf16> -!O = tensor<32x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index df3cf40..0000000 --- a/attention/mlir/attention_32x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<32x4096x64xf8E4M3FNUZ> -!K = tensor<32x4096x64xf8E4M3FNUZ> -!V = tensor<32x4096x64xf8E4M3FNUZ> -!O = tensor<32x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x8192x128x128x8192xf16.mlir b/attention/mlir/attention_32x8192x128x128x8192xf16.mlir deleted file mode 100644 index 1198ace..0000000 --- a/attention/mlir/attention_32x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<32x8192x128xf16> -!K = tensor<32x8192x128xf16> -!V = tensor<32x8192x128xf16> -!O = tensor<32x8192x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git 
a/attention/mlir/attention_32x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 2477ea2..0000000 --- a/attention/mlir/attention_32x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<32x8192x128xf8E4M3FNUZ> -!K = tensor<32x8192x128xf8E4M3FNUZ> -!V = tensor<32x8192x128xf8E4M3FNUZ> -!O = tensor<32x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x8192x64x64x8192xf16.mlir b/attention/mlir/attention_32x8192x64x64x8192xf16.mlir deleted file mode 100644 index 631bded..0000000 --- a/attention/mlir/attention_32x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<32x8192x64xf16> -!K = tensor<32x8192x64xf16> -!V = tensor<32x8192x64xf16> -!O = tensor<32x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_32x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_32x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 44d9a16..0000000 --- a/attention/mlir/attention_32x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<32x8192x64xf8E4M3FNUZ> -!K = tensor<32x8192x64xf8E4M3FNUZ> -!V = tensor<32x8192x64xf8E4M3FNUZ> -!O = tensor<32x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_40x1024x64x64x1024xf16.mlir b/attention/mlir/attention_40x1024x64x64x1024xf16.mlir deleted file mode 100644 index 30b8be9..0000000 --- a/attention/mlir/attention_40x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<40x1024x64xf16> -!K = tensor<40x1024x64xf16> -!V = tensor<40x1024x64xf16> -!O = 
tensor<40x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_40x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_40x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index bbb52d5..0000000 --- a/attention/mlir/attention_40x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<40x1024x64xf8E4M3FNUZ> -!K = tensor<40x1024x64xf8E4M3FNUZ> -!V = tensor<40x1024x64xf8E4M3FNUZ> -!O = tensor<40x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_40x1024x64x64x64xf16.mlir b/attention/mlir/attention_40x1024x64x64x64xf16.mlir deleted file mode 100644 index 2b1ecab..0000000 --- a/attention/mlir/attention_40x1024x64x64x64xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<40x1024x64xf16> -!K = tensor<40x64x64xf16> -!V = tensor<40x64x64xf16> -!O = tensor<40x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_40x1024x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_40x1024x64x64x64xf8E4M3FNUZ.mlir deleted file mode 100644 index cc73d9a..0000000 --- a/attention/mlir/attention_40x1024x64x64x64xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<40x1024x64xf8E4M3FNUZ> -!K = tensor<40x64x64xf8E4M3FNUZ> -!V = tensor<40x64x64xf8E4M3FNUZ> -!O = tensor<40x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = 
affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x1024x128x128x1024xf16.mlir b/attention/mlir/attention_48x1024x128x128x1024xf16.mlir deleted file mode 100644 index b145d0e..0000000 --- a/attention/mlir/attention_48x1024x128x128x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x1024x128xf16> -!K = tensor<48x1024x128xf16> -!V = tensor<48x1024x128xf16> -!O = tensor<48x1024x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 066c173..0000000 --- a/attention/mlir/attention_48x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x1024x128xf8E4M3FNUZ> -!K = tensor<48x1024x128xf8E4M3FNUZ> -!V = tensor<48x1024x128xf8E4M3FNUZ> -!O = tensor<48x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x1024x64x64x1024xf16.mlir b/attention/mlir/attention_48x1024x64x64x1024xf16.mlir deleted file mode 100644 index 53ea959..0000000 --- a/attention/mlir/attention_48x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x1024x64xf16> -!K = tensor<48x1024x64xf16> -!V = tensor<48x1024x64xf16> -!O = tensor<48x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) 
-> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 2f0c7f0..0000000 --- a/attention/mlir/attention_48x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x1024x64xf8E4M3FNUZ> -!K = tensor<48x1024x64xf8E4M3FNUZ> -!V = tensor<48x1024x64xf8E4M3FNUZ> -!O = tensor<48x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x16384x128x128x16384xf16.mlir b/attention/mlir/attention_48x16384x128x128x16384xf16.mlir deleted file mode 100644 index 3e6f137..0000000 --- a/attention/mlir/attention_48x16384x128x128x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x16384x128xf16> -!K = tensor<48x16384x128xf16> -!V = tensor<48x16384x128xf16> -!O = tensor<48x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index 956eb56..0000000 --- a/attention/mlir/attention_48x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x16384x128xf8E4M3FNUZ> -!K = tensor<48x16384x128xf8E4M3FNUZ> -!V = tensor<48x16384x128xf8E4M3FNUZ> -!O = tensor<48x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git 
a/attention/mlir/attention_48x16384x64x64x16384xf16.mlir b/attention/mlir/attention_48x16384x64x64x16384xf16.mlir deleted file mode 100644 index 09f27d3..0000000 --- a/attention/mlir/attention_48x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x16384x64xf16> -!K = tensor<48x16384x64xf16> -!V = tensor<48x16384x64xf16> -!O = tensor<48x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x16384x64x64x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index 64550e6..0000000 --- a/attention/mlir/attention_48x16384x64x64x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x16384x64xf8E4M3FNUZ> -!K = tensor<48x16384x64xf8E4M3FNUZ> -!V = tensor<48x16384x64xf8E4M3FNUZ> -!O = tensor<48x16384x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x2048x128x128x2048xf16.mlir b/attention/mlir/attention_48x2048x128x128x2048xf16.mlir deleted file mode 100644 index d509a38..0000000 --- a/attention/mlir/attention_48x2048x128x128x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x2048x128xf16> -!K = tensor<48x2048x128xf16> -!V = tensor<48x2048x128xf16> -!O = tensor<48x2048x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x2048x128x128x2048xf8E4M3FNUZ.mlir deleted file 
mode 100644 index 92dd2d1..0000000 --- a/attention/mlir/attention_48x2048x128x128x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x2048x128xf8E4M3FNUZ> -!K = tensor<48x2048x128xf8E4M3FNUZ> -!V = tensor<48x2048x128xf8E4M3FNUZ> -!O = tensor<48x2048x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x2048x64x64x2048xf16.mlir b/attention/mlir/attention_48x2048x64x64x2048xf16.mlir deleted file mode 100644 index 9076e81..0000000 --- a/attention/mlir/attention_48x2048x64x64x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x2048x64xf16> -!K = tensor<48x2048x64xf16> -!V = tensor<48x2048x64xf16> -!O = tensor<48x2048x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 7890143..0000000 --- a/attention/mlir/attention_48x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x2048x64xf8E4M3FNUZ> -!K = tensor<48x2048x64xf8E4M3FNUZ> -!V = tensor<48x2048x64xf8E4M3FNUZ> -!O = tensor<48x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x4096x128x128x4096xf16.mlir b/attention/mlir/attention_48x4096x128x128x4096xf16.mlir deleted file mode 100644 index cefa7f5..0000000 --- a/attention/mlir/attention_48x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x4096x128xf16> -!K = tensor<48x4096x128xf16> -!V = tensor<48x4096x128xf16> -!O = tensor<48x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, 
subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 05d6106..0000000 --- a/attention/mlir/attention_48x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x4096x128xf8E4M3FNUZ> -!K = tensor<48x4096x128xf8E4M3FNUZ> -!V = tensor<48x4096x128xf8E4M3FNUZ> -!O = tensor<48x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x4096x64x64x4096xf16.mlir b/attention/mlir/attention_48x4096x64x64x4096xf16.mlir deleted file mode 100644 index 28a5ac9..0000000 --- a/attention/mlir/attention_48x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x4096x64xf16> -!K = tensor<48x4096x64xf16> -!V = tensor<48x4096x64xf16> -!O = tensor<48x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index e28ab3c..0000000 --- a/attention/mlir/attention_48x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x4096x64xf8E4M3FNUZ> -!K = tensor<48x4096x64xf8E4M3FNUZ> -!V = tensor<48x4096x64xf8E4M3FNUZ> -!O = tensor<48x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, 
n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x8192x128x128x8192xf16.mlir b/attention/mlir/attention_48x8192x128x128x8192xf16.mlir deleted file mode 100644 index 00f17b2..0000000 --- a/attention/mlir/attention_48x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x8192x128xf16> -!K = tensor<48x8192x128xf16> -!V = tensor<48x8192x128xf16> -!O = tensor<48x8192x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index eecccd9..0000000 --- a/attention/mlir/attention_48x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x8192x128xf8E4M3FNUZ> -!K = tensor<48x8192x128xf8E4M3FNUZ> -!V = tensor<48x8192x128xf8E4M3FNUZ> -!O = tensor<48x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x8192x64x64x8192xf16.mlir b/attention/mlir/attention_48x8192x64x64x8192xf16.mlir deleted file mode 100644 index 38f7a02..0000000 --- a/attention/mlir/attention_48x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<48x8192x64xf16> -!K = tensor<48x8192x64xf16> -!V = tensor<48x8192x64xf16> -!O = tensor<48x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = 
iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_48x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_48x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 109d155..0000000 --- a/attention/mlir/attention_48x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<48x8192x64xf8E4M3FNUZ> -!K = tensor<48x8192x64xf8E4M3FNUZ> -!V = tensor<48x8192x64xf8E4M3FNUZ> -!O = tensor<48x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x1024x128x128x1024xf16.mlir b/attention/mlir/attention_4x1024x128x128x1024xf16.mlir deleted file mode 100644 index 6e34384..0000000 --- a/attention/mlir/attention_4x1024x128x128x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x1024x128xf16> -!K = tensor<4x1024x128xf16> -!V = tensor<4x1024x128xf16> -!O = tensor<4x1024x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 90d48c2..0000000 --- a/attention/mlir/attention_4x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x1024x128xf8E4M3FNUZ> -!K = tensor<4x1024x128xf8E4M3FNUZ> -!V = tensor<4x1024x128xf8E4M3FNUZ> -!O = tensor<4x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x1024x64x64x1024xf16.mlir b/attention/mlir/attention_4x1024x64x64x1024xf16.mlir deleted file mode 100644 index 381ad7d..0000000 --- 
a/attention/mlir/attention_4x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x1024x64xf16> -!K = tensor<4x1024x64xf16> -!V = tensor<4x1024x64xf16> -!O = tensor<4x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 7f920cc..0000000 --- a/attention/mlir/attention_4x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x1024x64xf8E4M3FNUZ> -!K = tensor<4x1024x64xf8E4M3FNUZ> -!V = tensor<4x1024x64xf8E4M3FNUZ> -!O = tensor<4x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x16384x128x128x16384xf16.mlir b/attention/mlir/attention_4x16384x128x128x16384xf16.mlir deleted file mode 100644 index 9571b4f..0000000 --- a/attention/mlir/attention_4x16384x128x128x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x16384x128xf16> -!K = tensor<4x16384x128xf16> -!V = tensor<4x16384x128xf16> -!O = tensor<4x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index 3c76d96..0000000 --- a/attention/mlir/attention_4x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = 
tensor<4x16384x128xf8E4M3FNUZ> -!K = tensor<4x16384x128xf8E4M3FNUZ> -!V = tensor<4x16384x128xf8E4M3FNUZ> -!O = tensor<4x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x16384x64x64x16384xf16.mlir b/attention/mlir/attention_4x16384x64x64x16384xf16.mlir deleted file mode 100644 index b97efb6..0000000 --- a/attention/mlir/attention_4x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x16384x64xf16> -!K = tensor<4x16384x64xf16> -!V = tensor<4x16384x64xf16> -!O = tensor<4x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x16384x64x64x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index dea5ee9..0000000 --- a/attention/mlir/attention_4x16384x64x64x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x16384x64xf8E4M3FNUZ> -!K = tensor<4x16384x64xf8E4M3FNUZ> -!V = tensor<4x16384x64xf8E4M3FNUZ> -!O = tensor<4x16384x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x2048x128x128x2048xf16.mlir b/attention/mlir/attention_4x2048x128x128x2048xf16.mlir deleted file mode 100644 index b7f74a9..0000000 --- a/attention/mlir/attention_4x2048x128x128x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x2048x128xf16> -!K = tensor<4x2048x128xf16> -!V = tensor<4x2048x128xf16> -!O = tensor<4x2048x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, 
m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x2048x128x128x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 4f228c2..0000000 --- a/attention/mlir/attention_4x2048x128x128x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x2048x128xf8E4M3FNUZ> -!K = tensor<4x2048x128xf8E4M3FNUZ> -!V = tensor<4x2048x128xf8E4M3FNUZ> -!O = tensor<4x2048x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x2048x64x64x2048xf16.mlir b/attention/mlir/attention_4x2048x64x64x2048xf16.mlir deleted file mode 100644 index 42df208..0000000 --- a/attention/mlir/attention_4x2048x64x64x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x2048x64xf16> -!K = tensor<4x2048x64xf16> -!V = tensor<4x2048x64xf16> -!O = tensor<4x2048x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 5848865..0000000 --- a/attention/mlir/attention_4x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x2048x64xf8E4M3FNUZ> -!K = tensor<4x2048x64xf8E4M3FNUZ> -!V = tensor<4x2048x64xf8E4M3FNUZ> -!O = tensor<4x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { 
indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x4096x128x128x4096xf16.mlir b/attention/mlir/attention_4x4096x128x128x4096xf16.mlir deleted file mode 100644 index d66c261..0000000 --- a/attention/mlir/attention_4x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x4096x128xf16> -!K = tensor<4x4096x128xf16> -!V = tensor<4x4096x128xf16> -!O = tensor<4x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index f818c13..0000000 --- a/attention/mlir/attention_4x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x4096x128xf8E4M3FNUZ> -!K = tensor<4x4096x128xf8E4M3FNUZ> -!V = tensor<4x4096x128xf8E4M3FNUZ> -!O = tensor<4x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x4096x64x64x4096xf16.mlir b/attention/mlir/attention_4x4096x64x64x4096xf16.mlir deleted file mode 100644 index 7d19ec1..0000000 --- a/attention/mlir/attention_4x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x4096x64xf16> -!K = tensor<4x4096x64xf16> -!V = tensor<4x4096x64xf16> -!O = tensor<4x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git 
a/attention/mlir/attention_4x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 494f39f..0000000 --- a/attention/mlir/attention_4x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x4096x64xf8E4M3FNUZ> -!K = tensor<4x4096x64xf8E4M3FNUZ> -!V = tensor<4x4096x64xf8E4M3FNUZ> -!O = tensor<4x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x4096x64x64x64xf16.mlir b/attention/mlir/attention_4x4096x64x64x64xf16.mlir deleted file mode 100644 index 6dc2d25..0000000 --- a/attention/mlir/attention_4x4096x64x64x64xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x4096x64xf16> -!K = tensor<4x64x64xf16> -!V = tensor<4x64x64xf16> -!O = tensor<4x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x4096x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x4096x64x64x64xf8E4M3FNUZ.mlir deleted file mode 100644 index c10cff1..0000000 --- a/attention/mlir/attention_4x4096x64x64x64xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x4096x64xf8E4M3FNUZ> -!K = tensor<4x64x64xf8E4M3FNUZ> -!V = tensor<4x64x64xf8E4M3FNUZ> -!O = tensor<4x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x8192x128x128x8192xf16.mlir b/attention/mlir/attention_4x8192x128x128x8192xf16.mlir deleted file mode 100644 index 463a0fb..0000000 --- a/attention/mlir/attention_4x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x8192x128xf16> -!K = tensor<4x8192x128xf16> -!V = tensor<4x8192x128xf16> -!O = tensor<4x8192x128xf16> - -#tuning = 
#iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index ee97189..0000000 --- a/attention/mlir/attention_4x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x8192x128xf8E4M3FNUZ> -!K = tensor<4x8192x128xf8E4M3FNUZ> -!V = tensor<4x8192x128xf8E4M3FNUZ> -!O = tensor<4x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x8192x64x64x8192xf16.mlir b/attention/mlir/attention_4x8192x64x64x8192xf16.mlir deleted file mode 100644 index 17cf3fb..0000000 --- a/attention/mlir/attention_4x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<4x8192x64xf16> -!K = tensor<4x8192x64xf16> -!V = tensor<4x8192x64xf16> -!O = tensor<4x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_4x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_4x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index e2b6765..0000000 --- a/attention/mlir/attention_4x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<4x8192x64xf8E4M3FNUZ> -!K = tensor<4x8192x64xf8E4M3FNUZ> -!V = tensor<4x8192x64xf8E4M3FNUZ> -!O = tensor<4x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, 
k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x1024x128x128x1024xf16.mlir b/attention/mlir/attention_64x1024x128x128x1024xf16.mlir deleted file mode 100644 index 43c97e6..0000000 --- a/attention/mlir/attention_64x1024x128x128x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x1024x128xf16> -!K = tensor<64x1024x128xf16> -!V = tensor<64x1024x128xf16> -!O = tensor<64x1024x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index b15dcd7..0000000 --- a/attention/mlir/attention_64x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x1024x128xf8E4M3FNUZ> -!K = tensor<64x1024x128xf8E4M3FNUZ> -!V = tensor<64x1024x128xf8E4M3FNUZ> -!O = tensor<64x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x1024x64x64x1024xf16.mlir b/attention/mlir/attention_64x1024x64x64x1024xf16.mlir deleted file mode 100644 index ca3f166..0000000 --- a/attention/mlir/attention_64x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x1024x64xf16> -!K = tensor<64x1024x64xf16> -!V = tensor<64x1024x64xf16> -!O = tensor<64x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = 
arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 6cf5181..0000000 --- a/attention/mlir/attention_64x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x1024x64xf8E4M3FNUZ> -!K = tensor<64x1024x64xf8E4M3FNUZ> -!V = tensor<64x1024x64xf8E4M3FNUZ> -!O = tensor<64x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x16384x128x128x16384xf16.mlir b/attention/mlir/attention_64x16384x128x128x16384xf16.mlir deleted file mode 100644 index 82eb394..0000000 --- a/attention/mlir/attention_64x16384x128x128x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x16384x128xf16> -!K = tensor<64x16384x128xf16> -!V = tensor<64x16384x128xf16> -!O = tensor<64x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index f76335c..0000000 --- a/attention/mlir/attention_64x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x16384x128xf8E4M3FNUZ> -!K = tensor<64x16384x128xf8E4M3FNUZ> -!V = tensor<64x16384x128xf8E4M3FNUZ> -!O = tensor<64x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x16384x64x64x16384xf16.mlir 
b/attention/mlir/attention_64x16384x64x64x16384xf16.mlir deleted file mode 100644 index 36d1ff8..0000000 --- a/attention/mlir/attention_64x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x16384x64xf16> -!K = tensor<64x16384x64xf16> -!V = tensor<64x16384x64xf16> -!O = tensor<64x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x16384x64x64x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index ae5c81b..0000000 --- a/attention/mlir/attention_64x16384x64x64x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x16384x64xf8E4M3FNUZ> -!K = tensor<64x16384x64xf8E4M3FNUZ> -!V = tensor<64x16384x64xf8E4M3FNUZ> -!O = tensor<64x16384x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x2048x128x128x2048xf16.mlir b/attention/mlir/attention_64x2048x128x128x2048xf16.mlir deleted file mode 100644 index 1ec1b4f..0000000 --- a/attention/mlir/attention_64x2048x128x128x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x2048x128xf16> -!K = tensor<64x2048x128xf16> -!V = tensor<64x2048x128xf16> -!O = tensor<64x2048x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x2048x128x128x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 5eaff5f..0000000 --- 
a/attention/mlir/attention_64x2048x128x128x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x2048x128xf8E4M3FNUZ> -!K = tensor<64x2048x128xf8E4M3FNUZ> -!V = tensor<64x2048x128xf8E4M3FNUZ> -!O = tensor<64x2048x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x2048x64x64x2048xf16.mlir b/attention/mlir/attention_64x2048x64x64x2048xf16.mlir deleted file mode 100644 index bd32514..0000000 --- a/attention/mlir/attention_64x2048x64x64x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x2048x64xf16> -!K = tensor<64x2048x64xf16> -!V = tensor<64x2048x64xf16> -!O = tensor<64x2048x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index b43c953..0000000 --- a/attention/mlir/attention_64x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x2048x64xf8E4M3FNUZ> -!K = tensor<64x2048x64xf8E4M3FNUZ> -!V = tensor<64x2048x64xf8E4M3FNUZ> -!O = tensor<64x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x4096x128x128x4096xf16.mlir b/attention/mlir/attention_64x4096x128x128x4096xf16.mlir deleted file mode 100644 index c88339f..0000000 --- a/attention/mlir/attention_64x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x4096x128xf16> -!K = tensor<64x4096x128xf16> -!V = tensor<64x4096x128xf16> -!O = tensor<64x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = 
{ "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 97beedf..0000000 --- a/attention/mlir/attention_64x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x4096x128xf8E4M3FNUZ> -!K = tensor<64x4096x128xf8E4M3FNUZ> -!V = tensor<64x4096x128xf8E4M3FNUZ> -!O = tensor<64x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x4096x64x64x4096xf16.mlir b/attention/mlir/attention_64x4096x64x64x4096xf16.mlir deleted file mode 100644 index c62c248..0000000 --- a/attention/mlir/attention_64x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x4096x64xf16> -!K = tensor<64x4096x64xf16> -!V = tensor<64x4096x64xf16> -!O = tensor<64x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 2d21334..0000000 --- a/attention/mlir/attention_64x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x4096x64xf8E4M3FNUZ> -!K = tensor<64x4096x64xf8E4M3FNUZ> -!V = tensor<64x4096x64xf8E4M3FNUZ> -!O = tensor<64x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func 
@main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x8192x128x128x8192xf16.mlir b/attention/mlir/attention_64x8192x128x128x8192xf16.mlir deleted file mode 100644 index d856d95..0000000 --- a/attention/mlir/attention_64x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x8192x128xf16> -!K = tensor<64x8192x128xf16> -!V = tensor<64x8192x128xf16> -!O = tensor<64x8192x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index eff571f..0000000 --- a/attention/mlir/attention_64x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x8192x128xf8E4M3FNUZ> -!K = tensor<64x8192x128xf8E4M3FNUZ> -!V = tensor<64x8192x128xf8E4M3FNUZ> -!O = tensor<64x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x8192x64x64x8192xf16.mlir b/attention/mlir/attention_64x8192x64x64x8192xf16.mlir deleted file mode 100644 index 26e6408..0000000 --- a/attention/mlir/attention_64x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<64x8192x64xf16> -!K = tensor<64x8192x64xf16> -!V = tensor<64x8192x64xf16> -!O = tensor<64x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, 
#O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_64x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_64x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 60bfd03..0000000 --- a/attention/mlir/attention_64x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<64x8192x64xf8E4M3FNUZ> -!K = tensor<64x8192x64xf8E4M3FNUZ> -!V = tensor<64x8192x64xf8E4M3FNUZ> -!O = tensor<64x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_768x4096x64x64x64xf16.mlir b/attention/mlir/attention_768x4096x64x64x64xf16.mlir deleted file mode 100644 index a21153c..0000000 --- a/attention/mlir/attention_768x4096x64x64x64xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<768x4096x64xf16> -!K = tensor<768x64x64xf16> -!V = tensor<768x64x64xf16> -!O = tensor<768x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_768x4096x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_768x4096x64x64x64xf8E4M3FNUZ.mlir deleted file mode 100644 index 26fa16f..0000000 --- a/attention/mlir/attention_768x4096x64x64x64xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<768x4096x64xf8E4M3FNUZ> -!K = tensor<768x64x64xf8E4M3FNUZ> -!V = tensor<768x64x64xf8E4M3FNUZ> -!O = tensor<768x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x128x128x1024xf16.mlir b/attention/mlir/attention_8x1024x128x128x1024xf16.mlir deleted file mode 100644 index c94c507..0000000 --- a/attention/mlir/attention_8x1024x128x128x1024xf16.mlir +++ /dev/null @@ -1,26 
+0,0 @@ -!dtype = f16 -!Q = tensor<8x1024x128xf16> -!K = tensor<8x1024x128xf16> -!V = tensor<8x1024x128xf16> -!O = tensor<8x1024x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index f01cbda..0000000 --- a/attention/mlir/attention_8x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x1024x128xf8E4M3FNUZ> -!K = tensor<8x1024x128xf8E4M3FNUZ> -!V = tensor<8x1024x128xf8E4M3FNUZ> -!O = tensor<8x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x64x64x1024xf16.mlir b/attention/mlir/attention_8x1024x64x64x1024xf16.mlir deleted file mode 100644 index 7390cc2..0000000 --- a/attention/mlir/attention_8x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x1024x64xf16> -!K = tensor<8x1024x64xf16> -!V = tensor<8x1024x64xf16> -!O = tensor<8x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 4ddbddd..0000000 --- a/attention/mlir/attention_8x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x1024x64xf8E4M3FNUZ> -!K = tensor<8x1024x64xf8E4M3FNUZ> -!V = tensor<8x1024x64xf8E4M3FNUZ> -!O = 
tensor<8x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x64x64x64xf16.mlir b/attention/mlir/attention_8x1024x64x64x64xf16.mlir deleted file mode 100644 index 6e491d5..0000000 --- a/attention/mlir/attention_8x1024x64x64x64xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x1024x64xf16> -!K = tensor<8x64x64xf16> -!V = tensor<8x64x64xf16> -!O = tensor<8x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x1024x64x64x64xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x1024x64x64x64xf8E4M3FNUZ.mlir deleted file mode 100644 index 87c5acd..0000000 --- a/attention/mlir/attention_8x1024x64x64x64xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x1024x64xf8E4M3FNUZ> -!K = tensor<8x64x64xf8E4M3FNUZ> -!V = tensor<8x64x64xf8E4M3FNUZ> -!O = tensor<8x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x16384x128x128x16384xf16.mlir b/attention/mlir/attention_8x16384x128x128x16384xf16.mlir deleted file mode 100644 index 9ef86da..0000000 --- a/attention/mlir/attention_8x16384x128x128x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x16384x128xf16> -!K = tensor<8x16384x128xf16> -!V = tensor<8x16384x128xf16> -!O = tensor<8x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) 
-> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index e5f0fd2..0000000 --- a/attention/mlir/attention_8x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x16384x128xf8E4M3FNUZ> -!K = tensor<8x16384x128xf8E4M3FNUZ> -!V = tensor<8x16384x128xf8E4M3FNUZ> -!O = tensor<8x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x16384x64x64x16384xf16.mlir b/attention/mlir/attention_8x16384x64x64x16384xf16.mlir deleted file mode 100644 index f82cbcc..0000000 --- a/attention/mlir/attention_8x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x16384x64xf16> -!K = tensor<8x16384x64xf16> -!V = tensor<8x16384x64xf16> -!O = tensor<8x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x16384x64x64x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index 07f3508..0000000 --- a/attention/mlir/attention_8x16384x64x64x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x16384x64xf8E4M3FNUZ> -!K = tensor<8x16384x64xf8E4M3FNUZ> -!V = tensor<8x16384x64xf8E4M3FNUZ> -!O = tensor<8x16384x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> 
!O - return %O : !O -} diff --git a/attention/mlir/attention_8x2048x128x128x2048xf16.mlir b/attention/mlir/attention_8x2048x128x128x2048xf16.mlir deleted file mode 100644 index ae32ff6..0000000 --- a/attention/mlir/attention_8x2048x128x128x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x2048x128xf16> -!K = tensor<8x2048x128xf16> -!V = tensor<8x2048x128xf16> -!O = tensor<8x2048x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x2048x128x128x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 6577fb4..0000000 --- a/attention/mlir/attention_8x2048x128x128x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x2048x128xf8E4M3FNUZ> -!K = tensor<8x2048x128xf8E4M3FNUZ> -!V = tensor<8x2048x128xf8E4M3FNUZ> -!O = tensor<8x2048x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x2048x64x64x2048xf16.mlir b/attention/mlir/attention_8x2048x64x64x2048xf16.mlir deleted file mode 100644 index 992cd17..0000000 --- a/attention/mlir/attention_8x2048x64x64x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x2048x64xf16> -!K = tensor<8x2048x64xf16> -!V = tensor<8x2048x64xf16> -!O = tensor<8x2048x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 
100644 index dcbb8e2..0000000 --- a/attention/mlir/attention_8x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x2048x64xf8E4M3FNUZ> -!K = tensor<8x2048x64xf8E4M3FNUZ> -!V = tensor<8x2048x64xf8E4M3FNUZ> -!O = tensor<8x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x4096x128x128x4096xf16.mlir b/attention/mlir/attention_8x4096x128x128x4096xf16.mlir deleted file mode 100644 index ae7e70b..0000000 --- a/attention/mlir/attention_8x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x4096x128xf16> -!K = tensor<8x4096x128xf16> -!V = tensor<8x4096x128xf16> -!O = tensor<8x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 6507701..0000000 --- a/attention/mlir/attention_8x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x4096x128xf8E4M3FNUZ> -!K = tensor<8x4096x128xf8E4M3FNUZ> -!V = tensor<8x4096x128xf8E4M3FNUZ> -!O = tensor<8x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x4096x64x64x4096xf16.mlir b/attention/mlir/attention_8x4096x64x64x4096xf16.mlir deleted file mode 100644 index 0d0ed94..0000000 --- a/attention/mlir/attention_8x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x4096x64xf16> -!K = tensor<8x4096x64xf16> -!V = tensor<8x4096x64xf16> -!O = tensor<8x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , 
llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index dc8ae0c..0000000 --- a/attention/mlir/attention_8x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x4096x64xf8E4M3FNUZ> -!K = tensor<8x4096x64xf8E4M3FNUZ> -!V = tensor<8x4096x64xf8E4M3FNUZ> -!O = tensor<8x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x8192x128x128x8192xf16.mlir b/attention/mlir/attention_8x8192x128x128x8192xf16.mlir deleted file mode 100644 index 458b367..0000000 --- a/attention/mlir/attention_8x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x8192x128xf16> -!K = tensor<8x8192x128xf16> -!V = tensor<8x8192x128xf16> -!O = tensor<8x8192x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 6910322..0000000 --- a/attention/mlir/attention_8x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x8192x128xf8E4M3FNUZ> -!K = tensor<8x8192x128xf8E4M3FNUZ> -!V = tensor<8x8192x128xf8E4M3FNUZ> -!O = tensor<8x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - 
-func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x8192x64x64x8192xf16.mlir b/attention/mlir/attention_8x8192x64x64x8192xf16.mlir deleted file mode 100644 index 7e03007..0000000 --- a/attention/mlir/attention_8x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<8x8192x64xf16> -!K = tensor<8x8192x64xf16> -!V = tensor<8x8192x64xf16> -!O = tensor<8x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_8x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_8x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index e8ce4e8..0000000 --- a/attention/mlir/attention_8x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<8x8192x64xf8E4M3FNUZ> -!K = tensor<8x8192x64xf8E4M3FNUZ> -!V = tensor<8x8192x64xf8E4M3FNUZ> -!O = tensor<8x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x1024x128x128x1024xf16.mlir b/attention/mlir/attention_96x1024x128x128x1024xf16.mlir deleted file mode 100644 index 7728731..0000000 --- a/attention/mlir/attention_96x1024x128x128x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x1024x128xf16> -!K = tensor<96x1024x128xf16> -!V = tensor<96x1024x128xf16> -!O = tensor<96x1024x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - 
,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x1024x128x128x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x1024x128x128x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 0f440bf..0000000 --- a/attention/mlir/attention_96x1024x128x128x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x1024x128xf8E4M3FNUZ> -!K = tensor<96x1024x128xf8E4M3FNUZ> -!V = tensor<96x1024x128xf8E4M3FNUZ> -!O = tensor<96x1024x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x1024x64x64x1024xf16.mlir b/attention/mlir/attention_96x1024x64x64x1024xf16.mlir deleted file mode 100644 index 8d682cd..0000000 --- a/attention/mlir/attention_96x1024x64x64x1024xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x1024x64xf16> -!K = tensor<96x1024x64xf16> -!V = tensor<96x1024x64xf16> -!O = tensor<96x1024x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x1024x64x64x1024xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x1024x64x64x1024xf8E4M3FNUZ.mlir deleted file mode 100644 index 9a71011..0000000 --- a/attention/mlir/attention_96x1024x64x64x1024xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x1024x64xf8E4M3FNUZ> -!K = tensor<96x1024x64xf8E4M3FNUZ> -!V = tensor<96x1024x64xf8E4M3FNUZ> -!O = tensor<96x1024x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x16384x128x128x16384xf16.mlir b/attention/mlir/attention_96x16384x128x128x16384xf16.mlir deleted file mode 100644 index eaca82a..0000000 --- a/attention/mlir/attention_96x16384x128x128x16384xf16.mlir +++ 
/dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x16384x128xf16> -!K = tensor<96x16384x128xf16> -!V = tensor<96x16384x128xf16> -!O = tensor<96x16384x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x16384x128x128x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x16384x128x128x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index ca80b55..0000000 --- a/attention/mlir/attention_96x16384x128x128x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x16384x128xf8E4M3FNUZ> -!K = tensor<96x16384x128xf8E4M3FNUZ> -!V = tensor<96x16384x128xf8E4M3FNUZ> -!O = tensor<96x16384x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x16384x64x64x16384xf16.mlir b/attention/mlir/attention_96x16384x64x64x16384xf16.mlir deleted file mode 100644 index 67ec70b..0000000 --- a/attention/mlir/attention_96x16384x64x64x16384xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x16384x64xf16> -!K = tensor<96x16384x64xf16> -!V = tensor<96x16384x64xf16> -!O = tensor<96x16384x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x16384x64x64x16384xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x16384x64x64x16384xf8E4M3FNUZ.mlir deleted file mode 100644 index e611023..0000000 --- a/attention/mlir/attention_96x16384x64x64x16384xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x16384x64xf8E4M3FNUZ> -!K = 
tensor<96x16384x64xf8E4M3FNUZ> -!V = tensor<96x16384x64xf8E4M3FNUZ> -!O = tensor<96x16384x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x2048x128x128x2048xf16.mlir b/attention/mlir/attention_96x2048x128x128x2048xf16.mlir deleted file mode 100644 index 371b275..0000000 --- a/attention/mlir/attention_96x2048x128x128x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x2048x128xf16> -!K = tensor<96x2048x128xf16> -!V = tensor<96x2048x128xf16> -!O = tensor<96x2048x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x2048x128x128x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x2048x128x128x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index 7ae181d..0000000 --- a/attention/mlir/attention_96x2048x128x128x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x2048x128xf8E4M3FNUZ> -!K = tensor<96x2048x128xf8E4M3FNUZ> -!V = tensor<96x2048x128xf8E4M3FNUZ> -!O = tensor<96x2048x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x2048x64x64x2048xf16.mlir b/attention/mlir/attention_96x2048x64x64x2048xf16.mlir deleted file mode 100644 index d0484ab..0000000 --- a/attention/mlir/attention_96x2048x64x64x2048xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x2048x64xf16> -!K = tensor<96x2048x64xf16> -!V = tensor<96x2048x64xf16> -!O = tensor<96x2048x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = 
affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x2048x64x64x2048xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x2048x64x64x2048xf8E4M3FNUZ.mlir deleted file mode 100644 index ec17a0a..0000000 --- a/attention/mlir/attention_96x2048x64x64x2048xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x2048x64xf8E4M3FNUZ> -!K = tensor<96x2048x64xf8E4M3FNUZ> -!V = tensor<96x2048x64xf8E4M3FNUZ> -!O = tensor<96x2048x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x4096x128x128x4096xf16.mlir b/attention/mlir/attention_96x4096x128x128x4096xf16.mlir deleted file mode 100644 index 5d72378..0000000 --- a/attention/mlir/attention_96x4096x128x128x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x4096x128xf16> -!K = tensor<96x4096x128xf16> -!V = tensor<96x4096x128xf16> -!O = tensor<96x4096x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x4096x128x128x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x4096x128x128x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 1c90153..0000000 --- a/attention/mlir/attention_96x4096x128x128x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x4096x128xf8E4M3FNUZ> -!K = tensor<96x4096x128xf8E4M3FNUZ> -!V = tensor<96x4096x128xf8E4M3FNUZ> -!O = tensor<96x4096x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = 
iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x4096x64x64x4096xf16.mlir b/attention/mlir/attention_96x4096x64x64x4096xf16.mlir deleted file mode 100644 index d5b335f..0000000 --- a/attention/mlir/attention_96x4096x64x64x4096xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x4096x64xf16> -!K = tensor<96x4096x64xf16> -!V = tensor<96x4096x64xf16> -!O = tensor<96x4096x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x4096x64x64x4096xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x4096x64x64x4096xf8E4M3FNUZ.mlir deleted file mode 100644 index 9ce9417..0000000 --- a/attention/mlir/attention_96x4096x64x64x4096xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x4096x64xf8E4M3FNUZ> -!K = tensor<96x4096x64xf8E4M3FNUZ> -!V = tensor<96x4096x64xf8E4M3FNUZ> -!O = tensor<96x4096x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x8192x128x128x8192xf16.mlir b/attention/mlir/attention_96x8192x128x128x8192xf16.mlir deleted file mode 100644 index 105ebc1..0000000 --- a/attention/mlir/attention_96x8192x128x128x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x8192x128xf16> -!K = tensor<96x8192x128xf16> -!V = tensor<96x8192x128xf16> -!O = tensor<96x8192x128xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O 
-} diff --git a/attention/mlir/attention_96x8192x128x128x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x8192x128x128x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 9edf095..0000000 --- a/attention/mlir/attention_96x8192x128x128x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x8192x128xf8E4M3FNUZ> -!K = tensor<96x8192x128xf8E4M3FNUZ> -!V = tensor<96x8192x128xf8E4M3FNUZ> -!O = tensor<96x8192x128xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x8192x64x64x8192xf16.mlir b/attention/mlir/attention_96x8192x64x64x8192xf16.mlir deleted file mode 100644 index 436ab86..0000000 --- a/attention/mlir/attention_96x8192x64x64x8192xf16.mlir +++ /dev/null @@ -1,26 +0,0 @@ -!dtype = f16 -!Q = tensor<96x8192x64xf16> -!K = tensor<96x8192x64xf16> -!V = tensor<96x8192x64xf16> -!O = tensor<96x8192x64xf16> - -#tuning = #iree_codegen.compilation_info, translation_info = #iree_codegen.translation_info, subgroup_m_count = 4, subgroup_n_count = 1> , llvm_func_attrs = { "amdgpu-waves-per-eu" = "2","denormal-fp-math-f32" = "preserve-sign" }}>> - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - ,compilation_info = #tuning - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/attention/mlir/attention_96x8192x64x64x8192xf8E4M3FNUZ.mlir b/attention/mlir/attention_96x8192x64x64x8192xf8E4M3FNUZ.mlir deleted file mode 100644 index 9301a91..0000000 --- a/attention/mlir/attention_96x8192x64x64x8192xf8E4M3FNUZ.mlir +++ /dev/null @@ -1,25 +0,0 @@ -!dtype = f8E4M3FNUZ -!Q = tensor<96x8192x64xf8E4M3FNUZ> -!K = tensor<96x8192x64xf8E4M3FNUZ> -!V = tensor<96x8192x64xf8E4M3FNUZ> -!O = tensor<96x8192x64xf8E4M3FNUZ> - - - -#Q = affine_map<(b, m, n, k1, k2) -> (b, m, k1)> -#K = affine_map<(b, m, n, k1, k2) -> (b, k2, k1)> -#V = affine_map<(b, m, n, k1, k2) -> (b, k2, n)> -#S = affine_map<(b, m, n, k1, k2) -> ()> -#O = affine_map<(b, m, n, k1, k2) -> (b, m, n)> - -func.func @main(%Q : !Q, %K : !K, %V : !V) -> !O { - %scale = arith.constant 1.0 : !dtype - %empty = tensor.empty() : !O - %O = iree_linalg_ext.attention - { indexing_maps = [#Q, #K, #V, #S, #O] - - } - ins(%Q, %K, %V, %scale : !Q, !K, !V, !dtype) - outs(%empty : !O) -> !O - return %O : !O -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.mlir deleted file mode 100644 index 1f136f0..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x112x112x64x7x7x3_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: 
tensor<16x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<16x3x112x112xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x3x112x112xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x3x112x112xf32>) -> tensor<16x3x112x112xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<16x3x112x112xf32>) -> tensor<16x3x112x112xf32> - util.return %11 : tensor<16x3x112x112xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index d919e2b..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<16x512x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x512x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x512x14x14xf32>) -> tensor<16x512x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<16x512x14x14xf32>) -> tensor<16x512x14x14xf32> - util.return %11 : tensor<16x512x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.mlir deleted file mode 100644 index 679e9c1..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<16x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x256x14x14xf32>) -> tensor<16x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<16x256x14x14xf32>) -> tensor<16x256x14x14xf32> - util.return %11 : tensor<16x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 7f3b898..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x14x14x256x3x3x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<16x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x256x14x14xf32>) -> tensor<16x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<16x256x14x14xf32>) -> tensor<16x256x14x14xf32> - util.return %11 : tensor<16x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.mlir deleted file mode 100644 index ad8f50b..0000000 --- 
a/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<16x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x128x28x28xf32>) -> tensor<16x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<16x128x28x28xf32>) -> tensor<16x128x28x28xf32> - util.return %11 : tensor<16x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.mlir deleted file mode 100644 index dfa1206..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x28x28x128x3x3x128_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<16x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x128x28x28xf32>) -> tensor<16x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<16x128x28x28xf32>) -> tensor<16x128x28x28xf32> - util.return %11 : tensor<16x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 1a54757..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x28x28x512x1x1x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<16x256x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x256x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x256x28x28xf32>) -> tensor<16x256x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<16x256x28x28xf32>) -> tensor<16x256x28x28xf32> - util.return %11 : tensor<16x256x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.mlir deleted file mode 100644 index cd5bb55..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x56x56x64x3x3x64_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<16x64x56x56xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x64x56x56xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x64x56x56xf32>) -> tensor<16x64x56x56xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<16x64x56x56xf32>) -> tensor<16x64x56x56xf32> - util.return %11 : tensor<16x64x56x56xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir 
b/conv/mlir/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir deleted file mode 100644 index 65bb530..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<16x1024x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x1024x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x1024x7x7xf32>) -> tensor<16x1024x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<16x1024x7x7xf32>) -> tensor<16x1024x7x7xf32> - util.return %11 : tensor<16x1024x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.mlir deleted file mode 100644 index 8be6811..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<16x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x512x7x7xf32>) -> tensor<16x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<16x512x7x7xf32>) -> tensor<16x512x7x7xf32> - util.return %11 : tensor<16x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index c29dbad..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_16x7x7x512x3x3x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<16x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<16x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<16x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<16x512x7x7xf32>) -> tensor<16x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<16x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<16x512x7x7xf32>) -> tensor<16x512x7x7xf32> - util.return %11 : tensor<16x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.mlir deleted file mode 100644 index 6b8f39c..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x112x112x64x7x7x3_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<1x3x112x112xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x3x112x112xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x3x112x112xf32>) -> tensor<1x3x112x112xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<1x3x112x112xf32>) -> tensor<1x3x112x112xf32> - util.return %11 : tensor<1x3x112x112xf32> -} diff --git 
a/conv/mlir/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 979447f..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<1x512x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x512x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<1x512x14x14xf32>) -> tensor<1x512x14x14xf32> - util.return %11 : tensor<1x512x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.mlir deleted file mode 100644 index 5333cb1..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<1x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> - util.return %11 : tensor<1x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 9bde2cb..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x14x14x256x3x3x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<1x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<1x256x14x14xf32>) -> tensor<1x256x14x14xf32> - util.return %11 : tensor<1x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.mlir deleted file mode 100644 index 3fade95..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<1x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<1x128x28x28xf32>) -> 
tensor<1x128x28x28xf32> - util.return %11 : tensor<1x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.mlir deleted file mode 100644 index 1d69b02..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x28x28x128x3x3x128_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<1x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<1x128x28x28xf32>) -> tensor<1x128x28x28xf32> - util.return %11 : tensor<1x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index f64ac9d..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x28x28x512x1x1x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<1x256x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x256x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<1x256x28x28xf32>) -> tensor<1x256x28x28xf32> - util.return %11 : tensor<1x256x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.mlir deleted file mode 100644 index a519332..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x3x3x64_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<1x64x56x56xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x64x56x56xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> - util.return %11 : tensor<1x64x56x56xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x7x7x3_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x7x7x3_f32xf32xf32_stride1.mlir deleted file mode 100644 index e64129d..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x56x56x64x7x7x3_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ - -util.func public @main(%arg0: tensor<1x64x58x58xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<1x64x56x56xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x64x56x56xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x64x58x58xf32>, tensor<3x64x7x7xf32>) outs(%10 : 
tensor<1x64x56x56xf32>) -> tensor<1x64x56x56xf32> - util.return %11 : tensor<1x64x56x56xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir deleted file mode 100644 index 3e7dc3a..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<1x1024x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x1024x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x1024x7x7xf32>) -> tensor<1x1024x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<1x1024x7x7xf32>) -> tensor<1x1024x7x7xf32> - util.return %11 : tensor<1x1024x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.mlir deleted file mode 100644 index 437a522..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<1x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> - util.return %11 : tensor<1x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 745dff7..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_1x7x7x512x3x3x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<1x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<1x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<1x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<1x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<1x512x7x7xf32>) -> tensor<1x512x7x7xf32> - util.return %11 : tensor<1x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.mlir deleted file mode 100644 index be31d37..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x112x112x64x7x7x3_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<2x3x112x112xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x3x112x112xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x3x112x112xf32>) -> tensor<2x3x112x112xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : 
tensor<2x3x112x112xf32>) -> tensor<2x3x112x112xf32> - util.return %11 : tensor<2x3x112x112xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 75c53d7..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<2x512x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x512x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x512x14x14xf32>) -> tensor<2x512x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<2x512x14x14xf32>) -> tensor<2x512x14x14xf32> - util.return %11 : tensor<2x512x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.mlir deleted file mode 100644 index 0086840..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<2x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x256x14x14xf32>) -> tensor<2x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<2x256x14x14xf32>) -> tensor<2x256x14x14xf32> - util.return %11 : tensor<2x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 799d59b..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x14x14x256x3x3x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<2x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x256x14x14xf32>) -> tensor<2x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<2x256x14x14xf32>) -> tensor<2x256x14x14xf32> - util.return %11 : tensor<2x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.mlir deleted file mode 100644 index 4e3ac52..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<2x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x128x28x28xf32>) -> tensor<2x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : 
tensor<2x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<2x128x28x28xf32>) -> tensor<2x128x28x28xf32> - util.return %11 : tensor<2x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.mlir deleted file mode 100644 index 03f9ca2..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x28x28x128x3x3x128_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<2x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x128x28x28xf32>) -> tensor<2x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<2x128x28x28xf32>) -> tensor<2x128x28x28xf32> - util.return %11 : tensor<2x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 837a4fe..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x28x28x512x1x1x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<2x256x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x256x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x256x28x28xf32>) -> tensor<2x256x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<2x256x28x28xf32>) -> tensor<2x256x28x28xf32> - util.return %11 : tensor<2x256x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir deleted file mode 100644 index 5a86b3d..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x56x56x64x3x3x64_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<2x64x56x56xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x64x56x56xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x64x56x56xf32>) -> tensor<2x64x56x56xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<2x64x56x56xf32>) -> tensor<2x64x56x56xf32> - util.return %11 : tensor<2x64x56x56xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir deleted file mode 100644 index 0febd5a..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<2x1024x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x1024x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x1024x7x7xf32>) -> tensor<2x1024x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : 
vector<2xi64>} ins(%arg0, %arg1 : tensor<2x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<2x1024x7x7xf32>) -> tensor<2x1024x7x7xf32> - util.return %11 : tensor<2x1024x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir deleted file mode 100644 index 80c002a..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<2x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x512x7x7xf32>) -> tensor<2x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<2x512x7x7xf32>) -> tensor<2x512x7x7xf32> - util.return %11 : tensor<2x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 9d3daa1..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_2x7x7x512x3x3x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<2x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<2x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<2x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<2x512x7x7xf32>) -> tensor<2x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<2x512x7x7xf32>) -> tensor<2x512x7x7xf32> - util.return %11 : tensor<2x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.mlir deleted file mode 100644 index 78b135b..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x112x112x64x7x7x3_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<32x3x112x112xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x3x112x112xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x3x112x112xf32>) -> tensor<32x3x112x112xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<32x3x112x112xf32>) -> tensor<32x3x112x112xf32> - util.return %11 : tensor<32x3x112x112xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 7856bdf..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<32x512x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x512x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x512x14x14xf32>) -> tensor<32x512x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : 
vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<32x512x14x14xf32>) -> tensor<32x512x14x14xf32> - util.return %11 : tensor<32x512x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.mlir deleted file mode 100644 index 1e0e58c..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<32x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x256x14x14xf32>) -> tensor<32x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<32x256x14x14xf32>) -> tensor<32x256x14x14xf32> - util.return %11 : tensor<32x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 56cf448..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x14x14x256x3x3x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<32x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x256x14x14xf32>) -> tensor<32x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<32x256x14x14xf32>) -> tensor<32x256x14x14xf32> - util.return %11 : tensor<32x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.mlir deleted file mode 100644 index a720340..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<32x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x128x28x28xf32>) -> tensor<32x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<32x128x28x28xf32>) -> tensor<32x128x28x28xf32> - util.return %11 : tensor<32x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.mlir deleted file mode 100644 index c206973..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x28x28x128x3x3x128_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<32x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : 
tensor<32x128x28x28xf32>) -> tensor<32x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<32x128x28x28xf32>) -> tensor<32x128x28x28xf32> - util.return %11 : tensor<32x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 492581c..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x28x28x512x1x1x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<32x256x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x256x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x256x28x28xf32>) -> tensor<32x256x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<32x256x28x28xf32>) -> tensor<32x256x28x28xf32> - util.return %11 : tensor<32x256x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.mlir deleted file mode 100644 index e3ffe53..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x56x56x64x3x3x64_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<32x64x56x56xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x64x56x56xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x64x56x56xf32>) -> tensor<32x64x56x56xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<32x64x56x56xf32>) -> tensor<32x64x56x56xf32> - util.return %11 : tensor<32x64x56x56xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir deleted file mode 100644 index 131cbd7..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<32x1024x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x1024x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x1024x7x7xf32>) -> tensor<32x1024x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<32x1024x7x7xf32>) -> tensor<32x1024x7x7xf32> - util.return %11 : tensor<32x1024x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.mlir deleted file mode 100644 index 254d882..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<32x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : 
tensor<32x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x512x7x7xf32>) -> tensor<32x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<32x512x7x7xf32>) -> tensor<32x512x7x7xf32> - util.return %11 : tensor<32x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 2227e1b..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_32x7x7x512x3x3x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<32x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<32x512x7x7xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<32x512x7x7xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<32x512x7x7xf32>) -> tensor<32x512x7x7xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<32x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<32x512x7x7xf32>) -> tensor<32x512x7x7xf32> - util.return %11 : tensor<32x512x7x7xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.mlir deleted file mode 100644 index c40a1d1..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x112x112x64x7x7x3_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<48x3x112x112xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x3x112x112xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x3x112x112xf32>) -> tensor<48x3x112x112xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<48x3x112x112xf32>) -> tensor<48x3x112x112xf32> - util.return %11 : tensor<48x3x112x112xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir deleted file mode 100644 index 1e19c6b..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<48x512x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x512x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x512x14x14xf32>) -> tensor<48x512x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<48x512x14x14xf32>) -> tensor<48x512x14x14xf32> - util.return %11 : tensor<48x512x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.mlir deleted file mode 100644 index e7eb010..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> 
tensor<48x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x256x14x14xf32>) -> tensor<48x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<48x256x14x14xf32>) -> tensor<48x256x14x14xf32> - util.return %11 : tensor<48x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 60a80b3..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x14x14x256x3x3x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<48x256x14x14xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x256x14x14xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x256x14x14xf32>) -> tensor<48x256x14x14xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<48x256x14x14xf32>) -> tensor<48x256x14x14xf32> - util.return %11 : tensor<48x256x14x14xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.mlir deleted file mode 100644 index 0c8451f..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride1.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<48x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x128x28x28xf32>) -> tensor<48x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<48x128x28x28xf32>) -> tensor<48x128x28x28xf32> - util.return %11 : tensor<48x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.mlir deleted file mode 100644 index 5da75c9..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x28x28x128x3x3x128_f32xf32xf32_stride2.mlir +++ /dev/null @@ -1,7 +0,0 @@ -util.func public @main(%arg0: tensor<48x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<48x128x28x28xf32> { - %cst = arith.constant 0.0 : f32 - %9 = tensor.empty() : tensor<48x128x28x28xf32> - %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x128x28x28xf32>) -> tensor<48x128x28x28xf32> - %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<48x128x28x28xf32>) -> tensor<48x128x28x28xf32> - util.return %11 : tensor<48x128x28x28xf32> -} diff --git a/conv/mlir/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.mlir deleted file mode 100644 index 111edbc..0000000 --- a/conv/mlir/conv_2d_nchw_fchw_48x28x28x512x1x1x256_f32xf32xf32_stride2.mlir +++ /dev/null @@ 
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<48x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<48x256x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<48x256x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x256x28x28xf32>) -> tensor<48x256x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<48x256x28x28xf32>) -> tensor<48x256x28x28xf32>
-  util.return %11 : tensor<48x256x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
deleted file mode 100644
index 976b273..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_48x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<48x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<48x64x56x56xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<48x64x56x56xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x64x56x56xf32>) -> tensor<48x64x56x56xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<48x64x56x56xf32>) -> tensor<48x64x56x56xf32>
-  util.return %11 : tensor<48x64x56x56xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 91ef8a1..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_48x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<48x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<48x1024x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<48x1024x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x1024x7x7xf32>) -> tensor<48x1024x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<48x1024x7x7xf32>) -> tensor<48x1024x7x7xf32>
-  util.return %11 : tensor<48x1024x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
deleted file mode 100644
index b4b108b..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<48x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<48x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<48x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x512x7x7xf32>) -> tensor<48x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<48x512x7x7xf32>) -> tensor<48x512x7x7xf32>
-  util.return %11 : tensor<48x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 4e64124..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_48x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<48x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<48x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<48x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<48x512x7x7xf32>) -> tensor<48x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<48x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<48x512x7x7xf32>) -> tensor<48x512x7x7xf32>
-  util.return %11 : tensor<48x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
deleted file mode 100644
index dfec8cd..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<4x3x112x112xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x3x112x112xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x3x112x112xf32>) -> tensor<4x3x112x112xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<4x3x112x112xf32>) -> tensor<4x3x112x112xf32>
-  util.return %11 : tensor<4x3x112x112xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
deleted file mode 100644
index a4be022..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<4x512x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x512x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x512x14x14xf32>) -> tensor<4x512x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<4x512x14x14xf32>) -> tensor<4x512x14x14xf32>
-  util.return %11 : tensor<4x512x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
deleted file mode 100644
index 84cb673..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<4x256x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x256x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x256x14x14xf32>) -> tensor<4x256x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<4x256x14x14xf32>) -> tensor<4x256x14x14xf32>
-  util.return %11 : tensor<4x256x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 0e72024..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<4x256x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x256x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x256x14x14xf32>) -> tensor<4x256x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<4x256x14x14xf32>) -> tensor<4x256x14x14xf32>
-  util.return %11 : tensor<4x256x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
deleted file mode 100644
index f4b6e62..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<4x128x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x128x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x128x28x28xf32>) -> tensor<4x128x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<4x128x28x28xf32>) -> tensor<4x128x28x28xf32>
-  util.return %11 : tensor<4x128x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
deleted file mode 100644
index b8b87bb..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<4x128x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x128x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x128x28x28xf32>) -> tensor<4x128x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<4x128x28x28xf32>) -> tensor<4x128x28x28xf32>
-  util.return %11 : tensor<4x128x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 29e7dc8..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<4x256x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x256x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x256x28x28xf32>) -> tensor<4x256x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<4x256x28x28xf32>) -> tensor<4x256x28x28xf32>
-  util.return %11 : tensor<4x256x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
deleted file mode 100644
index 3e1b498..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<4x64x56x56xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x64x56x56xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x64x56x56xf32>) -> tensor<4x64x56x56xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<4x64x56x56xf32>) -> tensor<4x64x56x56xf32>
-  util.return %11 : tensor<4x64x56x56xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 351a3f1..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<4x1024x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x1024x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x1024x7x7xf32>) -> tensor<4x1024x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<4x1024x7x7xf32>) -> tensor<4x1024x7x7xf32>
-  util.return %11 : tensor<4x1024x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
deleted file mode 100644
index 106e477..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<4x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x512x7x7xf32>) -> tensor<4x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<4x512x7x7xf32>) -> tensor<4x512x7x7xf32>
-  util.return %11 : tensor<4x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
deleted file mode 100644
index d3535b6..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_4x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<4x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<4x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<4x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<4x512x7x7xf32>) -> tensor<4x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<4x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<4x512x7x7xf32>) -> tensor<4x512x7x7xf32>
-  util.return %11 : tensor<4x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
deleted file mode 100644
index c280b6c..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x112x112x64x7x7x3_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x64x230x230xf32>, %arg1: tensor<3x64x7x7xf32>) -> tensor<8x3x112x112xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x3x112x112xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x3x112x112xf32>) -> tensor<8x3x112x112xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x64x230x230xf32>, tensor<3x64x7x7xf32>) outs(%10 : tensor<8x3x112x112xf32>) -> tensor<8x3x112x112xf32>
-  util.return %11 : tensor<8x3x112x112xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 3a1b502..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x14x14x1024x1x1x512_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x1024x28x28xf32>, %arg1: tensor<512x1024x1x1xf32>) -> tensor<8x512x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x512x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x512x14x14xf32>) -> tensor<8x512x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x1024x28x28xf32>, tensor<512x1024x1x1xf32>) outs(%10 : tensor<8x512x14x14xf32>) -> tensor<8x512x14x14xf32>
-  util.return %11 : tensor<8x512x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
deleted file mode 100644
index dbdd40c..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x256x16x16xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<8x256x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x256x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x256x14x14xf32>) -> tensor<8x256x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x256x16x16xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<8x256x14x14xf32>) -> tensor<8x256x14x14xf32>
-  util.return %11 : tensor<8x256x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
deleted file mode 100644
index d8c468b..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x14x14x256x3x3x256_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x256x30x30xf32>, %arg1: tensor<256x256x3x3xf32>) -> tensor<8x256x14x14xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x256x14x14xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x256x14x14xf32>) -> tensor<8x256x14x14xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x256x30x30xf32>, tensor<256x256x3x3xf32>) outs(%10 : tensor<8x256x14x14xf32>) -> tensor<8x256x14x14xf32>
-  util.return %11 : tensor<8x256x14x14xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
deleted file mode 100644
index 4ff5f80..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x128x30x30xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<8x128x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x128x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x128x28x28xf32>) -> tensor<8x128x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x128x30x30xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<8x128x28x28xf32>) -> tensor<8x128x28x28xf32>
-  util.return %11 : tensor<8x128x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 511eb75..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x28x28x128x3x3x128_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x128x58x58xf32>, %arg1: tensor<128x128x3x3xf32>) -> tensor<8x128x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x128x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x128x28x28xf32>) -> tensor<8x128x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x128x58x58xf32>, tensor<128x128x3x3xf32>) outs(%10 : tensor<8x128x28x28xf32>) -> tensor<8x128x28x28xf32>
-  util.return %11 : tensor<8x128x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
deleted file mode 100644
index c318b1f..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x28x28x512x1x1x256_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x512x56x56xf32>, %arg1: tensor<256x512x1x1xf32>) -> tensor<8x256x28x28xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x256x28x28xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x256x28x28xf32>) -> tensor<8x256x28x28xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x512x56x56xf32>, tensor<256x512x1x1xf32>) outs(%10 : tensor<8x256x28x28xf32>) -> tensor<8x256x28x28xf32>
-  util.return %11 : tensor<8x256x28x28xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
deleted file mode 100644
index ce3336c..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x56x56x64x3x3x64_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x64x58x58xf32>, %arg1: tensor<64x64x3x3xf32>) -> tensor<8x64x56x56xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x64x56x56xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x64x56x56xf32>) -> tensor<8x64x56x56xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x64x58x58xf32>, tensor<64x64x3x3xf32>) outs(%10 : tensor<8x64x56x56xf32>) -> tensor<8x64x56x56xf32>
-  util.return %11 : tensor<8x64x56x56xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
deleted file mode 100644
index e910d9d..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x7x7x2048x1x1x1024_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x2048x14x14xf32>, %arg1: tensor<1024x2048x1x1xf32>) -> tensor<8x1024x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x1024x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x1024x7x7xf32>) -> tensor<8x1024x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x2048x14x14xf32>, tensor<1024x2048x1x1xf32>) outs(%10 : tensor<8x1024x7x7xf32>) -> tensor<8x1024x7x7xf32>
-  util.return %11 : tensor<8x1024x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir b/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
deleted file mode 100644
index e2700fc..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride1.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x512x9x9xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<8x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x512x7x7xf32>) -> tensor<8x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x512x9x9xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<8x512x7x7xf32>) -> tensor<8x512x7x7xf32>
-  util.return %11 : tensor<8x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir b/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
deleted file mode 100644
index 4b6bdae..0000000
--- a/conv/mlir/conv_2d_nchw_fchw_8x7x7x512x3x3x512_f32xf32xf32_stride2.mlir
+++ /dev/null
@@ -1,7 +0,0 @@
-util.func public @main(%arg0: tensor<8x512x16x16xf32>, %arg1: tensor<512x512x3x3xf32>) -> tensor<8x512x7x7xf32> {
-  %cst = arith.constant 0.0 : f32
-  %9 = tensor.empty() : tensor<8x512x7x7xf32>
-  %10 = linalg.fill ins(%cst : f32) outs(%9 : tensor<8x512x7x7xf32>) -> tensor<8x512x7x7xf32>
-  %11 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1 : tensor<8x512x16x16xf32>, tensor<512x512x3x3xf32>) outs(%10 : tensor<8x512x7x7xf32>) -> tensor<8x512x7x7xf32>
-  util.return %11 : tensor<8x512x7x7xf32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 51aa523..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<16x112x112x3xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x112x112x3xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x112x112x3xi32>) -> tensor<16x112x112x3xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<16x112x112x3xi32>) -> tensor<16x112x112x3xi32>
-  util.return %11 : tensor<16x112x112x3xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 901f76d..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<16x14x14x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x14x14x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x14x14x512xi32>) -> tensor<16x14x14x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<16x14x14x512xi32>) -> tensor<16x14x14x512xi32>
-  util.return %11 : tensor<16x14x14x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 9cccdc5..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<16x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x14x14x256xi32>) -> tensor<16x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<16x14x14x256xi32>) -> tensor<16x14x14x256xi32>
-  util.return %11 : tensor<16x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 5ef0460..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<16x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x14x14x256xi32>) -> tensor<16x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<16x14x14x256xi32>) -> tensor<16x14x14x256xi32>
-  util.return %11 : tensor<16x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 39443af..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<16x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x28x28x128xi32>) -> tensor<16x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<16x28x28x128xi32>) -> tensor<16x28x28x128xi32>
-  util.return %11 : tensor<16x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 1b2c1e8..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<16x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x28x28x128xi32>) -> tensor<16x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<16x28x28x128xi32>) -> tensor<16x28x28x128xi32>
-  util.return %11 : tensor<16x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index dc625e6..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<16x28x28x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x28x28x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x28x28x256xi32>) -> tensor<16x28x28x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<16x28x28x256xi32>) -> tensor<16x28x28x256xi32>
-  util.return %11 : tensor<16x28x28x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
deleted file mode 100644
index ada033c..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<16x56x56x64xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x56x56x64xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x56x56x64xi32>) -> tensor<16x56x56x64xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<16x56x56x64xi32>) -> tensor<16x56x56x64xi32>
-  util.return %11 : tensor<16x56x56x64xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
deleted file mode 100644
index d0273cb..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x14x14x2048xi8>, %arg1: tensor<1x1x2048x1024xi8>) -> tensor<16x7x7x1024xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x7x7x1024xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x7x7x1024xi32>) -> tensor<16x7x7x1024xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<16x7x7x1024xi32>) -> tensor<16x7x7x1024xi32>
-  util.return %11 : tensor<16x7x7x1024xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 79d6bd8..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<16x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x7x7x512xi32>) -> tensor<16x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<16x7x7x512xi32>) -> tensor<16x7x7x512xi32>
-  util.return %11 : tensor<16x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index d815eba..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_16x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<16x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<16x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<16x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<16x7x7x512xi32>) -> tensor<16x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<16x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<16x7x7x512xi32>) -> tensor<16x7x7x512xi32>
-  util.return %11 : tensor<16x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 8f8aef5..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<1x112x112x3xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x112x112x3xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x112x112x3xi32>) -> tensor<1x112x112x3xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<1x112x112x3xi32>) -> tensor<1x112x112x3xi32>
-  util.return %11 : tensor<1x112x112x3xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 22483c7..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<1x14x14x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x14x14x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x14x14x512xi32>) -> tensor<1x14x14x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<1x14x14x512xi32>) -> tensor<1x14x14x512xi32>
-  util.return %11 : tensor<1x14x14x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 0f680d6..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<1x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x14x14x256xi32>) -> tensor<1x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<1x14x14x256xi32>) -> tensor<1x14x14x256xi32>
-  util.return %11 : tensor<1x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index ac0f447..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<1x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x14x14x256xi32>) -> tensor<1x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<1x14x14x256xi32>) -> tensor<1x14x14x256xi32>
-  util.return %11 : tensor<1x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
deleted file mode 100644
index d26a99b..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<1x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x28x28x128xi32>) -> tensor<1x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<1x28x28x128xi32>) -> tensor<1x28x28x128xi32>
-  util.return %11 : tensor<1x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 5b5d2e7..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<1x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x28x28x128xi32>) -> tensor<1x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<1x28x28x128xi32>) -> tensor<1x28x28x128xi32>
-  util.return %11 : tensor<1x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index e8d1574..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<1x28x28x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x28x28x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x28x28x256xi32>) -> tensor<1x28x28x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<1x28x28x256xi32>) -> tensor<1x28x28x256xi32>
-  util.return %11 : tensor<1x28x28x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
deleted file mode 100644
index f33d50a..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<1x56x56x64xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x56x56x64xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x56x56x64xi32>) -> tensor<1x56x56x64xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<1x56x56x64xi32>) -> tensor<1x56x56x64xi32>
-  util.return %11 : tensor<1x56x56x64xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
deleted file mode 100644
index ba91ced..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x14x14x2048xi8>, %arg1: tensor<1x1x2048x1024xi8>) -> tensor<1x7x7x1024xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x7x7x1024xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x7x7x1024xi32>) -> tensor<1x7x7x1024xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<1x7x7x1024xi32>) -> tensor<1x7x7x1024xi32>
-  util.return %11 : tensor<1x7x7x1024xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 9b3d433..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<1x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x7x7x512xi32>) -> tensor<1x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<1x7x7x512xi32>) -> tensor<1x7x7x512xi32>
-  util.return %11 : tensor<1x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 57902ba..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_1x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<1x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<1x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<1x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<1x7x7x512xi32>) -> tensor<1x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<1x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<1x7x7x512xi32>) -> tensor<1x7x7x512xi32>
-  util.return %11 : tensor<1x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 980db88..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<2x112x112x3xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x112x112x3xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x112x112x3xi32>) -> tensor<2x112x112x3xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<2x112x112x3xi32>) -> tensor<2x112x112x3xi32>
-  util.return %11 : tensor<2x112x112x3xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 17c720e..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<2x14x14x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x14x14x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x14x14x512xi32>) -> tensor<2x14x14x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<2x14x14x512xi32>) -> tensor<2x14x14x512xi32>
-  util.return %11 : tensor<2x14x14x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 07690ae..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<2x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x14x14x256xi32>) -> tensor<2x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<2x14x14x256xi32>) -> tensor<2x14x14x256xi32>
-  util.return %11 : tensor<2x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 0a72f46..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<2x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x14x14x256xi32>) -> tensor<2x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<2x14x14x256xi32>) -> tensor<2x14x14x256xi32>
-  util.return %11 : tensor<2x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 701dfa9..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<2x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x28x28x128xi32>) -> tensor<2x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<2x28x28x128xi32>) -> tensor<2x28x28x128xi32>
-  util.return %11 : tensor<2x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
deleted file mode 100644
index fb40589..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x128x3x3x128_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<2x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x28x28x128xi32>) -> tensor<2x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<2x28x28x128xi32>) -> tensor<2x28x28x128xi32>
-  util.return %11 : tensor<2x28x28x128xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 96af59a..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x28x28x512x1x1x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<2x28x28x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x28x28x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x28x28x256xi32>) -> tensor<2x28x28x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<2x28x28x256xi32>) -> tensor<2x28x28x256xi32>
-  util.return %11 : tensor<2x28x28x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
deleted file mode 100644
index be5037c..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x56x56x64x3x3x64_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<2x56x56x64xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x56x56x64xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x56x56x64xi32>) -> tensor<2x56x56x64xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<2x56x56x64xi32>) -> tensor<2x56x56x64xi32>
-  util.return %11 : tensor<2x56x56x64xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 630dbb4..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x14x14x2048xi8>, %arg1: tensor<1x1x2048x1024xi8>) -> tensor<2x7x7x1024xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x7x7x1024xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x7x7x1024xi32>) -> tensor<2x7x7x1024xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<2x7x7x1024xi32>) -> tensor<2x7x7x1024xi32>
-  util.return %11 : tensor<2x7x7x1024xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 63fddc7..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<2x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x7x7x512xi32>) -> tensor<2x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<2x7x7x512xi32>) -> tensor<2x7x7x512xi32>
-  util.return %11 : tensor<2x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 3a7790a..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_2x7x7x512x3x3x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<2x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<2x7x7x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<2x7x7x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<2x7x7x512xi32>) -> tensor<2x7x7x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<2x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<2x7x7x512xi32>) -> tensor<2x7x7x512xi32>
-  util.return %11 : tensor<2x7x7x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
deleted file mode 100644
index b402202..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x112x112x64x7x7x3_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<32x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<32x112x112x3xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<32x112x112x3xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x112x112x3xi32>) -> tensor<32x112x112x3xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<32x112x112x3xi32>) -> tensor<32x112x112x3xi32>
-  util.return %11 : tensor<32x112x112x3xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
deleted file mode 100644
index c74cd1c..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<32x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<32x14x14x512xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<32x14x14x512xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x14x14x512xi32>) -> tensor<32x14x14x512xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<32x14x14x512xi32>) -> tensor<32x14x14x512xi32>
-  util.return %11 : tensor<32x14x14x512xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 22ae40a..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<32x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<32x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<32x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x14x14x256xi32>) -> tensor<32x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<32x14x14x256xi32>) -> tensor<32x14x14x256xi32>
-  util.return %11 : tensor<32x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
deleted file mode 100644
index 4526e5e..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x14x14x256x3x3x256_i8xi8xi32_stride2.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<32x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<32x14x14x256xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<32x14x14x256xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x14x14x256xi32>) -> tensor<32x14x14x256xi32>
-  %c0_i32 = arith.constant 0 : i32
-  %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<32x14x14x256xi32>) -> tensor<32x14x14x256xi32>
-  util.return %11 : tensor<32x14x14x256xi32>
-}
diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
deleted file mode 100644
index 9e6be94..0000000
--- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride1.mlir
+++ /dev/null
@@ -1,8 +0,0 @@
-util.func public @main(%arg0: tensor<32x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<32x28x28x128xi32> {
-  %cst = arith.constant 0 : i32
-  %9 = tensor.empty() : tensor<32x28x28x128xi32>
-  %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x28x28x128xi32>) -> tensor<32x28x28x128xi32>
-  %c0_i32 = arith.constant 0 : i32
vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<32x28x28x128xi32>) -> tensor<32x28x28x128xi32> - util.return %11 : tensor<32x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.mlir deleted file mode 100644 index bc1357a..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x128x3x3x128_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<32x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x28x28x128xi32>) -> tensor<32x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<32x28x28x128xi32>) -> tensor<32x28x28x128xi32> - util.return %11 : tensor<32x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 769904d..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x28x28x512x1x1x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<32x28x28x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x28x28x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x28x28x256xi32>) -> tensor<32x28x28x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<32x28x28x256xi32>) -> tensor<32x28x28x256xi32> - util.return %11 : tensor<32x28x28x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.mlir deleted file mode 100644 index 91cfcb4..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x56x56x64x3x3x64_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<32x56x56x64xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x56x56x64xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x56x56x64xi32>) -> tensor<32x56x56x64xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<32x56x56x64xi32>) -> tensor<32x56x56x64xi32> - util.return %11 : tensor<32x56x56x64xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir deleted file mode 100644 index 8cd1114..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x14x14x2048xi8>, %arg1: 
tensor<1x1x2048x1024xi8>) -> tensor<32x7x7x1024xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x7x7x1024xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x7x7x1024xi32>) -> tensor<32x7x7x1024xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<32x7x7x1024xi32>) -> tensor<32x7x7x1024xi32> - util.return %11 : tensor<32x7x7x1024xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.mlir deleted file mode 100644 index 2ae453e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<32x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x7x7x512xi32>) -> tensor<32x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<32x7x7x512xi32>) -> tensor<32x7x7x512xi32> - util.return %11 : tensor<32x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index 58c5b1e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_32x7x7x512x3x3x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<32x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<32x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<32x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<32x7x7x512xi32>) -> tensor<32x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<32x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<32x7x7x512xi32>) -> tensor<32x7x7x512xi32> - util.return %11 : tensor<32x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.mlir deleted file mode 100644 index 2c8253e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x112x112x64x7x7x3_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<48x112x112x3xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x112x112x3xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x112x112x3xi32>) -> tensor<48x112x112x3xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<48x112x112x3xi32>) -> tensor<48x112x112x3xi32> - util.return %11 : tensor<48x112x112x3xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir 
b/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index 5289794..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<48x14x14x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x14x14x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x14x14x512xi32>) -> tensor<48x14x14x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<48x14x14x512xi32>) -> tensor<48x14x14x512xi32> - util.return %11 : tensor<48x14x14x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.mlir deleted file mode 100644 index 3f1f5dd..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<48x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x14x14x256xi32>) -> tensor<48x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<48x14x14x256xi32>) -> tensor<48x14x14x256xi32> - util.return %11 : tensor<48x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 8a9306c..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x14x14x256x3x3x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<48x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x14x14x256xi32>) -> tensor<48x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<48x14x14x256xi32>) -> tensor<48x14x14x256xi32> - util.return %11 : tensor<48x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.mlir deleted file mode 100644 index 30cdd37..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<48x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x28x28x128xi32>) -> tensor<48x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : 
vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<48x28x28x128xi32>) -> tensor<48x28x28x128xi32> - util.return %11 : tensor<48x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.mlir deleted file mode 100644 index c044d56..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x128x3x3x128_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<48x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x28x28x128xi32>) -> tensor<48x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<48x28x28x128xi32>) -> tensor<48x28x28x128xi32> - util.return %11 : tensor<48x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 78cc002..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x28x28x512x1x1x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<48x28x28x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x28x28x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x28x28x256xi32>) -> tensor<48x28x28x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<48x28x28x256xi32>) -> tensor<48x28x28x256xi32> - util.return %11 : tensor<48x28x28x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.mlir deleted file mode 100644 index 5c4839e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x56x56x64x3x3x64_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<48x56x56x64xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x56x56x64xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x56x56x64xi32>) -> tensor<48x56x56x64xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<48x56x56x64xi32>) -> tensor<48x56x56x64xi32> - util.return %11 : tensor<48x56x56x64xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir deleted file mode 100644 index 7a087b0..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x14x14x2048xi8>, %arg1: 
tensor<1x1x2048x1024xi8>) -> tensor<48x7x7x1024xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x7x7x1024xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x7x7x1024xi32>) -> tensor<48x7x7x1024xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<48x7x7x1024xi32>) -> tensor<48x7x7x1024xi32> - util.return %11 : tensor<48x7x7x1024xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.mlir deleted file mode 100644 index 8873a93..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<48x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x7x7x512xi32>) -> tensor<48x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<48x7x7x512xi32>) -> tensor<48x7x7x512xi32> - util.return %11 : tensor<48x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index b4fa224..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_48x7x7x512x3x3x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<48x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<48x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<48x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<48x7x7x512xi32>) -> tensor<48x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<48x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<48x7x7x512xi32>) -> tensor<48x7x7x512xi32> - util.return %11 : tensor<48x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.mlir deleted file mode 100644 index 19cfe1e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x112x112x64x7x7x3_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<4x112x112x3xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x112x112x3xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x112x112x3xi32>) -> tensor<4x112x112x3xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<4x112x112x3xi32>) -> tensor<4x112x112x3xi32> - util.return %11 : tensor<4x112x112x3xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir 
b/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index d05f703..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<4x14x14x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x14x14x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x14x14x512xi32>) -> tensor<4x14x14x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<4x14x14x512xi32>) -> tensor<4x14x14x512xi32> - util.return %11 : tensor<4x14x14x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.mlir deleted file mode 100644 index 8087212..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<4x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x14x14x256xi32>) -> tensor<4x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<4x14x14x256xi32>) -> tensor<4x14x14x256xi32> - util.return %11 : tensor<4x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 0757c3d..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x14x14x256x3x3x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<4x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x14x14x256xi32>) -> tensor<4x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<4x14x14x256xi32>) -> tensor<4x14x14x256xi32> - util.return %11 : tensor<4x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.mlir deleted file mode 100644 index 65c9515..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<4x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x28x28x128xi32>) -> tensor<4x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : 
vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<4x28x28x128xi32>) -> tensor<4x28x28x128xi32> - util.return %11 : tensor<4x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.mlir deleted file mode 100644 index b1977ac..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x128x3x3x128_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<4x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x28x28x128xi32>) -> tensor<4x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<4x28x28x128xi32>) -> tensor<4x28x28x128xi32> - util.return %11 : tensor<4x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 954f14c..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x28x28x512x1x1x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<4x28x28x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x28x28x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x28x28x256xi32>) -> tensor<4x28x28x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<4x28x28x256xi32>) -> tensor<4x28x28x256xi32> - util.return %11 : tensor<4x28x28x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.mlir deleted file mode 100644 index 13553f3..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x56x56x64x3x3x64_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<4x56x56x64xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x56x56x64xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x56x56x64xi32>) -> tensor<4x56x56x64xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<4x56x56x64xi32>) -> tensor<4x56x56x64xi32> - util.return %11 : tensor<4x56x56x64xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir deleted file mode 100644 index e4c857e..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x14x14x2048xi8>, %arg1: tensor<1x1x2048x1024xi8>) -> tensor<4x7x7x1024xi32> { - %cst = arith.constant 0 : i32 - 
%9 = tensor.empty() : tensor<4x7x7x1024xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x7x7x1024xi32>) -> tensor<4x7x7x1024xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<4x7x7x1024xi32>) -> tensor<4x7x7x1024xi32> - util.return %11 : tensor<4x7x7x1024xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.mlir deleted file mode 100644 index 37b65ed..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<4x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x7x7x512xi32>) -> tensor<4x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<4x7x7x512xi32>) -> tensor<4x7x7x512xi32> - util.return %11 : tensor<4x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index 0d84e71..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_4x7x7x512x3x3x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<4x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<4x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<4x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<4x7x7x512xi32>) -> tensor<4x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<4x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<4x7x7x512xi32>) -> tensor<4x7x7x512xi32> - util.return %11 : tensor<4x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.mlir deleted file mode 100644 index 40d6ef8..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x112x112x64x7x7x3_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x230x230x64xi8>, %arg1: tensor<7x7x64x3xi8>) -> tensor<8x112x112x3xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x112x112x3xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x112x112x3xi32>) -> tensor<8x112x112x3xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x230x230x64xi8>, tensor<7x7x64x3xi8>, i32, i32) outs(%10 : tensor<8x112x112x3xi32>) -> tensor<8x112x112x3xi32> - util.return %11 : tensor<8x112x112x3xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index c3e3ba6..0000000 --- 
a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x1024x1x1x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x28x28x1024xi8>, %arg1: tensor<1x1x1024x512xi8>) -> tensor<8x14x14x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x14x14x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x14x14x512xi32>) -> tensor<8x14x14x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x28x28x1024xi8>, tensor<1x1x1024x512xi8>, i32, i32) outs(%10 : tensor<8x14x14x512xi32>) -> tensor<8x14x14x512xi32> - util.return %11 : tensor<8x14x14x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir deleted file mode 100644 index d641165..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x16x16x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<8x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x14x14x256xi32>) -> tensor<8x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x16x16x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<8x14x14x256xi32>) -> tensor<8x14x14x256xi32> - util.return %11 : tensor<8x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index 9d5b6a7..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x14x14x256x3x3x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x30x30x256xi8>, %arg1: tensor<3x3x256x256xi8>) -> tensor<8x14x14x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x14x14x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x14x14x256xi32>) -> tensor<8x14x14x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x30x30x256xi8>, tensor<3x3x256x256xi8>, i32, i32) outs(%10 : tensor<8x14x14x256xi32>) -> tensor<8x14x14x256xi32> - util.return %11 : tensor<8x14x14x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir deleted file mode 100644 index c65bba4..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x30x30x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<8x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x28x28x128xi32>) -> tensor<8x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x30x30x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : 
tensor<8x28x28x128xi32>) -> tensor<8x28x28x128xi32> - util.return %11 : tensor<8x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir deleted file mode 100644 index 94378ef..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x128x3x3x128_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x58x58x128xi8>, %arg1: tensor<3x3x128x128xi8>) -> tensor<8x28x28x128xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x28x28x128xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x28x28x128xi32>) -> tensor<8x28x28x128xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x58x58x128xi8>, tensor<3x3x128x128xi8>, i32, i32) outs(%10 : tensor<8x28x28x128xi32>) -> tensor<8x28x28x128xi32> - util.return %11 : tensor<8x28x28x128xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir deleted file mode 100644 index cfed53f..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x28x28x512x1x1x256_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x56x56x512xi8>, %arg1: tensor<1x1x512x256xi8>) -> tensor<8x28x28x256xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x28x28x256xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x28x28x256xi32>) -> tensor<8x28x28x256xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x56x56x512xi8>, tensor<1x1x512x256xi8>, i32, i32) outs(%10 : tensor<8x28x28x256xi32>) -> tensor<8x28x28x256xi32> - util.return %11 : tensor<8x28x28x256xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.mlir deleted file mode 100644 index 5bca844..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x56x56x64x3x3x64_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x58x58x64xi8>, %arg1: tensor<3x3x64x64xi8>) -> tensor<8x56x56x64xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x56x56x64xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x56x56x64xi32>) -> tensor<8x56x56x64xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x58x58x64xi8>, tensor<3x3x64x64xi8>, i32, i32) outs(%10 : tensor<8x56x56x64xi32>) -> tensor<8x56x56x64xi32> - util.return %11 : tensor<8x56x56x64xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir deleted file mode 100644 index 6dba97c..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x2048x1x1x1024_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x14x14x2048xi8>, %arg1: tensor<1x1x2048x1024xi8>) -> tensor<8x7x7x1024xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x7x7x1024xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x7x7x1024xi32>) -> 
tensor<8x7x7x1024xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x14x14x2048xi8>, tensor<1x1x2048x1024xi8>, i32, i32) outs(%10 : tensor<8x7x7x1024xi32>) -> tensor<8x7x7x1024xi32> - util.return %11 : tensor<8x7x7x1024xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.mlir deleted file mode 100644 index 1c218cf..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride1.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x9x9x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<8x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x7x7x512xi32>) -> tensor<8x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x9x9x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<8x7x7x512xi32>) -> tensor<8x7x7x512xi32> - util.return %11 : tensor<8x7x7x512xi32> -} diff --git a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.mlir b/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.mlir deleted file mode 100644 index 25d0638..0000000 --- a/conv/mlir/conv_2d_nhwc_hwcf_q_8x7x7x512x3x3x512_i8xi8xi32_stride2.mlir +++ /dev/null @@ -1,8 +0,0 @@ -util.func public @main(%arg0: tensor<8x16x16x512xi8>, %arg1: tensor<3x3x512x512xi8>) -> tensor<8x7x7x512xi32> { - %cst = arith.constant 0 : i32 - %9 = tensor.empty() : tensor<8x7x7x512xi32> - %10 = linalg.fill ins(%cst : i32) outs(%9 : tensor<8x7x7x512xi32>) -> tensor<8x7x7x512xi32> - %c0_i32 = arith.constant 0 : i32 - %11 = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%arg0, %arg1, %c0_i32, %c0_i32 : tensor<8x16x16x512xi8>, tensor<3x3x512x512xi8>, i32, i32) outs(%10 : tensor<8x7x7x512xi32>) -> tensor<8x7x7x512xi32> - util.return %11 : tensor<8x7x7x512xi32> -} diff --git a/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir deleted file mode 100644 index 2f56e73..0000000 --- a/gemm/mlir/gemm_10240_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<10240x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<10240x16xbf16>) -> tensor<10240x16xbf16> - return %2 : tensor<10240x16xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir deleted file mode 100644 index 78c8d49..0000000 --- a/gemm/mlir/gemm_10240_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x16xf16>) -> tensor<10240x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x16xf16>) -> tensor<10240x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<8192x10240xf16>, tensor<8192x16xf16>) outs(%1 : tensor<10240x16xf16>) -> tensor<10240x16xf16> - return %2 : tensor<10240x16xf16> - } -} diff --git a/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir deleted file mode 100644 index ad452e2..0000000 --- a/gemm/mlir/gemm_10240_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<10240x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<10240x1xbf16>) -> tensor<10240x1xbf16> - return %2 : tensor<10240x1xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir deleted file mode 100644 index 71b8145..0000000 --- a/gemm/mlir/gemm_10240_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x1xf16>) -> tensor<10240x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x1xf16>) -> tensor<10240x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x1xf16>) outs(%1 : tensor<10240x1xf16>) -> tensor<10240x1xf16> - return %2 : tensor<10240x1xf16> - } -} diff --git a/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir deleted file mode 100644 index b0f1298..0000000 --- a/gemm/mlir/gemm_10240_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<10240x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<10240x2xbf16>) -> tensor<10240x2xbf16> - return %2 : tensor<10240x2xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir deleted file mode 100644 index 273354c..0000000 --- a/gemm/mlir/gemm_10240_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x2xf16>) -> tensor<10240x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x2xf16>) -> tensor<10240x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x2xf16>) outs(%1 : tensor<10240x2xf16>) -> tensor<10240x2xf16> - return %2 : tensor<10240x2xf16> - } -} diff --git a/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir deleted file mode 100644 index f1ec0ed..0000000 --- a/gemm/mlir/gemm_10240_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<10240x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<8192x10240xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<10240x32xbf16>) -> tensor<10240x32xbf16> - return %2 : tensor<10240x32xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir deleted file mode 100644 index 3a3e10a..0000000 --- a/gemm/mlir/gemm_10240_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x32xf16>) -> tensor<10240x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x32xf16>) -> tensor<10240x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x32xf16>) outs(%1 : tensor<10240x32xf16>) -> tensor<10240x32xf16> - return %2 : tensor<10240x32xf16> - } -} diff --git a/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir deleted file mode 100644 index 2b73883..0000000 --- a/gemm/mlir/gemm_10240_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<10240x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<10240x4xbf16>) -> tensor<10240x4xbf16> - return %2 : tensor<10240x4xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir deleted file mode 100644 index 2a97ec8..0000000 --- a/gemm/mlir/gemm_10240_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x4xf16>) -> tensor<10240x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x4xf16>) -> tensor<10240x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xf16>, tensor<8192x4xf16>) outs(%1 : tensor<10240x4xf16>) -> tensor<10240x4xf16> - return %2 : tensor<10240x4xf16> - } -} diff --git a/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir deleted file mode 100644 index a5c4f70..0000000 --- a/gemm/mlir/gemm_10240_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<10240x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<10240x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x10240xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<10240x8xbf16>) -> tensor<10240x8xbf16> - return %2 : tensor<10240x8xbf16> - } -} diff --git a/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir b/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir deleted file mode 100644 index 96ca8f3..0000000 --- a/gemm/mlir/gemm_10240_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x10240xf16>, %arg1: tensor<8192x8xf16>) -> tensor<10240x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<10240x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<10240x8xf16>) -> tensor<10240x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<8192x10240xf16>, tensor<8192x8xf16>) outs(%1 : tensor<10240x8xf16>) -> tensor<10240x8xf16> - return %2 : tensor<10240x8xf16> - } -} diff --git a/gemm/mlir/gemm_1024_5120_640_f16_tB.mlir b/gemm/mlir/gemm_1024_5120_640_f16_tB.mlir deleted file mode 100644 index 7e31313..0000000 --- a/gemm/mlir/gemm_1024_5120_640_f16_tB.mlir +++ /dev/null @@ -1,145 +0,0 @@ -#translation = #iree_codegen.translation_info -module attributes {transform.with_named_sequence} { - stream.executable private @gemm { - stream.executable.export public @gemm workgroups() -> (index, index, index) { - %c16 = arith.constant 16 : index - %c80 = arith.constant 80 : index - %c1 = arith.constant 1 : index - stream.return %c16, %c80, %c1 : index, index, index - } - builtin.module { - func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} { - %c19 = arith.constant 19 : index - %c18 = arith.constant 18 : index - %c17 = arith.constant 17 : index - %c3 = arith.constant 3 : index - %c2 = arith.constant 2 : index - %c16 = arith.constant 16 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c32 = arith.constant 32 : index - %c64 = arith.constant 64 : index - %c1 = arith.constant 1 : index - %c20 = arith.constant 20 : index - %c0 = arith.constant 0 : index - %cst = arith.constant dense<0.000000e+00> : vector<4xf32> - %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index - %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index - %thread_id_x = gpu.thread_id x - %thread_id_y = gpu.thread_id y - %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space> - %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space> - %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<1024x640xf16, strided<[640, 1], offset: ?>> - %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<5120x640xf16, strided<[640, 1], offset: ?>> - %2 = arith.muli %workgroup_id_0, %c64 : index - %3 = arith.muli %thread_id_y, %c32 : index - %4 = arith.divsi %thread_id_x, %c4 : index - %5 = arith.addi %4, %3 : index - %6 = arith.remsi %5, %c64 : index - %7 = arith.addi %6, %2 : index - %8 = arith.remsi %thread_id_x, %c4 : index - %9 = arith.muli %8, %c8 : index - %10 = arith.divsi %thread_id_x, %c64 : index - %11 = arith.muli %10, %c32 : index - %12 = arith.remsi %thread_id_x, %c16 : index - %13 = arith.addi %12, %11 : index - %14 = arith.remsi %thread_id_x, %c64 : index - %15 = arith.divsi %14, %c16 : index - %16 = arith.muli %15, %c4 : index - %17 = arith.addi %16, %c16 : index - %18 = arith.addi %13, %c16 : index - %19 = arith.muli %workgroup_id_1, %c64 : index - %20 = arith.addi %6, %19 : index - %21 = arith.addi %12, %3 : index - %22 = arith.addi %21, %c16 : index - %23:4 = scf.for %arg3 = %c0 to %c20 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { - %62 = arith.muli %arg3, %c32 : index - %63 = arith.addi %62, %9 : index - %64 = vector.load %0[%7, %63] : memref<1024x640xf16, strided<[640, 1], offset: ?>>, vector<8xf16> - vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16> - amdgpu.lds_barrier - %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %68 = vector.load %alloc[%18, 
%17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %69 = vector.load %1[%20, %63] : memref<5120x640xf16, strided<[640, 1], offset: ?>>, vector<8xf16> - amdgpu.lds_barrier - vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16> - amdgpu.lds_barrier - %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> - } - %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<1024x5120xf32, strided<[5120, 1], offset: ?>> - %26 = arith.remsi %thread_id_x, %c64 : index - %27 = arith.divsi %26, %c16 : index - %28 = arith.muli %27, %c4 : index - %29 = arith.divsi %thread_id_x, %c64 : index - %30 = arith.muli %29, %c32 : index - %31 = arith.muli %workgroup_id_0, %c64 : index - %32 = arith.addi %31, %30 : index - %33 = arith.addi %32, %28 : index - %34 = arith.muli %thread_id_y, %c32 : index - %35 = arith.muli %workgroup_id_1, %c64 : index - %36 = arith.remsi %thread_id_x, %c16 : index - %37 = arith.addi %36, %35 : index - %38 = arith.addi %37, %34 : index - vector.store %24, %25[%33, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %40 = arith.addi %33, %c1 : index - vector.store %39, %25[%40, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %42 = arith.addi %33, %c2 : index - vector.store %41, %25[%42, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %44 = arith.addi 
%33, %c3 : index - vector.store %43, %25[%44, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %46 = arith.addi %33, %c16 : index - %47 = arith.addi %38, %c16 : index - vector.store %45, %25[%46, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %49 = arith.addi %33, %c17 : index - vector.store %48, %25[%49, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %51 = arith.addi %33, %c18 : index - vector.store %50, %25[%51, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %53 = arith.addi %33, %c19 : index - vector.store %52, %25[%53, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %54, %25[%46, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %55, %25[%49, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %56, %25[%51, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %57, %25[%53, %38] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %58, %25[%33, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %59, %25[%40, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %60, %25[%42, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %61, %25[%44, %47] : memref<1024x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32> - return - } - } - } - func.func @isolated_benchmark(%arg0: tensor<1024x640xf16>, %arg1: tensor<5120x640xf16>) -> tensor<1024x5120xf32> { - %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<1024x640xf16>, tensor<5120x640xf16>) -> tensor<1024x5120xf32> - return %0 : tensor<1024x5120xf32> - } -} diff --git a/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir deleted file mode 100644 index 3baa555..0000000 --- a/gemm/mlir/gemm_1280_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: 
tensor<8192x1280xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<1280x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<1280x16xbf16>) -> tensor<1280x16xbf16> - return %2 : tensor<1280x16xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir deleted file mode 100644 index 3fe4759..0000000 --- a/gemm/mlir/gemm_1280_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x16xf16>) -> tensor<1280x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x16xf16>) -> tensor<1280x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x16xf16>) outs(%1 : tensor<1280x16xf16>) -> tensor<1280x16xf16> - return %2 : tensor<1280x16xf16> - } -} diff --git a/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir deleted file mode 100644 index 3d2ccc5..0000000 --- a/gemm/mlir/gemm_1280_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<1280x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<1280x1xbf16>) -> tensor<1280x1xbf16> - return %2 : tensor<1280x1xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir deleted file mode 100644 index b723290..0000000 --- a/gemm/mlir/gemm_1280_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x1xf16>) -> tensor<1280x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x1xf16>) -> tensor<1280x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x1xf16>) outs(%1 : tensor<1280x1xf16>) -> tensor<1280x1xf16> - return %2 : tensor<1280x1xf16> - } -} diff --git a/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir deleted file mode 100644 index 3f23515..0000000 --- a/gemm/mlir/gemm_1280_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<1280x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<1280x2xbf16>) -> tensor<1280x2xbf16> - return %2 : tensor<1280x2xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir deleted file mode 100644 index 32fdd34..0000000 --- a/gemm/mlir/gemm_1280_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: 
tensor<8192x2xf16>) -> tensor<1280x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x2xf16>) -> tensor<1280x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x2xf16>) outs(%1 : tensor<1280x2xf16>) -> tensor<1280x2xf16> - return %2 : tensor<1280x2xf16> - } -} diff --git a/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir deleted file mode 100644 index e9bf063..0000000 --- a/gemm/mlir/gemm_1280_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<1280x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<1280x32xbf16>) -> tensor<1280x32xbf16> - return %2 : tensor<1280x32xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir deleted file mode 100644 index faf8f1a..0000000 --- a/gemm/mlir/gemm_1280_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x32xf16>) -> tensor<1280x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x32xf16>) -> tensor<1280x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x32xf16>) outs(%1 : tensor<1280x32xf16>) -> tensor<1280x32xf16> - return %2 : tensor<1280x32xf16> - } -} diff --git a/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir deleted file mode 100644 index d844019..0000000 --- a/gemm/mlir/gemm_1280_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<1280x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<1280x4xbf16>) -> tensor<1280x4xbf16> - return %2 : tensor<1280x4xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir deleted file mode 100644 index f7ead50..0000000 --- a/gemm/mlir/gemm_1280_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x4xf16>) -> tensor<1280x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x4xf16>) -> tensor<1280x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x4xf16>) outs(%1 : tensor<1280x4xf16>) -> tensor<1280x4xf16> - return %2 : tensor<1280x4xf16> - } -} diff --git a/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir deleted file mode 100644 index 8f2da95..0000000 --- a/gemm/mlir/gemm_1280_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<1280x8xbf16> { - %cst = 
arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1280x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<1280x8xbf16>) -> tensor<1280x8xbf16> - return %2 : tensor<1280x8xbf16> - } -} diff --git a/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir b/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir deleted file mode 100644 index 4c96f74..0000000 --- a/gemm/mlir/gemm_1280_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x1280xf16>, %arg1: tensor<8192x8xf16>) -> tensor<1280x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1280x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1280x8xf16>) -> tensor<1280x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x1280xf16>, tensor<8192x8xf16>) outs(%1 : tensor<1280x8xf16>) -> tensor<1280x8xf16> - return %2 : tensor<1280x8xf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_bf16.mlir b/gemm/mlir/gemm_128_1280_2048_bf16.mlir deleted file mode 100644 index c758c9d..0000000 --- a/gemm/mlir/gemm_128_1280_2048_bf16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<128x2048xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<128x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x2048xbf16>, tensor<2048x1280xbf16>) outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - return %2 : tensor<128x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir b/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir deleted file mode 100644 index 0cb012c..0000000 --- a/gemm/mlir/gemm_128_1280_2048_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x128xbf16>, %arg1: tensor<2048x1280xbf16>) -> tensor<128x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<128x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x128xbf16>, tensor<2048x1280xbf16>) outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - return %2 : tensor<128x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir b/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir deleted file mode 100644 index 32f5e6f..0000000 --- a/gemm/mlir/gemm_128_1280_2048_bf16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<128x2048xbf16>, %arg1: tensor<1280x2048xbf16>) -> tensor<128x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<128x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<128x2048xbf16>, tensor<1280x2048xbf16>) outs(%1 : tensor<128x1280xbf16>) -> tensor<128x1280xbf16> - return %2 : tensor<128x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_f16.mlir b/gemm/mlir/gemm_128_1280_2048_f16.mlir deleted file mode 100644 index 84ea04a..0000000 --- a/gemm/mlir/gemm_128_1280_2048_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<128x2048xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> { - %cst = 
arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<128x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<128x1280xf16>) -> tensor<128x1280xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<128x2048xf16>, tensor<2048x1280xf16>) outs(%1 : tensor<128x1280xf16>) -> tensor<128x1280xf16> - return %2 : tensor<128x1280xf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir b/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir deleted file mode 100644 index 45cda80..0000000 --- a/gemm/mlir/gemm_128_1280_2048_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x128xf16>, %arg1: tensor<2048x1280xf16>) -> tensor<128x1280xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<128x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<128x1280xf16>) -> tensor<128x1280xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x128xf16>, tensor<2048x1280xf16>) outs(%1 : tensor<128x1280xf16>) -> tensor<128x1280xf16> - return %2 : tensor<128x1280xf16> - } -} diff --git a/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir b/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir deleted file mode 100644 index 785a854..0000000 --- a/gemm/mlir/gemm_128_1280_2048_f16_tB.mlir +++ /dev/null @@ -1,144 +0,0 @@ -#translation = #iree_codegen.translation_info -module attributes {transform.with_named_sequence} { - stream.executable private @gemm { - stream.executable.export public @gemm workgroups() -> (index, index, index) { - %c2 = arith.constant 2 : index - %c20 = arith.constant 20 : index - %c1 = arith.constant 1 : index - stream.return %c2, %c20, %c1 : index, index, index - } - builtin.module { - func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} { - %c19 = arith.constant 19 : index - %c18 = arith.constant 18 : index - %c17 = arith.constant 17 : index - %c3 = arith.constant 3 : index - %c2 = arith.constant 2 : index - %c16 = arith.constant 16 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c32 = arith.constant 32 : index - %c1 = arith.constant 1 : index - %c64 = arith.constant 64 : index - %c0 = arith.constant 0 : index - %cst = arith.constant dense<0.000000e+00> : vector<4xf32> - %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index - %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index - %thread_id_x = gpu.thread_id x - %thread_id_y = gpu.thread_id y - %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space> - %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space> - %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<128x2048xf16, strided<[2048, 1], offset: ?>> - %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<1280x2048xf16, strided<[2048, 1], offset: ?>> - %2 = arith.muli %workgroup_id_0, %c64 : index - %3 = arith.muli %thread_id_y, %c32 : index - %4 = arith.divsi %thread_id_x, %c4 : index - %5 = arith.addi %4, %3 : index - %6 = arith.remsi %5, %c64 : index - %7 = arith.addi %6, %2 : index - %8 = arith.remsi %thread_id_x, %c4 : index - %9 = arith.muli %8, %c8 : index - %10 = arith.divsi %thread_id_x, %c64 : index - %11 = arith.muli %10, %c32 : index - %12 = arith.remsi %thread_id_x, %c16 : index - %13 = arith.addi %12, %11 : index - %14 = arith.remsi %thread_id_x, %c64 : index - %15 = arith.divsi %14, %c16 : index - %16 = arith.muli %15, %c4 : index - %17 = arith.addi %16, %c16 : index - %18 = arith.addi %13, %c16 : index - %19 = arith.muli %workgroup_id_1, 
%c64 : index - %20 = arith.addi %6, %19 : index - %21 = arith.addi %12, %3 : index - %22 = arith.addi %21, %c16 : index - %23:4 = scf.for %arg3 = %c0 to %c64 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { - %62 = arith.muli %arg3, %c32 : index - %63 = arith.addi %62, %9 : index - %64 = vector.load %0[%7, %63] : memref<128x2048xf16, strided<[2048, 1], offset: ?>>, vector<8xf16> - vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16> - amdgpu.lds_barrier - %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %69 = vector.load %1[%20, %63] : memref<1280x2048xf16, strided<[2048, 1], offset: ?>>, vector<8xf16> - amdgpu.lds_barrier - vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16> - amdgpu.lds_barrier - %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16> - %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> - } - %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<128x1280xf32, strided<[1280, 1], offset: ?>> - %26 = arith.remsi %thread_id_x, %c64 : index - %27 = arith.divsi %26, %c16 : index - %28 = arith.muli %27, %c4 : index - %29 = arith.divsi %thread_id_x, %c64 : index - %30 = arith.muli %29, %c32 : index - %31 = arith.muli %workgroup_id_0, %c64 : index - %32 = arith.addi %31, %30 : index - %33 = arith.addi %32, %28 : index - %34 = arith.muli %thread_id_y, %c32 : index - %35 = arith.muli %workgroup_id_1, 
%c64 : index - %36 = arith.remsi %thread_id_x, %c16 : index - %37 = arith.addi %36, %35 : index - %38 = arith.addi %37, %34 : index - vector.store %24, %25[%33, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %40 = arith.addi %33, %c1 : index - vector.store %39, %25[%40, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %42 = arith.addi %33, %c2 : index - vector.store %41, %25[%42, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %44 = arith.addi %33, %c3 : index - vector.store %43, %25[%44, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %46 = arith.addi %33, %c16 : index - %47 = arith.addi %38, %c16 : index - vector.store %45, %25[%46, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %49 = arith.addi %33, %c17 : index - vector.store %48, %25[%49, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %51 = arith.addi %33, %c18 : index - vector.store %50, %25[%51, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %53 = arith.addi %33, %c19 : index - vector.store %52, %25[%53, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %54, %25[%46, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %55, %25[%49, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %56, %25[%51, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %57, %25[%53, %38] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %58, %25[%33, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %59, %25[%40, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - 
vector.store %60, %25[%42, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %61, %25[%44, %47] : memref<128x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - return - } - } - } - func.func @isolated_benchmark(%arg0: tensor<128x2048xf16>, %arg1: tensor<1280x2048xf16>) -> tensor<128x1280xf32> { - %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<128x2048xf16>, tensor<1280x2048xf16>) -> tensor<128x1280xf32> - return %0 : tensor<128x1280xf32> - } -} diff --git a/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir deleted file mode 100644 index bf06e53..0000000 --- a/gemm/mlir/gemm_13824_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<13824x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x16xbf16>) -> tensor<13824x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<13824x16xbf16>) -> tensor<13824x16xbf16> - return %2 : tensor<13824x16xbf16> - } -} diff --git a/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir deleted file mode 100644 index 6820445..0000000 --- a/gemm/mlir/gemm_13824_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x16xf16>) -> tensor<13824x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x16xf16>) -> tensor<13824x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x16xf16>) outs(%1 : tensor<13824x16xf16>) -> tensor<13824x16xf16> - return %2 : tensor<13824x16xf16> - } -} diff --git a/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir deleted file mode 100644 index bddc513..0000000 --- a/gemm/mlir/gemm_13824_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<13824x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x1xbf16>) -> tensor<13824x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<13824x1xbf16>) -> tensor<13824x1xbf16> - return %2 : tensor<13824x1xbf16> - } -} diff --git a/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir deleted file mode 100644 index de51690..0000000 --- a/gemm/mlir/gemm_13824_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x1xf16>) -> tensor<13824x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x1xf16>) -> tensor<13824x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x1xf16>) outs(%1 : tensor<13824x1xf16>) -> tensor<13824x1xf16> - return %2 : tensor<13824x1xf16> - } -} diff --git a/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir 
b/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir deleted file mode 100644 index b73977b..0000000 --- a/gemm/mlir/gemm_13824_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<13824x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x2xbf16>) -> tensor<13824x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<13824x2xbf16>) -> tensor<13824x2xbf16> - return %2 : tensor<13824x2xbf16> - } -} diff --git a/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir deleted file mode 100644 index b763847..0000000 --- a/gemm/mlir/gemm_13824_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x2xf16>) -> tensor<13824x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x2xf16>) -> tensor<13824x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x2xf16>) outs(%1 : tensor<13824x2xf16>) -> tensor<13824x2xf16> - return %2 : tensor<13824x2xf16> - } -} diff --git a/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir deleted file mode 100644 index 3be8ecf..0000000 --- a/gemm/mlir/gemm_13824_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<13824x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x32xbf16>) -> tensor<13824x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<13824x32xbf16>) -> tensor<13824x32xbf16> - return %2 : tensor<13824x32xbf16> - } -} diff --git a/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir deleted file mode 100644 index 2069eef..0000000 --- a/gemm/mlir/gemm_13824_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x32xf16>) -> tensor<13824x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x32xf16>) -> tensor<13824x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x32xf16>) outs(%1 : tensor<13824x32xf16>) -> tensor<13824x32xf16> - return %2 : tensor<13824x32xf16> - } -} diff --git a/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir deleted file mode 100644 index 3ac974f..0000000 --- a/gemm/mlir/gemm_13824_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<13824x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x4xbf16>) -> tensor<13824x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<13824x4xbf16>) -> tensor<13824x4xbf16> - return %2 : tensor<13824x4xbf16> - } -} diff --git 
a/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir deleted file mode 100644 index 2d2dbaf..0000000 --- a/gemm/mlir/gemm_13824_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x4xf16>) -> tensor<13824x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x4xf16>) -> tensor<13824x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x4xf16>) outs(%1 : tensor<13824x4xf16>) -> tensor<13824x4xf16> - return %2 : tensor<13824x4xf16> - } -} diff --git a/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir deleted file mode 100644 index 30c7d55..0000000 --- a/gemm/mlir/gemm_13824_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<13824x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<13824x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<13824x8xbf16>) -> tensor<13824x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<13824x8xbf16>) -> tensor<13824x8xbf16> - return %2 : tensor<13824x8xbf16> - } -} diff --git a/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir b/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir deleted file mode 100644 index 96d5e3c..0000000 --- a/gemm/mlir/gemm_13824_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x13824xf16>, %arg1: tensor<5120x8xf16>) -> tensor<13824x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<13824x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<13824x8xf16>) -> tensor<13824x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x13824xf16>, tensor<5120x8xf16>) outs(%1 : tensor<13824x8xf16>) -> tensor<13824x8xf16> - return %2 : tensor<13824x8xf16> - } -} diff --git a/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir deleted file mode 100644 index ebb53bd..0000000 --- a/gemm/mlir/gemm_14336_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<14336x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x16xbf16>) -> tensor<14336x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<14336x16xbf16>) -> tensor<14336x16xbf16> - return %2 : tensor<14336x16xbf16> - } -} diff --git a/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir deleted file mode 100644 index 1c62bae..0000000 --- a/gemm/mlir/gemm_14336_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x16xf16>) -> tensor<14336x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x16xf16>) -> tensor<14336x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x16xf16>) outs(%1 : tensor<14336x16xf16>) -> tensor<14336x16xf16> - return %2 : tensor<14336x16xf16> - } -} diff 
--git a/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir deleted file mode 100644 index 12e1750..0000000 --- a/gemm/mlir/gemm_14336_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<14336x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x1xbf16>) -> tensor<14336x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<14336x1xbf16>) -> tensor<14336x1xbf16> - return %2 : tensor<14336x1xbf16> - } -} diff --git a/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir deleted file mode 100644 index b3cee07..0000000 --- a/gemm/mlir/gemm_14336_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x1xf16>) -> tensor<14336x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x1xf16>) -> tensor<14336x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x1xf16>) outs(%1 : tensor<14336x1xf16>) -> tensor<14336x1xf16> - return %2 : tensor<14336x1xf16> - } -} diff --git a/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir deleted file mode 100644 index ce3f701..0000000 --- a/gemm/mlir/gemm_14336_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<14336x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x2xbf16>) -> tensor<14336x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<14336x2xbf16>) -> tensor<14336x2xbf16> - return %2 : tensor<14336x2xbf16> - } -} diff --git a/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir deleted file mode 100644 index 100d62f..0000000 --- a/gemm/mlir/gemm_14336_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x2xf16>) -> tensor<14336x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x2xf16>) -> tensor<14336x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x2xf16>) outs(%1 : tensor<14336x2xf16>) -> tensor<14336x2xf16> - return %2 : tensor<14336x2xf16> - } -} diff --git a/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir deleted file mode 100644 index 39a012e..0000000 --- a/gemm/mlir/gemm_14336_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<14336x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x32xbf16>) -> tensor<14336x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<14336x32xbf16>) -> tensor<14336x32xbf16> - return %2 : tensor<14336x32xbf16> - } -} 
diff --git a/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir deleted file mode 100644 index 6457a07..0000000 --- a/gemm/mlir/gemm_14336_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x32xf16>) -> tensor<14336x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x32xf16>) -> tensor<14336x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x32xf16>) outs(%1 : tensor<14336x32xf16>) -> tensor<14336x32xf16> - return %2 : tensor<14336x32xf16> - } -} diff --git a/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir deleted file mode 100644 index 99bcffb..0000000 --- a/gemm/mlir/gemm_14336_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<14336x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x4xbf16>) -> tensor<14336x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<14336x4xbf16>) -> tensor<14336x4xbf16> - return %2 : tensor<14336x4xbf16> - } -} diff --git a/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir deleted file mode 100644 index 6c93d68..0000000 --- a/gemm/mlir/gemm_14336_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x4xf16>) -> tensor<14336x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x4xf16>) -> tensor<14336x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x4xf16>) outs(%1 : tensor<14336x4xf16>) -> tensor<14336x4xf16> - return %2 : tensor<14336x4xf16> - } -} diff --git a/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir deleted file mode 100644 index 22146cb..0000000 --- a/gemm/mlir/gemm_14336_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<14336x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<14336x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<14336x8xbf16>) -> tensor<14336x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<14336x8xbf16>) -> tensor<14336x8xbf16> - return %2 : tensor<14336x8xbf16> - } -} diff --git a/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir b/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir deleted file mode 100644 index 452edf9..0000000 --- a/gemm/mlir/gemm_14336_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x14336xf16>, %arg1: tensor<8192x8xf16>) -> tensor<14336x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<14336x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<14336x8xf16>) -> tensor<14336x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x14336xf16>, tensor<8192x8xf16>) outs(%1 : tensor<14336x8xf16>) -> tensor<14336x8xf16> - return %2 : tensor<14336x8xf16> - } -} diff --git 
a/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir deleted file mode 100644 index da57d0c..0000000 --- a/gemm/mlir/gemm_15360_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<15360x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x16xbf16>) -> tensor<15360x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<15360x16xbf16>) -> tensor<15360x16xbf16> - return %2 : tensor<15360x16xbf16> - } -} diff --git a/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir deleted file mode 100644 index b15d265..0000000 --- a/gemm/mlir/gemm_15360_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x16xf16>) -> tensor<15360x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x16xf16>) -> tensor<15360x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x16xf16>) outs(%1 : tensor<15360x16xf16>) -> tensor<15360x16xf16> - return %2 : tensor<15360x16xf16> - } -} diff --git a/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir deleted file mode 100644 index b0d9c92..0000000 --- a/gemm/mlir/gemm_15360_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<15360x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x1xbf16>) -> tensor<15360x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<15360x1xbf16>) -> tensor<15360x1xbf16> - return %2 : tensor<15360x1xbf16> - } -} diff --git a/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir deleted file mode 100644 index d458ee9..0000000 --- a/gemm/mlir/gemm_15360_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x1xf16>) -> tensor<15360x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x1xf16>) -> tensor<15360x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x1xf16>) outs(%1 : tensor<15360x1xf16>) -> tensor<15360x1xf16> - return %2 : tensor<15360x1xf16> - } -} diff --git a/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir deleted file mode 100644 index 032eae5..0000000 --- a/gemm/mlir/gemm_15360_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<15360x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x2xbf16>) -> tensor<15360x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<15360x2xbf16>) -> tensor<15360x2xbf16> - return %2 : tensor<15360x2xbf16> 
- } -} diff --git a/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir deleted file mode 100644 index 18a0d50..0000000 --- a/gemm/mlir/gemm_15360_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x2xf16>) -> tensor<15360x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x2xf16>) -> tensor<15360x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x2xf16>) outs(%1 : tensor<15360x2xf16>) -> tensor<15360x2xf16> - return %2 : tensor<15360x2xf16> - } -} diff --git a/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir deleted file mode 100644 index 8f7fa25..0000000 --- a/gemm/mlir/gemm_15360_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<15360x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x32xbf16>) -> tensor<15360x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<15360x32xbf16>) -> tensor<15360x32xbf16> - return %2 : tensor<15360x32xbf16> - } -} diff --git a/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir deleted file mode 100644 index fc86593..0000000 --- a/gemm/mlir/gemm_15360_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x32xf16>) -> tensor<15360x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x32xf16>) -> tensor<15360x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x32xf16>) outs(%1 : tensor<15360x32xf16>) -> tensor<15360x32xf16> - return %2 : tensor<15360x32xf16> - } -} diff --git a/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir deleted file mode 100644 index f388bfc..0000000 --- a/gemm/mlir/gemm_15360_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<15360x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x4xbf16>) -> tensor<15360x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<15360x4xbf16>) -> tensor<15360x4xbf16> - return %2 : tensor<15360x4xbf16> - } -} diff --git a/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir deleted file mode 100644 index c8666aa..0000000 --- a/gemm/mlir/gemm_15360_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x4xf16>) -> tensor<15360x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x4xf16>) -> tensor<15360x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x4xf16>) outs(%1 : tensor<15360x4xf16>) -> tensor<15360x4xf16> - return %2 : 
tensor<15360x4xf16> - } -} diff --git a/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir deleted file mode 100644 index 813f5a1..0000000 --- a/gemm/mlir/gemm_15360_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<15360x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<15360x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<15360x8xbf16>) -> tensor<15360x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<15360x8xbf16>) -> tensor<15360x8xbf16> - return %2 : tensor<15360x8xbf16> - } -} diff --git a/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir b/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir deleted file mode 100644 index 5df7526..0000000 --- a/gemm/mlir/gemm_15360_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x15360xf16>, %arg1: tensor<5120x8xf16>) -> tensor<15360x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<15360x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<15360x8xf16>) -> tensor<15360x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x15360xf16>, tensor<5120x8xf16>) outs(%1 : tensor<15360x8xf16>) -> tensor<15360x8xf16> - return %2 : tensor<15360x8xf16> - } -} diff --git a/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir deleted file mode 100644 index 50136f8..0000000 --- a/gemm/mlir/gemm_16000_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<16000x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> - return %2 : tensor<16000x16xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir deleted file mode 100644 index e0ebb71..0000000 --- a/gemm/mlir/gemm_16000_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<16000x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x16xf16>) -> tensor<16000x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<16000x16xf16>) -> tensor<16000x16xf16> - return %2 : tensor<16000x16xf16> - } -} diff --git a/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir deleted file mode 100644 index 95ae5e6..0000000 --- a/gemm/mlir/gemm_16000_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<16000x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x16xbf16>) -> tensor<16000x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<16000x16xbf16>) -> 
tensor<16000x16xbf16> - return %2 : tensor<16000x16xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir deleted file mode 100644 index c1107cc..0000000 --- a/gemm/mlir/gemm_16000_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<16000x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x16xf16>) -> tensor<16000x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<16000x16xf16>) -> tensor<16000x16xf16> - return %2 : tensor<16000x16xf16> - } -} diff --git a/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir deleted file mode 100644 index d0fc2f2..0000000 --- a/gemm/mlir/gemm_16000_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<16000x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> - return %2 : tensor<16000x1xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir deleted file mode 100644 index 7182791..0000000 --- a/gemm/mlir/gemm_16000_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<16000x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x1xf16>) -> tensor<16000x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<16000x1xf16>) -> tensor<16000x1xf16> - return %2 : tensor<16000x1xf16> - } -} diff --git a/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir deleted file mode 100644 index 8258663..0000000 --- a/gemm/mlir/gemm_16000_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<16000x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<16000x1xbf16>) -> tensor<16000x1xbf16> - return %2 : tensor<16000x1xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir deleted file mode 100644 index 8186ad5..0000000 --- a/gemm/mlir/gemm_16000_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<16000x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x1xf16>) -> tensor<16000x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<16000x1xf16>) -> 
tensor<16000x1xf16> - return %2 : tensor<16000x1xf16> - } -} diff --git a/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir deleted file mode 100644 index 11c07f2..0000000 --- a/gemm/mlir/gemm_16000_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<16000x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> - return %2 : tensor<16000x2xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir deleted file mode 100644 index 3efeb6a..0000000 --- a/gemm/mlir/gemm_16000_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<16000x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x2xf16>) -> tensor<16000x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<16000x2xf16>) -> tensor<16000x2xf16> - return %2 : tensor<16000x2xf16> - } -} diff --git a/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir deleted file mode 100644 index 28e4d63..0000000 --- a/gemm/mlir/gemm_16000_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<16000x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<16000x2xbf16>) -> tensor<16000x2xbf16> - return %2 : tensor<16000x2xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir deleted file mode 100644 index 8c125de..0000000 --- a/gemm/mlir/gemm_16000_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<16000x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x2xf16>) -> tensor<16000x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<16000x2xf16>) -> tensor<16000x2xf16> - return %2 : tensor<16000x2xf16> - } -} diff --git a/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir deleted file mode 100644 index a47ce25..0000000 --- a/gemm/mlir/gemm_16000_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<16000x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<16000x32xbf16>) 
-> tensor<16000x32xbf16> - return %2 : tensor<16000x32xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir deleted file mode 100644 index 5ea27d7..0000000 --- a/gemm/mlir/gemm_16000_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<16000x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x32xf16>) -> tensor<16000x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<16000x32xf16>) -> tensor<16000x32xf16> - return %2 : tensor<16000x32xf16> - } -} diff --git a/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir deleted file mode 100644 index 72308e0..0000000 --- a/gemm/mlir/gemm_16000_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<16000x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<16000x32xbf16>) -> tensor<16000x32xbf16> - return %2 : tensor<16000x32xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir deleted file mode 100644 index e5f6d3b..0000000 --- a/gemm/mlir/gemm_16000_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<16000x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x32xf16>) -> tensor<16000x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<16000x32xf16>) -> tensor<16000x32xf16> - return %2 : tensor<16000x32xf16> - } -} diff --git a/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir deleted file mode 100644 index a514a47..0000000 --- a/gemm/mlir/gemm_16000_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<16000x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> - return %2 : tensor<16000x4xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir deleted file mode 100644 index 1b73c07..0000000 --- a/gemm/mlir/gemm_16000_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<16000x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x4xf16>) -> tensor<16000x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x4xf16>) outs(%1 : 
tensor<16000x4xf16>) -> tensor<16000x4xf16> - return %2 : tensor<16000x4xf16> - } -} diff --git a/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir deleted file mode 100644 index 1de70e2..0000000 --- a/gemm/mlir/gemm_16000_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<16000x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<16000x4xbf16>) -> tensor<16000x4xbf16> - return %2 : tensor<16000x4xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir deleted file mode 100644 index a035de1..0000000 --- a/gemm/mlir/gemm_16000_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<16000x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x4xf16>) -> tensor<16000x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<16000x4xf16>) -> tensor<16000x4xf16> - return %2 : tensor<16000x4xf16> - } -} diff --git a/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir deleted file mode 100644 index 23c98e5..0000000 --- a/gemm/mlir/gemm_16000_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<16000x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> - return %2 : tensor<16000x8xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir deleted file mode 100644 index 25ea2f2..0000000 --- a/gemm/mlir/gemm_16000_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x16000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<16000x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x8xf16>) -> tensor<16000x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x16000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<16000x8xf16>) -> tensor<16000x8xf16> - return %2 : tensor<16000x8xf16> - } -} diff --git a/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir deleted file mode 100644 index 8b5ce5a..0000000 --- a/gemm/mlir/gemm_16000_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<16000x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<16000x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<16000x8xbf16>) -> tensor<16000x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xbf16>, tensor<8192x8xbf16>) outs(%1 : 
tensor<16000x8xbf16>) -> tensor<16000x8xbf16> - return %2 : tensor<16000x8xbf16> - } -} diff --git a/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir deleted file mode 100644 index b53f1c0..0000000 --- a/gemm/mlir/gemm_16000_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x16000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<16000x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<16000x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<16000x8xf16>) -> tensor<16000x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x16000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<16000x8xf16>) -> tensor<16000x8xf16> - return %2 : tensor<16000x8xf16> - } -} diff --git a/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir deleted file mode 100644 index 0498cb4..0000000 --- a/gemm/mlir/gemm_1920_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<1920x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x16xbf16>) -> tensor<1920x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<1920x16xbf16>) -> tensor<1920x16xbf16> - return %2 : tensor<1920x16xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir deleted file mode 100644 index 7a26a60..0000000 --- a/gemm/mlir/gemm_1920_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x16xf16>) -> tensor<1920x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x16xf16>) -> tensor<1920x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x16xf16>) outs(%1 : tensor<1920x16xf16>) -> tensor<1920x16xf16> - return %2 : tensor<1920x16xf16> - } -} diff --git a/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir deleted file mode 100644 index 69a8142..0000000 --- a/gemm/mlir/gemm_1920_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<1920x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x1xbf16>) -> tensor<1920x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<1920x1xbf16>) -> tensor<1920x1xbf16> - return %2 : tensor<1920x1xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir deleted file mode 100644 index 7f56072..0000000 --- a/gemm/mlir/gemm_1920_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x1xf16>) -> tensor<1920x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x1xf16>) -> tensor<1920x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x1xf16>) outs(%1 : tensor<1920x1xf16>) -> 
tensor<1920x1xf16> - return %2 : tensor<1920x1xf16> - } -} diff --git a/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir deleted file mode 100644 index 8241b87..0000000 --- a/gemm/mlir/gemm_1920_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<1920x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x2xbf16>) -> tensor<1920x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<1920x2xbf16>) -> tensor<1920x2xbf16> - return %2 : tensor<1920x2xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir deleted file mode 100644 index 8410b70..0000000 --- a/gemm/mlir/gemm_1920_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x2xf16>) -> tensor<1920x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x2xf16>) -> tensor<1920x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x2xf16>) outs(%1 : tensor<1920x2xf16>) -> tensor<1920x2xf16> - return %2 : tensor<1920x2xf16> - } -} diff --git a/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir deleted file mode 100644 index fb33ba0..0000000 --- a/gemm/mlir/gemm_1920_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<1920x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x32xbf16>) -> tensor<1920x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<1920x32xbf16>) -> tensor<1920x32xbf16> - return %2 : tensor<1920x32xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir deleted file mode 100644 index 17e9ebc..0000000 --- a/gemm/mlir/gemm_1920_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x32xf16>) -> tensor<1920x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x32xf16>) -> tensor<1920x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x32xf16>) outs(%1 : tensor<1920x32xf16>) -> tensor<1920x32xf16> - return %2 : tensor<1920x32xf16> - } -} diff --git a/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir deleted file mode 100644 index 5f1c806..0000000 --- a/gemm/mlir/gemm_1920_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<1920x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x4xbf16>) -> tensor<1920x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<1920x4xbf16>) -> tensor<1920x4xbf16> - return %2 : 
tensor<1920x4xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir deleted file mode 100644 index cd45416..0000000 --- a/gemm/mlir/gemm_1920_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x4xf16>) -> tensor<1920x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x4xf16>) -> tensor<1920x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x4xf16>) outs(%1 : tensor<1920x4xf16>) -> tensor<1920x4xf16> - return %2 : tensor<1920x4xf16> - } -} diff --git a/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir deleted file mode 100644 index bb5ee3c..0000000 --- a/gemm/mlir/gemm_1920_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<1920x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<1920x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<1920x8xbf16>) -> tensor<1920x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<1920x8xbf16>) -> tensor<1920x8xbf16> - return %2 : tensor<1920x8xbf16> - } -} diff --git a/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir b/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir deleted file mode 100644 index 7f94a48..0000000 --- a/gemm/mlir/gemm_1920_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x1920xf16>, %arg1: tensor<5120x8xf16>) -> tensor<1920x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<1920x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1920x8xf16>) -> tensor<1920x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x1920xf16>, tensor<5120x8xf16>) outs(%1 : tensor<1920x8xf16>) -> tensor<1920x8xf16> - return %2 : tensor<1920x8xf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16.mlir deleted file mode 100644 index d65d3a7..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_bf16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x10240xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x10240xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - return %2 : tensor<2048x10240xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir deleted file mode 100644 index 84241c7..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x10240xbf16>) -> tensor<2048x10240xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x10240xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xbf16>, tensor<1280x10240xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - return %2 
: tensor<2048x10240xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir b/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir deleted file mode 100644 index 28e61ff..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_bf16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<10240x1280xbf16>) -> tensor<2048x10240xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x10240xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<10240x1280xbf16>) outs(%1 : tensor<2048x10240xbf16>) -> tensor<2048x10240xbf16> - return %2 : tensor<2048x10240xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16.mlir b/gemm/mlir/gemm_2048_10240_1280_f16.mlir deleted file mode 100644 index e3bbec5..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x10240xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<1280x10240xf16>) outs(%1 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> - return %2 : tensor<2048x10240xf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir b/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir deleted file mode 100644 index 6a0033c..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x10240xf16>) -> tensor<2048x10240xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x10240xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xf16>, tensor<1280x10240xf16>) outs(%1 : tensor<2048x10240xf16>) -> tensor<2048x10240xf16> - return %2 : tensor<2048x10240xf16> - } -} diff --git a/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir b/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir deleted file mode 100644 index 5513769..0000000 --- a/gemm/mlir/gemm_2048_10240_1280_f16_tB.mlir +++ /dev/null @@ -1,145 +0,0 @@ -#translation = #iree_codegen.translation_info -module attributes {transform.with_named_sequence} { - stream.executable private @gemm { - stream.executable.export public @gemm workgroups() -> (index, index, index) { - %c32 = arith.constant 32 : index - %c160 = arith.constant 160 : index - %c1 = arith.constant 1 : index - stream.return %c32, %c160, %c1 : index, index, index - } - builtin.module { - func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} { - %c19 = arith.constant 19 : index - %c18 = arith.constant 18 : index - %c17 = arith.constant 17 : index - %c3 = arith.constant 3 : index - %c2 = arith.constant 2 : index - %c16 = arith.constant 16 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c32 = arith.constant 32 : index - %c64 = arith.constant 64 : index - %c1 = arith.constant 1 : index - %c40 = arith.constant 40 : index - %c0 = arith.constant 0 : index - %cst = arith.constant dense<0.000000e+00> : vector<4xf32> - %workgroup_id_0 = stream.dispatch.workgroup.id[0] 
: index - %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index - %thread_id_x = gpu.thread_id x - %thread_id_y = gpu.thread_id y - %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>> - %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>> - %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<2048x1280xf16, strided<[1280, 1], offset: ?>> - %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<10240x1280xf16, strided<[1280, 1], offset: ?>> - %2 = arith.muli %workgroup_id_0, %c64 : index - %3 = arith.muli %thread_id_y, %c32 : index - %4 = arith.divsi %thread_id_x, %c4 : index - %5 = arith.addi %4, %3 : index - %6 = arith.remsi %5, %c64 : index - %7 = arith.addi %6, %2 : index - %8 = arith.remsi %thread_id_x, %c4 : index - %9 = arith.muli %8, %c8 : index - %10 = arith.divsi %thread_id_x, %c64 : index - %11 = arith.muli %10, %c32 : index - %12 = arith.remsi %thread_id_x, %c16 : index - %13 = arith.addi %12, %11 : index - %14 = arith.remsi %thread_id_x, %c64 : index - %15 = arith.divsi %14, %c16 : index - %16 = arith.muli %15, %c4 : index - %17 = arith.addi %16, %c16 : index - %18 = arith.addi %13, %c16 : index - %19 = arith.muli %workgroup_id_1, %c64 : index - %20 = arith.addi %6, %19 : index - %21 = arith.addi %12, %3 : index - %22 = arith.addi %21, %c16 : index - %23:4 = scf.for %arg3 = %c0 to %c40 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { - %62 = arith.muli %arg3, %c32 : index - %63 = arith.addi %62, %9 : index - %64 = vector.load %0[%7, %63] : memref<2048x1280xf16, strided<[1280, 1], offset: ?>>, vector<8xf16> - vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %69 = vector.load %1[%20, %63] : memref<10240x1280xf16, strided<[1280, 1], offset: ?>>, vector<8xf16> - amdgpu.lds_barrier - vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, 
vector<4xf32> - %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> - } - %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<2048x10240xf32, strided<[10240, 1], offset: ?>> - %26 = arith.remsi %thread_id_x, %c64 : index - %27 = arith.divsi %26, %c16 : index - %28 = arith.muli %27, %c4 : index - %29 = arith.divsi %thread_id_x, %c64 : index - %30 = arith.muli %29, %c32 : index - %31 = arith.muli %workgroup_id_0, %c64 : index - %32 = arith.addi %31, %30 : index - %33 = arith.addi %32, %28 : index - %34 = arith.muli %thread_id_y, %c32 : index - %35 = arith.muli %workgroup_id_1, %c64 : index - %36 = arith.remsi %thread_id_x, %c16 : index - %37 = arith.addi %36, %35 : index - %38 = arith.addi %37, %34 : index - vector.store %24, %25[%33, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %40 = arith.addi %33, %c1 : index - vector.store %39, %25[%40, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %42 = arith.addi %33, %c2 : index - vector.store %41, %25[%42, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %44 = arith.addi %33, %c3 : index - vector.store %43, %25[%44, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %46 = arith.addi %33, %c16 : index - %47 = arith.addi %38, %c16 : index - vector.store %45, %25[%46, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %49 = arith.addi %33, %c17 : index - vector.store %48, %25[%49, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %51 = arith.addi %33, %c18 : index - vector.store %50, %25[%51, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %53 = arith.addi %33, %c19 : index - vector.store %52, %25[%53, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %54, %25[%46, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, 
vector<1xf32> - %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %55, %25[%49, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %56, %25[%51, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %57, %25[%53, %38] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %58, %25[%33, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %59, %25[%40, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %60, %25[%42, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %61, %25[%44, %47] : memref<2048x10240xf32, strided<[10240, 1], offset: ?>>, vector<1xf32> - return - } - } - } - func.func @isolated_benchmark(%arg0: tensor<2048x1280xf16>, %arg1: tensor<10240x1280xf16>) -> tensor<2048x10240xf32> { - %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<2048x1280xf16>, tensor<10240x1280xf16>) -> tensor<2048x10240xf32> - return %0 : tensor<2048x10240xf32> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16.mlir deleted file mode 100644 index 91c8ae2..0000000 --- a/gemm/mlir/gemm_2048_1280_1280_bf16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir deleted file mode 100644 index a155776..0000000 --- a/gemm/mlir/gemm_2048_1280_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x2048xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir b/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir deleted file mode 100644 index 2087786..0000000 --- 
a/gemm/mlir/gemm_2048_1280_1280_bf16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x1280xbf16>, %arg1: tensor<1280x1280xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x1280xbf16>, tensor<1280x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16.mlir b/gemm/mlir/gemm_2048_1280_1280_f16.mlir deleted file mode 100644 index ceb58a2..0000000 --- a/gemm/mlir/gemm_2048_1280_1280_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1280xf16>, tensor<1280x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - return %2 : tensor<2048x1280xf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir b/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir deleted file mode 100644 index 13b6466..0000000 --- a/gemm/mlir/gemm_2048_1280_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x2048xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x2048xf16>, tensor<1280x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - return %2 : tensor<2048x1280xf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir b/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir deleted file mode 100644 index 54d210b..0000000 --- a/gemm/mlir/gemm_2048_1280_1280_f16_tB.mlir +++ /dev/null @@ -1,145 +0,0 @@ -#translation = #iree_codegen.translation_info -module attributes {transform.with_named_sequence} { - stream.executable private @gemm { - stream.executable.export public @gemm workgroups() -> (index, index, index) { - %c32 = arith.constant 32 : index - %c20 = arith.constant 20 : index - %c1 = arith.constant 1 : index - stream.return %c32, %c20, %c1 : index, index, index - } - builtin.module { - func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} { - %c19 = arith.constant 19 : index - %c18 = arith.constant 18 : index - %c17 = arith.constant 17 : index - %c3 = arith.constant 3 : index - %c2 = arith.constant 2 : index - %c16 = arith.constant 16 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c32 = arith.constant 32 : index - %c64 = arith.constant 64 : index - %c1 = arith.constant 1 : index - %c40 = arith.constant 40 : index - %c0 = arith.constant 0 : index - %cst = arith.constant dense<0.000000e+00> : vector<4xf32> - %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index - %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index - %thread_id_x = gpu.thread_id x - %thread_id_y = gpu.thread_id y - %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>> - %alloc_0 = memref.alloc() 
: memref<64x32xf16, #gpu.address_space<workgroup>> - %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<2048x1280xf16, strided<[1280, 1], offset: ?>> - %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<1280x1280xf16, strided<[1280, 1], offset: ?>> - %2 = arith.muli %workgroup_id_0, %c64 : index - %3 = arith.muli %thread_id_y, %c32 : index - %4 = arith.divsi %thread_id_x, %c4 : index - %5 = arith.addi %4, %3 : index - %6 = arith.remsi %5, %c64 : index - %7 = arith.addi %6, %2 : index - %8 = arith.remsi %thread_id_x, %c4 : index - %9 = arith.muli %8, %c8 : index - %10 = arith.divsi %thread_id_x, %c64 : index - %11 = arith.muli %10, %c32 : index - %12 = arith.remsi %thread_id_x, %c16 : index - %13 = arith.addi %12, %11 : index - %14 = arith.remsi %thread_id_x, %c64 : index - %15 = arith.divsi %14, %c16 : index - %16 = arith.muli %15, %c4 : index - %17 = arith.addi %16, %c16 : index - %18 = arith.addi %13, %c16 : index - %19 = arith.muli %workgroup_id_1, %c64 : index - %20 = arith.addi %6, %19 : index - %21 = arith.addi %12, %3 : index - %22 = arith.addi %21, %c16 : index - %23:4 = scf.for %arg3 = %c0 to %c40 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { - %62 = arith.muli %arg3, %c32 : index - %63 = arith.addi %62, %9 : index - %64 = vector.load %0[%7, %63] : memref<2048x1280xf16, strided<[1280, 1], offset: ?>>, vector<8xf16> - vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %69 = vector.load %1[%20, %63] : memref<1280x1280xf16, strided<[1280, 1], offset: ?>>, vector<8xf16> - amdgpu.lds_barrier - vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 
: i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> - } - %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<2048x1280xf32, strided<[1280, 1], offset: ?>> - %26 = arith.remsi %thread_id_x, %c64 : index - %27 = arith.divsi %26, %c16 : index - %28 = arith.muli %27, %c4 : index - %29 = arith.divsi %thread_id_x, %c64 : index - %30 = arith.muli %29, %c32 : index - %31 = arith.muli %workgroup_id_0, %c64 : index - %32 = arith.addi %31, %30 : index - %33 = arith.addi %32, %28 : index - %34 = arith.muli %thread_id_y, %c32 : index - %35 = arith.muli %workgroup_id_1, %c64 : index - %36 = arith.remsi %thread_id_x, %c16 : index - %37 = arith.addi %36, %35 : index - %38 = arith.addi %37, %34 : index - vector.store %24, %25[%33, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %40 = arith.addi %33, %c1 : index - vector.store %39, %25[%40, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %42 = arith.addi %33, %c2 : index - vector.store %41, %25[%42, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %44 = arith.addi %33, %c3 : index - vector.store %43, %25[%44, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %46 = arith.addi %33, %c16 : index - %47 = arith.addi %38, %c16 : index - vector.store %45, %25[%46, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %49 = arith.addi %33, %c17 : index - vector.store %48, %25[%49, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %51 = arith.addi %33, %c18 : index - vector.store %50, %25[%51, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %53 = arith.addi %33, %c19 : index - vector.store %52, %25[%53, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %54, %25[%46, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %55, %25[%49, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %56 = 
vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %56, %25[%51, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %57, %25[%53, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %58, %25[%33, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %59, %25[%40, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %60, %25[%42, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %61, %25[%44, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - return - } - } - } - func.func @isolated_benchmark(%arg0: tensor<2048x1280xf16>, %arg1: tensor<1280x1280xf16>) -> tensor<2048x1280xf32> { - %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<2048x1280xf16>, tensor<1280x1280xf16>) -> tensor<2048x1280xf32> - return %0 : tensor<2048x1280xf32> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16.mlir deleted file mode 100644 index 6739dcc..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_bf16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x5120xbf16>, tensor<5120x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir deleted file mode 100644 index d99f327..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x2048xbf16>, %arg1: tensor<5120x1280xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x2048xbf16>, tensor<5120x1280xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir b/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir deleted file mode 100644 index ef0bd8e..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_bf16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x5120xbf16>, %arg1: tensor<1280x5120xbf16>) -> tensor<2048x1280xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2048x1280xbf16> - %1 = 
linalg.fill ins(%cst : bf16) outs(%0 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2048x5120xbf16>, tensor<1280x5120xbf16>) outs(%1 : tensor<2048x1280xbf16>) -> tensor<2048x1280xbf16> - return %2 : tensor<2048x1280xbf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16.mlir b/gemm/mlir/gemm_2048_1280_5120_f16.mlir deleted file mode 100644 index c75885c..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x5120xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x5120xf16>, tensor<5120x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - return %2 : tensor<2048x1280xf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir b/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir deleted file mode 100644 index 3d6fa99..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x2048xf16>, %arg1: tensor<5120x1280xf16>) -> tensor<2048x1280xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x1280xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x2048xf16>, tensor<5120x1280xf16>) outs(%1 : tensor<2048x1280xf16>) -> tensor<2048x1280xf16> - return %2 : tensor<2048x1280xf16> - } -} diff --git a/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir b/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir deleted file mode 100644 index 9a35119..0000000 --- a/gemm/mlir/gemm_2048_1280_5120_f16_tB.mlir +++ /dev/null @@ -1,145 +0,0 @@ -#translation = #iree_codegen.translation_info -module attributes {transform.with_named_sequence} { - stream.executable private @gemm { - stream.executable.export public @gemm workgroups() -> (index, index, index) { - %c32 = arith.constant 32 : index - %c20 = arith.constant 20 : index - %c1 = arith.constant 1 : index - stream.return %c32, %c20, %c1 : index, index, index - } - builtin.module { - func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} { - %c19 = arith.constant 19 : index - %c18 = arith.constant 18 : index - %c17 = arith.constant 17 : index - %c3 = arith.constant 3 : index - %c2 = arith.constant 2 : index - %c16 = arith.constant 16 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c32 = arith.constant 32 : index - %c64 = arith.constant 64 : index - %c1 = arith.constant 1 : index - %c160 = arith.constant 160 : index - %c0 = arith.constant 0 : index - %cst = arith.constant dense<0.000000e+00> : vector<4xf32> - %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index - %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index - %thread_id_x = gpu.thread_id x - %thread_id_y = gpu.thread_id y - %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>> - %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>> - %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<2048x5120xf16, strided<[5120, 1], offset: ?>> - %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<1280x5120xf16, strided<[5120, 1], offset: ?>> - %2 = arith.muli 
%workgroup_id_0, %c64 : index - %3 = arith.muli %thread_id_y, %c32 : index - %4 = arith.divsi %thread_id_x, %c4 : index - %5 = arith.addi %4, %3 : index - %6 = arith.remsi %5, %c64 : index - %7 = arith.addi %6, %2 : index - %8 = arith.remsi %thread_id_x, %c4 : index - %9 = arith.muli %8, %c8 : index - %10 = arith.divsi %thread_id_x, %c64 : index - %11 = arith.muli %10, %c32 : index - %12 = arith.remsi %thread_id_x, %c16 : index - %13 = arith.addi %12, %11 : index - %14 = arith.remsi %thread_id_x, %c64 : index - %15 = arith.divsi %14, %c16 : index - %16 = arith.muli %15, %c4 : index - %17 = arith.addi %16, %c16 : index - %18 = arith.addi %13, %c16 : index - %19 = arith.muli %workgroup_id_1, %c64 : index - %20 = arith.addi %6, %19 : index - %21 = arith.addi %12, %3 : index - %22 = arith.addi %21, %c16 : index - %23:4 = scf.for %arg3 = %c0 to %c160 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) { - %62 = arith.muli %arg3, %c32 : index - %63 = arith.addi %62, %9 : index - %64 = vector.load %0[%7, %63] : memref<2048x5120xf16, strided<[5120, 1], offset: ?>>, vector<8xf16> - vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %69 = vector.load %1[%20, %63] : memref<1280x5120xf16, strided<[5120, 1], offset: ?>>, vector<8xf16> - amdgpu.lds_barrier - vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16> - amdgpu.lds_barrier - %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16> - %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32> - scf.yield %75, %81, %79, %77 : 
vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32> - } - %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<2048x1280xf32, strided<[1280, 1], offset: ?>> - %26 = arith.remsi %thread_id_x, %c64 : index - %27 = arith.divsi %26, %c16 : index - %28 = arith.muli %27, %c4 : index - %29 = arith.divsi %thread_id_x, %c64 : index - %30 = arith.muli %29, %c32 : index - %31 = arith.muli %workgroup_id_0, %c64 : index - %32 = arith.addi %31, %30 : index - %33 = arith.addi %32, %28 : index - %34 = arith.muli %thread_id_y, %c32 : index - %35 = arith.muli %workgroup_id_1, %c64 : index - %36 = arith.remsi %thread_id_x, %c16 : index - %37 = arith.addi %36, %35 : index - %38 = arith.addi %37, %34 : index - vector.store %24, %25[%33, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %40 = arith.addi %33, %c1 : index - vector.store %39, %25[%40, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %42 = arith.addi %33, %c2 : index - vector.store %41, %25[%42, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %44 = arith.addi %33, %c3 : index - vector.store %43, %25[%44, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %46 = arith.addi %33, %c16 : index - %47 = arith.addi %38, %c16 : index - vector.store %45, %25[%46, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %49 = arith.addi %33, %c17 : index - vector.store %48, %25[%49, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %51 = arith.addi %33, %c18 : index - vector.store %50, %25[%51, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - %53 = arith.addi %33, %c19 : index - vector.store %52, %25[%53, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %54, %25[%46, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %55, %25[%49, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %56, %25[%51, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], 
strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %57, %25[%53, %38] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %58, %25[%33, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %59, %25[%40, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %60, %25[%42, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32> - vector.store %61, %25[%44, %47] : memref<2048x1280xf32, strided<[1280, 1], offset: ?>>, vector<1xf32> - return - } - } - } - func.func @isolated_benchmark(%arg0: tensor<2048x5120xf16>, %arg1: tensor<1280x5120xf16>) -> tensor<2048x1280xf32> { - %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<2048x5120xf16>, tensor<1280x5120xf16>) -> tensor<2048x1280xf32> - return %0 : tensor<2048x1280xf32> - } -} diff --git a/gemm/mlir/gemm_2048_2048_1024_f16.mlir b/gemm/mlir/gemm_2048_2048_1024_f16.mlir deleted file mode 100644 index cc77455..0000000 --- a/gemm/mlir/gemm_2048_2048_1024_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<2048x2048xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x2048xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1024xf16>, tensor<1024x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - return %2 : tensor<2048x2048xf16> - } -} diff --git a/gemm/mlir/gemm_2048_2048_65536_f16.mlir b/gemm/mlir/gemm_2048_2048_65536_f16.mlir deleted file mode 100644 index 34b9849..0000000 --- a/gemm/mlir/gemm_2048_2048_65536_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<2048x2048xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x2048xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x65536xf16>, tensor<65536x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - return %2 : tensor<2048x2048xf16> - } -} diff --git a/gemm/mlir/gemm_2048_2048_8192_f16.mlir b/gemm/mlir/gemm_2048_2048_8192_f16.mlir deleted file mode 100644 index e9f3dd8..0000000 --- a/gemm/mlir/gemm_2048_2048_8192_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<2048x2048xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x2048xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<8192x2048xf16>) outs(%1 : tensor<2048x2048xf16>) -> tensor<2048x2048xf16> - return %2 : tensor<2048x2048xf16> - } -} diff --git a/gemm/mlir/gemm_2048_8192_1024_f16.mlir b/gemm/mlir/gemm_2048_8192_1024_f16.mlir 
deleted file mode 100644 index edfa213..0000000 --- a/gemm/mlir/gemm_2048_8192_1024_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<2048x8192xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x8192xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x1024xf16>, tensor<1024x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - return %2 : tensor<2048x8192xf16> - } -} diff --git a/gemm/mlir/gemm_2048_8192_65536_f16.mlir b/gemm/mlir/gemm_2048_8192_65536_f16.mlir deleted file mode 100644 index e419b78..0000000 --- a/gemm/mlir/gemm_2048_8192_65536_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<2048x8192xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x8192xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x65536xf16>, tensor<65536x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - return %2 : tensor<2048x8192xf16> - } -} diff --git a/gemm/mlir/gemm_2048_8192_8192_f16.mlir b/gemm/mlir/gemm_2048_8192_8192_f16.mlir deleted file mode 100644 index cc93de1..0000000 --- a/gemm/mlir/gemm_2048_8192_8192_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<2048x8192xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2048x8192xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<8192x8192xf16>) outs(%1 : tensor<2048x8192xf16>) -> tensor<2048x8192xf16> - return %2 : tensor<2048x8192xf16> - } -} diff --git a/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir deleted file mode 100644 index 1ab9cc0..0000000 --- a/gemm/mlir/gemm_2560_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<2560x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2560x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x16xbf16>) -> tensor<2560x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<2560x16xbf16>) -> tensor<2560x16xbf16> - return %2 : tensor<2560x16xbf16> - } -} diff --git a/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir deleted file mode 100644 index fd4d377..0000000 --- a/gemm/mlir/gemm_2560_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x16xf16>) -> tensor<2560x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2560x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x16xf16>) -> tensor<2560x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x16xf16>) outs(%1 : tensor<2560x16xf16>) -> tensor<2560x16xf16> - return %2 : tensor<2560x16xf16> - } -} diff --git a/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir deleted file mode 100644 
index bf23aca..0000000 --- a/gemm/mlir/gemm_2560_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<2560x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2560x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x1xbf16>) -> tensor<2560x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<2560x1xbf16>) -> tensor<2560x1xbf16> - return %2 : tensor<2560x1xbf16> - } -} diff --git a/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir deleted file mode 100644 index e6b86b4..0000000 --- a/gemm/mlir/gemm_2560_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x1xf16>) -> tensor<2560x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2560x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x1xf16>) -> tensor<2560x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x1xf16>) outs(%1 : tensor<2560x1xf16>) -> tensor<2560x1xf16> - return %2 : tensor<2560x1xf16> - } -} diff --git a/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir deleted file mode 100644 index de185be..0000000 --- a/gemm/mlir/gemm_2560_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<2560x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2560x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x2xbf16>) -> tensor<2560x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<2560x2xbf16>) -> tensor<2560x2xbf16> - return %2 : tensor<2560x2xbf16> - } -} diff --git a/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir deleted file mode 100644 index 3e6664e..0000000 --- a/gemm/mlir/gemm_2560_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x2xf16>) -> tensor<2560x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<2560x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x2xf16>) -> tensor<2560x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x2xf16>) outs(%1 : tensor<2560x2xf16>) -> tensor<2560x2xf16> - return %2 : tensor<2560x2xf16> - } -} diff --git a/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir deleted file mode 100644 index 45d0840..0000000 --- a/gemm/mlir/gemm_2560_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<2560x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<2560x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x32xbf16>) -> tensor<2560x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<2560x32xbf16>) -> tensor<2560x32xbf16> - return %2 : tensor<2560x32xbf16> - } -} diff --git a/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_32_8192_f16_tA.mlir deleted file mode 100644 index 456b6c6..0000000 --- 
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x32xf16>) -> tensor<2560x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<2560x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x32xf16>) -> tensor<2560x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x32xf16>) outs(%1 : tensor<2560x32xf16>) -> tensor<2560x32xf16>
-    return %2 : tensor<2560x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir
deleted file mode 100644
index d377ec1..0000000
--- a/gemm/mlir/gemm_2560_4_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<2560x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<2560x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x4xbf16>) -> tensor<2560x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<2560x4xbf16>) -> tensor<2560x4xbf16>
-    return %2 : tensor<2560x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir
deleted file mode 100644
index a152ec3..0000000
--- a/gemm/mlir/gemm_2560_4_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x4xf16>) -> tensor<2560x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<2560x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x4xf16>) -> tensor<2560x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x4xf16>) outs(%1 : tensor<2560x4xf16>) -> tensor<2560x4xf16>
-    return %2 : tensor<2560x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir
deleted file mode 100644
index 76c1250..0000000
--- a/gemm/mlir/gemm_2560_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x2560xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<2560x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<2560x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2560x8xbf16>) -> tensor<2560x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<2560x8xbf16>) -> tensor<2560x8xbf16>
-    return %2 : tensor<2560x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir b/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir
deleted file mode 100644
index fff4a68..0000000
--- a/gemm/mlir/gemm_2560_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x2560xf16>, %arg1: tensor<8192x8xf16>) -> tensor<2560x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<2560x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<2560x8xf16>) -> tensor<2560x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x2560xf16>, tensor<8192x8xf16>) outs(%1 : tensor<2560x8xf16>) -> tensor<2560x8xf16>
-    return %2 : tensor<2560x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir
deleted file mode 100644
index e06171a..0000000
--- a/gemm/mlir/gemm_27648_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<27648x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x16xbf16>) -> tensor<27648x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<27648x16xbf16>) -> tensor<27648x16xbf16>
-    return %2 : tensor<27648x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir
deleted file mode 100644
index 9af970d..0000000
--- a/gemm/mlir/gemm_27648_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x16xf16>) -> tensor<27648x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x16xf16>) -> tensor<27648x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x16xf16>) outs(%1 : tensor<27648x16xf16>) -> tensor<27648x16xf16>
-    return %2 : tensor<27648x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir
deleted file mode 100644
index dda9b15..0000000
--- a/gemm/mlir/gemm_27648_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<27648x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x1xbf16>) -> tensor<27648x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<27648x1xbf16>) -> tensor<27648x1xbf16>
-    return %2 : tensor<27648x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir
deleted file mode 100644
index f2d5c42..0000000
--- a/gemm/mlir/gemm_27648_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x1xf16>) -> tensor<27648x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x1xf16>) -> tensor<27648x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x1xf16>) outs(%1 : tensor<27648x1xf16>) -> tensor<27648x1xf16>
-    return %2 : tensor<27648x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir
deleted file mode 100644
index e16cd24..0000000
--- a/gemm/mlir/gemm_27648_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<27648x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x2xbf16>) -> tensor<27648x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<27648x2xbf16>) -> tensor<27648x2xbf16>
-    return %2 : tensor<27648x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir
deleted file mode 100644
index dcf4508..0000000
--- a/gemm/mlir/gemm_27648_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x2xf16>) -> tensor<27648x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x2xf16>) -> tensor<27648x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x2xf16>) outs(%1 : tensor<27648x2xf16>) -> tensor<27648x2xf16>
-    return %2 : tensor<27648x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir
deleted file mode 100644
index 0a408fd..0000000
--- a/gemm/mlir/gemm_27648_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<27648x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x32xbf16>) -> tensor<27648x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<27648x32xbf16>) -> tensor<27648x32xbf16>
-    return %2 : tensor<27648x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir
deleted file mode 100644
index 90927a3..0000000
--- a/gemm/mlir/gemm_27648_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x32xf16>) -> tensor<27648x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x32xf16>) -> tensor<27648x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x32xf16>) outs(%1 : tensor<27648x32xf16>) -> tensor<27648x32xf16>
-    return %2 : tensor<27648x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir
deleted file mode 100644
index 20f2150..0000000
--- a/gemm/mlir/gemm_27648_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<27648x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x4xbf16>) -> tensor<27648x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<27648x4xbf16>) -> tensor<27648x4xbf16>
-    return %2 : tensor<27648x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir
deleted file mode 100644
index af948b5..0000000
--- a/gemm/mlir/gemm_27648_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x4xf16>) -> tensor<27648x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x4xf16>) -> tensor<27648x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x4xf16>) outs(%1 : tensor<27648x4xf16>) -> tensor<27648x4xf16>
-    return %2 : tensor<27648x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir
deleted file mode 100644
index fd43a3e..0000000
--- a/gemm/mlir/gemm_27648_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<27648x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<27648x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<27648x8xbf16>) -> tensor<27648x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<27648x8xbf16>) -> tensor<27648x8xbf16>
-    return %2 : tensor<27648x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir b/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir
deleted file mode 100644
index 6d0ec2e..0000000
--- a/gemm/mlir/gemm_27648_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x27648xf16>, %arg1: tensor<5120x8xf16>) -> tensor<27648x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<27648x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<27648x8xf16>) -> tensor<27648x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x27648xf16>, tensor<5120x8xf16>) outs(%1 : tensor<27648x8xf16>) -> tensor<27648x8xf16>
-    return %2 : tensor<27648x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir
deleted file mode 100644
index 10c20ee..0000000
--- a/gemm/mlir/gemm_28672_16_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<28672x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x16xbf16>) -> tensor<28672x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<28672x16xbf16>) -> tensor<28672x16xbf16>
-    return %2 : tensor<28672x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir
deleted file mode 100644
index f923157..0000000
--- a/gemm/mlir/gemm_28672_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x16xf16>) -> tensor<28672x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x16xf16>) -> tensor<28672x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x16xf16>) outs(%1 : tensor<28672x16xf16>) -> tensor<28672x16xf16>
-    return %2 : tensor<28672x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir
deleted file mode 100644
index 6a24568..0000000
--- a/gemm/mlir/gemm_28672_1_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<28672x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x1xbf16>) -> tensor<28672x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<28672x1xbf16>) -> tensor<28672x1xbf16>
-    return %2 : tensor<28672x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir
deleted file mode 100644
index a4bb37c..0000000
--- a/gemm/mlir/gemm_28672_1_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x1xf16>) -> tensor<28672x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x1xf16>) -> tensor<28672x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x1xf16>) outs(%1 : tensor<28672x1xf16>) -> tensor<28672x1xf16>
-    return %2 : tensor<28672x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir
deleted file mode 100644
index 24fd156..0000000
--- a/gemm/mlir/gemm_28672_2_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<28672x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x2xbf16>) -> tensor<28672x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<28672x2xbf16>) -> tensor<28672x2xbf16>
-    return %2 : tensor<28672x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir
deleted file mode 100644
index 85df0ac..0000000
--- a/gemm/mlir/gemm_28672_2_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x2xf16>) -> tensor<28672x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x2xf16>) -> tensor<28672x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x2xf16>) outs(%1 : tensor<28672x2xf16>) -> tensor<28672x2xf16>
-    return %2 : tensor<28672x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir
deleted file mode 100644
index e920955..0000000
--- a/gemm/mlir/gemm_28672_32_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<28672x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x32xbf16>) -> tensor<28672x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<28672x32xbf16>) -> tensor<28672x32xbf16>
-    return %2 : tensor<28672x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir
deleted file mode 100644
index 44a1361..0000000
--- a/gemm/mlir/gemm_28672_32_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x32xf16>) -> tensor<28672x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x32xf16>) -> tensor<28672x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x32xf16>) outs(%1 : tensor<28672x32xf16>) -> tensor<28672x32xf16>
-    return %2 : tensor<28672x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir
deleted file mode 100644
index 7ce0353..0000000
--- a/gemm/mlir/gemm_28672_4_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<28672x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x4xbf16>) -> tensor<28672x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<28672x4xbf16>) -> tensor<28672x4xbf16>
-    return %2 : tensor<28672x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir
deleted file mode 100644
index a773111..0000000
--- a/gemm/mlir/gemm_28672_4_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x4xf16>) -> tensor<28672x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x4xf16>) -> tensor<28672x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x4xf16>) outs(%1 : tensor<28672x4xf16>) -> tensor<28672x4xf16>
-    return %2 : tensor<28672x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir
deleted file mode 100644
index 5a2541f..0000000
--- a/gemm/mlir/gemm_28672_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<28672x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<28672x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<28672x8xbf16>) -> tensor<28672x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<28672x8xbf16>) -> tensor<28672x8xbf16>
-    return %2 : tensor<28672x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir b/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir
deleted file mode 100644
index 9226cfe..0000000
--- a/gemm/mlir/gemm_28672_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x28672xf16>, %arg1: tensor<8192x8xf16>) -> tensor<28672x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<28672x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<28672x8xf16>) -> tensor<28672x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x28672xf16>, tensor<8192x8xf16>) outs(%1 : tensor<28672x8xf16>) -> tensor<28672x8xf16>
-    return %2 : tensor<28672x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir
deleted file mode 100644
index 1040350..0000000
--- a/gemm/mlir/gemm_2_1280_8192_bf16_tB.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<1280x8192xbf16>) -> tensor<2x1280xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<2x1280xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x1280xbf16>) -> tensor<2x1280xbf16>
-    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<1280x8192xbf16>) outs(%1 : tensor<2x1280xbf16>) -> tensor<2x1280xbf16>
-    return %2 : tensor<2x1280xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir
deleted file mode 100644
index 7f6b6ea..0000000
--- a/gemm/mlir/gemm_2_3584_8192_bf16_tB.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<3584x8192xbf16>) -> tensor<2x3584xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<2x3584xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x3584xbf16>) -> tensor<2x3584xbf16>
-    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<3584x8192xbf16>) outs(%1 : tensor<2x3584xbf16>) -> tensor<2x3584xbf16>
-    return %2 : tensor<2x3584xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir b/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir
deleted file mode 100644
index 6ac8002..0000000
--- a/gemm/mlir/gemm_2_7168_8192_bf16_tB.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2x8192xbf16>, %arg1: tensor<7168x8192xbf16>) -> tensor<2x7168xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<2x7168xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<2x7168xbf16>) -> tensor<2x7168xbf16>
-    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<2x8192xbf16>, tensor<7168x8192xbf16>) outs(%1 : tensor<2x7168xbf16>) -> tensor<2x7168xbf16>
-    return %2 : tensor<2x7168xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir
deleted file mode 100644
index 986fbe3..0000000
--- a/gemm/mlir/gemm_32000_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<32000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16>
-    return %2 : tensor<32000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir
deleted file mode 100644
index bb83872..0000000
--- a/gemm/mlir/gemm_32000_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<32000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x16xf16>) -> tensor<32000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<32000x16xf16>) -> tensor<32000x16xf16>
-    return %2 : tensor<32000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir
deleted file mode 100644
index af63a99..0000000
--- a/gemm/mlir/gemm_32000_16_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<32000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<32000x16xbf16>) -> tensor<32000x16xbf16>
-    return %2 : tensor<32000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir
deleted file mode 100644
index 9881c6e..0000000
--- a/gemm/mlir/gemm_32000_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<32000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x16xf16>) -> tensor<32000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<32000x16xf16>) -> tensor<32000x16xf16>
-    return %2 : tensor<32000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir
deleted file mode 100644
index 4d33257..0000000
--- a/gemm/mlir/gemm_32000_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<32000x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16>
-    return %2 : tensor<32000x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir
deleted file mode 100644
index 9849f9c..0000000
--- a/gemm/mlir/gemm_32000_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<32000x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x1xf16>) -> tensor<32000x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<32000x1xf16>) -> tensor<32000x1xf16>
-    return %2 : tensor<32000x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir
deleted file mode 100644
index cdf30e8..0000000
--- a/gemm/mlir/gemm_32000_1_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<32000x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<32000x1xbf16>) -> tensor<32000x1xbf16>
-    return %2 : tensor<32000x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir
deleted file mode 100644
index fb063c9..0000000
--- a/gemm/mlir/gemm_32000_1_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<32000x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x1xf16>) -> tensor<32000x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<32000x1xf16>) -> tensor<32000x1xf16>
-    return %2 : tensor<32000x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir
deleted file mode 100644
index ffcff1f..0000000
--- a/gemm/mlir/gemm_32000_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<32000x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16>
-    return %2 : tensor<32000x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir
deleted file mode 100644
index 74b1e6a..0000000
--- a/gemm/mlir/gemm_32000_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<32000x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x2xf16>) -> tensor<32000x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<32000x2xf16>) -> tensor<32000x2xf16>
-    return %2 : tensor<32000x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir
deleted file mode 100644
index 5c6b46d..0000000
--- a/gemm/mlir/gemm_32000_2_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<32000x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<32000x2xbf16>) -> tensor<32000x2xbf16>
-    return %2 : tensor<32000x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir
deleted file mode 100644
index 5623d69..0000000
--- a/gemm/mlir/gemm_32000_2_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<32000x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x2xf16>) -> tensor<32000x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<32000x2xf16>) -> tensor<32000x2xf16>
-    return %2 : tensor<32000x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir
deleted file mode 100644
index 6585842..0000000
--- a/gemm/mlir/gemm_32000_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<32000x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16>
-    return %2 : tensor<32000x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir
deleted file mode 100644
index dfc38c7..0000000
--- a/gemm/mlir/gemm_32000_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<32000x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x32xf16>) -> tensor<32000x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<32000x32xf16>) -> tensor<32000x32xf16>
-    return %2 : tensor<32000x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir
deleted file mode 100644
index efaefd2..0000000
--- a/gemm/mlir/gemm_32000_32_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<32000x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<32000x32xbf16>) -> tensor<32000x32xbf16>
-    return %2 : tensor<32000x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir
deleted file mode 100644
index d82b086..0000000
--- a/gemm/mlir/gemm_32000_32_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<32000x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x32xf16>) -> tensor<32000x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<32000x32xf16>) -> tensor<32000x32xf16>
-    return %2 : tensor<32000x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir
deleted file mode 100644
index f52612c..0000000
--- a/gemm/mlir/gemm_32000_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<32000x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16>
-    return %2 : tensor<32000x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir
deleted file mode 100644
index 43e179b..0000000
--- a/gemm/mlir/gemm_32000_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<32000x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x4xf16>) -> tensor<32000x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<32000x4xf16>) -> tensor<32000x4xf16>
-    return %2 : tensor<32000x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir
deleted file mode 100644
index e3a7fcc..0000000
--- a/gemm/mlir/gemm_32000_4_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<32000x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<32000x4xbf16>) -> tensor<32000x4xbf16>
-    return %2 : tensor<32000x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir
deleted file mode 100644
index c430b43..0000000
--- a/gemm/mlir/gemm_32000_4_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<32000x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x4xf16>) -> tensor<32000x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<32000x4xf16>) -> tensor<32000x4xf16>
-    return %2 : tensor<32000x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir
deleted file mode 100644
index c3082b6..0000000
--- a/gemm/mlir/gemm_32000_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<32000x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16>
-    return %2 : tensor<32000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir
deleted file mode 100644
index 84959d3..0000000
--- a/gemm/mlir/gemm_32000_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x32000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<32000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x8xf16>) -> tensor<32000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x32000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<32000x8xf16>) -> tensor<32000x8xf16>
-    return %2 : tensor<32000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir
deleted file mode 100644
index 7cbee49..0000000
--- a/gemm/mlir/gemm_32000_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<32000x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<32000x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<32000x8xbf16>) -> tensor<32000x8xbf16>
-    return %2 : tensor<32000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir
deleted file mode 100644
index 67d245e..0000000
--- a/gemm/mlir/gemm_32000_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x32000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<32000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<32000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<32000x8xf16>) -> tensor<32000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x32000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<32000x8xf16>) -> tensor<32000x8xf16>
-    return %2 : tensor<32000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir
deleted file mode 100644
index ab4fa46..0000000
--- a/gemm/mlir/gemm_3456_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3456x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x16xbf16>) -> tensor<3456x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<3456x16xbf16>) -> tensor<3456x16xbf16>
-    return %2 : tensor<3456x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir
deleted file mode 100644
index 0c15001..0000000
--- a/gemm/mlir/gemm_3456_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3456x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x16xf16>) -> tensor<3456x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x16xf16>) outs(%1 : tensor<3456x16xf16>) -> tensor<3456x16xf16>
-    return %2 : tensor<3456x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir
deleted file mode 100644
index 754923e..0000000
--- a/gemm/mlir/gemm_3456_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3456x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x1xbf16>) -> tensor<3456x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<3456x1xbf16>) -> tensor<3456x1xbf16>
-    return %2 : tensor<3456x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir
deleted file mode 100644
index a179e69..0000000
--- a/gemm/mlir/gemm_3456_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3456x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x1xf16>) -> tensor<3456x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x1xf16>) outs(%1 : tensor<3456x1xf16>) -> tensor<3456x1xf16>
-    return %2 : tensor<3456x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir
deleted file mode 100644
index 68afe12..0000000
--- a/gemm/mlir/gemm_3456_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3456x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x2xbf16>) -> tensor<3456x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<3456x2xbf16>) -> tensor<3456x2xbf16>
-    return %2 : tensor<3456x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir
deleted file mode 100644
index c0fe5f9..0000000
--- a/gemm/mlir/gemm_3456_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3456x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x2xf16>) -> tensor<3456x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x2xf16>) outs(%1 : tensor<3456x2xf16>) -> tensor<3456x2xf16>
-    return %2 : tensor<3456x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir
deleted file mode 100644
index 9b8159a..0000000
--- a/gemm/mlir/gemm_3456_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3456x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x32xbf16>) -> tensor<3456x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<3456x32xbf16>) -> tensor<3456x32xbf16>
-    return %2 : tensor<3456x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir
deleted file mode 100644
index fe43487..0000000
--- a/gemm/mlir/gemm_3456_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3456x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x32xf16>) -> tensor<3456x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x32xf16>) outs(%1 : tensor<3456x32xf16>) -> tensor<3456x32xf16>
-    return %2 : tensor<3456x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir
deleted file mode 100644
index d6bbdaa..0000000
--- a/gemm/mlir/gemm_3456_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3456x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x4xbf16>) -> tensor<3456x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<3456x4xbf16>) -> tensor<3456x4xbf16>
-    return %2 : tensor<3456x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir
deleted file mode 100644
index d1ba93e..0000000
--- a/gemm/mlir/gemm_3456_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3456x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x4xf16>) -> tensor<3456x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x4xf16>) outs(%1 : tensor<3456x4xf16>) -> tensor<3456x4xf16>
-    return %2 : tensor<3456x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir
deleted file mode 100644
index b7b3a1e..0000000
--- a/gemm/mlir/gemm_3456_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3456x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3456x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3456x8xbf16>) -> tensor<3456x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<3456x8xbf16>) -> tensor<3456x8xbf16>
-    return %2 : tensor<3456x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir b/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir
deleted file mode 100644
index 60f9e0c..0000000
--- a/gemm/mlir/gemm_3456_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3456xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3456x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3456x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3456x8xf16>) -> tensor<3456x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3456xf16>, tensor<5120x8xf16>) outs(%1 : tensor<3456x8xf16>) -> tensor<3456x8xf16>
-    return %2 : tensor<3456x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir
deleted file mode 100644
index 63c122d..0000000
--- a/gemm/mlir/gemm_3840_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<3840x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x16xbf16>) -> tensor<3840x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<3840x16xbf16>) -> tensor<3840x16xbf16>
-    return %2 : tensor<3840x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir
deleted file mode 100644
index 5ed7814..0000000
--- a/gemm/mlir/gemm_3840_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x16xf16>) -> tensor<3840x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x16xf16>) -> tensor<3840x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x16xf16>) outs(%1 : tensor<3840x16xf16>) -> tensor<3840x16xf16>
-    return %2 : tensor<3840x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir
deleted file mode 100644
index 30fce43..0000000
--- a/gemm/mlir/gemm_3840_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<3840x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x1xbf16>) -> tensor<3840x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<3840x1xbf16>) -> tensor<3840x1xbf16>
-    return %2 : tensor<3840x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir
deleted file mode 100644
index c83b20c..0000000
--- a/gemm/mlir/gemm_3840_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x1xf16>) -> tensor<3840x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x1xf16>) -> tensor<3840x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x1xf16>) outs(%1 : tensor<3840x1xf16>) -> tensor<3840x1xf16>
-    return %2 : tensor<3840x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir
deleted file mode 100644
index fde61e4..0000000
--- a/gemm/mlir/gemm_3840_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<3840x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x2xbf16>) -> tensor<3840x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<3840x2xbf16>) -> tensor<3840x2xbf16>
-    return %2 : tensor<3840x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir
deleted file mode 100644
index 3526c21..0000000
--- a/gemm/mlir/gemm_3840_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x2xf16>) -> tensor<3840x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x2xf16>) -> tensor<3840x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x2xf16>) outs(%1 : tensor<3840x2xf16>) -> tensor<3840x2xf16>
-    return %2 : tensor<3840x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir
deleted file mode 100644
index aae821a..0000000
--- a/gemm/mlir/gemm_3840_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<3840x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x32xbf16>) -> tensor<3840x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<3840x32xbf16>) -> tensor<3840x32xbf16>
-    return %2 : tensor<3840x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir
deleted file mode 100644
index 1491630..0000000
--- a/gemm/mlir/gemm_3840_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x32xf16>) -> tensor<3840x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x32xf16>) -> tensor<3840x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x32xf16>) outs(%1 : tensor<3840x32xf16>) -> tensor<3840x32xf16>
-    return %2 : tensor<3840x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir
deleted file mode 100644
index fe34d3f..0000000
--- a/gemm/mlir/gemm_3840_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<3840x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x4xbf16>) -> tensor<3840x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<3840x4xbf16>) -> tensor<3840x4xbf16>
-    return %2 : tensor<3840x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir
deleted file mode 100644
index eab6a7c..0000000
--- a/gemm/mlir/gemm_3840_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x4xf16>) -> tensor<3840x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x4xf16>) -> tensor<3840x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x4xf16>) outs(%1 : tensor<3840x4xf16>) -> tensor<3840x4xf16>
-    return %2 : tensor<3840x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir
deleted file mode 100644
index 84bb52a..0000000
--- a/gemm/mlir/gemm_3840_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<3840x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<3840x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<3840x8xbf16>) -> tensor<3840x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<3840x8xbf16>) -> tensor<3840x8xbf16>
-    return %2 : tensor<3840x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir b/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir
deleted file mode 100644
index 8c91198..0000000
--- a/gemm/mlir/gemm_3840_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x3840xf16>, %arg1: tensor<5120x8xf16>) -> tensor<3840x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<3840x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<3840x8xf16>) -> tensor<3840x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x3840xf16>, tensor<5120x8xf16>) outs(%1 : tensor<3840x8xf16>) -> tensor<3840x8xf16>
-    return %2 : tensor<3840x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir
deleted file mode 100644
index 01c0a78..0000000
--- a/gemm/mlir/gemm_4000_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<4000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16>
-    return %2 : tensor<4000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir
deleted file mode 100644
index 3eb9fe7..0000000
--- a/gemm/mlir/gemm_4000_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<4000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<4000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x16xf16>) -> tensor<4000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<4000x16xf16>) -> tensor<4000x16xf16>
-    return %2 : tensor<4000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir
deleted file mode 100644
index a64464a..0000000
--- a/gemm/mlir/gemm_4000_16_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<4000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<4000x16xbf16>) -> tensor<4000x16xbf16>
-    return %2 : tensor<4000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir
deleted file mode 100644
index 68f9cda..0000000
--- a/gemm/mlir/gemm_4000_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<4000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<4000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x16xf16>) -> tensor<4000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<4000x16xf16>) -> tensor<4000x16xf16>
-    return %2 : tensor<4000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir
deleted file mode 100644
index 857de41..0000000
--- a/gemm/mlir/gemm_4000_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<4000x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4000x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16>
-    return %2 : tensor<4000x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir
deleted file mode 100644
index f64c226..0000000
--- a/gemm/mlir/gemm_4000_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<4000x1xf16> {
f16 - %0 = tensor.empty() : tensor<4000x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x1xf16>) -> tensor<4000x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<4000x1xf16>) -> tensor<4000x1xf16> - return %2 : tensor<4000x1xf16> - } -} diff --git a/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir deleted file mode 100644 index c98f58c..0000000 --- a/gemm/mlir/gemm_4000_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<4000x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<4000x1xbf16>) -> tensor<4000x1xbf16> - return %2 : tensor<4000x1xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir deleted file mode 100644 index 5aaef53..0000000 --- a/gemm/mlir/gemm_4000_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<4000x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x1xf16>) -> tensor<4000x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<4000x1xf16>) -> tensor<4000x1xf16> - return %2 : tensor<4000x1xf16> - } -} diff --git a/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir deleted file mode 100644 index cf6d890..0000000 --- a/gemm/mlir/gemm_4000_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<4000x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> - return %2 : tensor<4000x2xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir deleted file mode 100644 index 1d7ef35..0000000 --- a/gemm/mlir/gemm_4000_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<4000x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x2xf16>) -> tensor<4000x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<4000x2xf16>) -> tensor<4000x2xf16> - return %2 : tensor<4000x2xf16> - } -} diff --git a/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir deleted file mode 100644 index 1081115..0000000 --- a/gemm/mlir/gemm_4000_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<4000x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x2xbf16> - %1 = linalg.fill ins(%cst 
: bf16) outs(%0 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<4000x2xbf16>) -> tensor<4000x2xbf16> - return %2 : tensor<4000x2xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir deleted file mode 100644 index 5d645df..0000000 --- a/gemm/mlir/gemm_4000_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<4000x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x2xf16>) -> tensor<4000x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<4000x2xf16>) -> tensor<4000x2xf16> - return %2 : tensor<4000x2xf16> - } -} diff --git a/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir deleted file mode 100644 index faa22ff..0000000 --- a/gemm/mlir/gemm_4000_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<4000x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> - return %2 : tensor<4000x32xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir deleted file mode 100644 index eb8e87e..0000000 --- a/gemm/mlir/gemm_4000_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<4000x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x32xf16>) -> tensor<4000x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<4000x32xf16>) -> tensor<4000x32xf16> - return %2 : tensor<4000x32xf16> - } -} diff --git a/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir deleted file mode 100644 index 0688fe2..0000000 --- a/gemm/mlir/gemm_4000_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<4000x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<4000x32xbf16>) -> tensor<4000x32xbf16> - return %2 : tensor<4000x32xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir deleted file mode 100644 index d261394..0000000 --- a/gemm/mlir/gemm_4000_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<4000x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : 
tensor<4000x32xf16>) -> tensor<4000x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<4000x32xf16>) -> tensor<4000x32xf16> - return %2 : tensor<4000x32xf16> - } -} diff --git a/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir deleted file mode 100644 index ee32dc1..0000000 --- a/gemm/mlir/gemm_4000_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<4000x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> - return %2 : tensor<4000x4xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir deleted file mode 100644 index 61b5e3d..0000000 --- a/gemm/mlir/gemm_4000_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<4000x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x4xf16>) -> tensor<4000x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<4000x4xf16>) -> tensor<4000x4xf16> - return %2 : tensor<4000x4xf16> - } -} diff --git a/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir deleted file mode 100644 index 1f73b7e..0000000 --- a/gemm/mlir/gemm_4000_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<4000x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<4000x4xbf16>) -> tensor<4000x4xbf16> - return %2 : tensor<4000x4xbf16> - } -} diff --git a/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir deleted file mode 100644 index f85ff47..0000000 --- a/gemm/mlir/gemm_4000_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<4000x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4000x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x4xf16>) -> tensor<4000x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<4000x4xf16>) -> tensor<4000x4xf16> - return %2 : tensor<4000x4xf16> - } -} diff --git a/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir deleted file mode 100644 index a59e9b6..0000000 --- a/gemm/mlir/gemm_4000_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x4000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<4000x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4000x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16> - %2 = 
linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16>
-    return %2 : tensor<4000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir
deleted file mode 100644
index 2821933..0000000
--- a/gemm/mlir/gemm_4000_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x4000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<4000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<4000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x8xf16>) -> tensor<4000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x4000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<4000x8xf16>) -> tensor<4000x8xf16>
-    return %2 : tensor<4000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir
deleted file mode 100644
index bbaeb69..0000000
--- a/gemm/mlir/gemm_4000_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x4000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<4000x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4000x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<4000x8xbf16>) -> tensor<4000x8xbf16>
-    return %2 : tensor<4000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir
deleted file mode 100644
index 3bd900f..0000000
--- a/gemm/mlir/gemm_4000_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x4000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<4000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<4000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4000x8xf16>) -> tensor<4000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<4000x8xf16>) -> tensor<4000x8xf16>
-    return %2 : tensor<4000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4096_20480_2560_f16_tB.mlir b/gemm/mlir/gemm_4096_20480_2560_f16_tB.mlir
deleted file mode 100644
index 461726c..0000000
--- a/gemm/mlir/gemm_4096_20480_2560_f16_tB.mlir
+++ /dev/null
@@ -1,145 +0,0 @@
-#translation = #iree_codegen.translation_info
-module attributes {transform.with_named_sequence} {
-  stream.executable private @gemm {
-    stream.executable.export public @gemm workgroups() -> (index, index, index) {
-      %c64 = arith.constant 64 : index
-      %c320 = arith.constant 320 : index
-      %c1 = arith.constant 1 : index
-      stream.return %c64, %c320, %c1 : index, index, index
-    }
-    builtin.module {
-      func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} {
-        %c19 = arith.constant 19 : index
-        %c18 = arith.constant 18 : index
-        %c17 = arith.constant 17 : index
-        %c3 = arith.constant 3 : index
-        %c2 = arith.constant 2 : index
-        %c16 = arith.constant 16 : index
-        %c8 = arith.constant 8 : index
-        %c4 = arith.constant 4 : index
-        %c32 = arith.constant 32 : index
-        %c64 = arith.constant 64 : index
-        %c1 = arith.constant 1 : index
-        %c80 = arith.constant 80 : index
-        %c0 = arith.constant 0 : index
-        %cst = arith.constant dense<0.000000e+00> : vector<4xf32>
-        %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index
-        %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index
-        %thread_id_x = gpu.thread_id x
-        %thread_id_y = gpu.thread_id y
-        %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space>
-        %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space>
-        %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<4096x2560xf16, strided<[2560, 1], offset: ?>>
-        %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<20480x2560xf16, strided<[2560, 1], offset: ?>>
-        %2 = arith.muli %workgroup_id_0, %c64 : index
-        %3 = arith.muli %thread_id_y, %c32 : index
-        %4 = arith.divsi %thread_id_x, %c4 : index
-        %5 = arith.addi %4, %3 : index
-        %6 = arith.remsi %5, %c64 : index
-        %7 = arith.addi %6, %2 : index
-        %8 = arith.remsi %thread_id_x, %c4 : index
-        %9 = arith.muli %8, %c8 : index
-        %10 = arith.divsi %thread_id_x, %c64 : index
-        %11 = arith.muli %10, %c32 : index
-        %12 = arith.remsi %thread_id_x, %c16 : index
-        %13 = arith.addi %12, %11 : index
-        %14 = arith.remsi %thread_id_x, %c64 : index
-        %15 = arith.divsi %14, %c16 : index
-        %16 = arith.muli %15, %c4 : index
-        %17 = arith.addi %16, %c16 : index
-        %18 = arith.addi %13, %c16 : index
-        %19 = arith.muli %workgroup_id_1, %c64 : index
-        %20 = arith.addi %6, %19 : index
-        %21 = arith.addi %12, %3 : index
-        %22 = arith.addi %21, %c16 : index
-        %23:4 = scf.for %arg3 = %c0 to %c80 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) {
-          %62 = arith.muli %arg3, %c32 : index
-          %63 = arith.addi %62, %9 : index
-          %64 = vector.load %0[%7, %63] : memref<4096x2560xf16, strided<[2560, 1], offset: ?>>, vector<8xf16>
-          vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16>
-          amdgpu.lds_barrier
-          %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %69 = vector.load %1[%20, %63] : memref<20480x2560xf16, strided<[2560, 1], offset: ?>>, vector<8xf16>
-          amdgpu.lds_barrier
-          vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space>, vector<8xf16>
-          amdgpu.lds_barrier
-          %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space>, vector<4xf16>
-          %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>
-        }
-        %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<4096x20480xf32, strided<[20480, 1], offset: ?>>
-        %26 = arith.remsi %thread_id_x, %c64 : index
-        %27 = arith.divsi %26, %c16 : index
-        %28 = arith.muli %27, %c4 : index
-        %29 = arith.divsi %thread_id_x, %c64 : index
-        %30 = arith.muli %29, %c32 : index
-        %31 = arith.muli %workgroup_id_0, %c64 : index
-        %32 = arith.addi %31, %30 : index
-        %33 = arith.addi %32, %28 : index
-        %34 = arith.muli %thread_id_y, %c32 : index
-        %35 = arith.muli %workgroup_id_1, %c64 : index
-        %36 = arith.remsi %thread_id_x, %c16 : index
-        %37 = arith.addi %36, %35 : index
-        %38 = arith.addi %37, %34 : index
-        vector.store %24, %25[%33, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %40 = arith.addi %33, %c1 : index
-        vector.store %39, %25[%40, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %42 = arith.addi %33, %c2 : index
-        vector.store %41, %25[%42, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %44 = arith.addi %33, %c3 : index
-        vector.store %43, %25[%44, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %46 = arith.addi %33, %c16 : index
-        %47 = arith.addi %38, %c16 : index
-        vector.store %45, %25[%46, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %49 = arith.addi %33, %c17 : index
-        vector.store %48, %25[%49, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %51 = arith.addi %33, %c18 : index
-        vector.store %50, %25[%51, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %53 = arith.addi %33, %c19 : index
-        vector.store %52, %25[%53, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %54, %25[%46, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %55, %25[%49, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %56, %25[%51, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %57, %25[%53, %38] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %58, %25[%33, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %59, %25[%40, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %60, %25[%42, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %61, %25[%44, %47] : memref<4096x20480xf32, strided<[20480, 1], offset: ?>>, vector<1xf32>
-        return
-      }
-    }
-  }
-  func.func @isolated_benchmark(%arg0: tensor<4096x2560xf16>, %arg1: tensor<20480x2560xf16>) -> tensor<4096x20480xf32> {
-    %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<4096x2560xf16>, tensor<20480x2560xf16>) -> tensor<4096x20480xf32>
-    return %0 : tensor<4096x20480xf32>
-  }
-}
diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16.mlir
deleted file mode 100644
index da783d2..0000000
--- a/gemm/mlir/gemm_4096_4096_8192_bf16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4096x4096xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<8192x4096xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
-    return %2 : tensor<4096x4096xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir
deleted file mode 100644
index f9c0df8..0000000
--- a/gemm/mlir/gemm_4096_4096_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x4096xbf16>, %arg1: tensor<8192x4096xbf16>) -> tensor<4096x4096xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<4096x4096xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4096xbf16>, tensor<8192x4096xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
-    return %2 : tensor<4096x4096xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir b/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir
deleted file mode 100644
index ff2a1ac..0000000
--- 
a/gemm/mlir/gemm_4096_4096_8192_bf16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8192xbf16>) -> tensor<4096x4096xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<4096x4096xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x8192xbf16>) outs(%1 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16> - return %2 : tensor<4096x4096xbf16> - } -} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16.mlir b/gemm/mlir/gemm_4096_4096_8192_f16.mlir deleted file mode 100644 index d21690a..0000000 --- a/gemm/mlir/gemm_4096_4096_8192_f16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4096x4096xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<8192x4096xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - return %2 : tensor<4096x4096xf16> - } -} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir b/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir deleted file mode 100644 index f4ba892..0000000 --- a/gemm/mlir/gemm_4096_4096_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x4096xf16>, %arg1: tensor<8192x4096xf16>) -> tensor<4096x4096xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4096x4096xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x4096xf16>, tensor<8192x4096xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - return %2 : tensor<4096x4096xf16> - } -} diff --git a/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir b/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir deleted file mode 100644 index d96e00f..0000000 --- a/gemm/mlir/gemm_4096_4096_8192_f16_tB.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8192xf16>) -> tensor<4096x4096xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<4096x4096xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x8192xf16>) outs(%1 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16> - return %2 : tensor<4096x4096xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir deleted file mode 100644 index 7e21b10..0000000 --- a/gemm/mlir/gemm_5120_16_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir deleted 
file mode 100644 index e777fe8..0000000 --- a/gemm/mlir/gemm_5120_16_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir deleted file mode 100644 index 712a5a3..0000000 --- a/gemm/mlir/gemm_5120_16_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir deleted file mode 100644 index e95a174..0000000 --- a/gemm/mlir/gemm_5120_16_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir deleted file mode 100644 index 1f0b6cf..0000000 --- a/gemm/mlir/gemm_5120_16_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir deleted file mode 100644 index c0efaf2..0000000 --- a/gemm/mlir/gemm_5120_16_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir deleted file 
mode 100644 index d850d73..0000000 --- a/gemm/mlir/gemm_5120_16_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir deleted file mode 100644 index e4183f4..0000000 --- a/gemm/mlir/gemm_5120_16_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir deleted file mode 100644 index dab5177..0000000 --- a/gemm/mlir/gemm_5120_16_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir deleted file mode 100644 index e4d9277..0000000 --- a/gemm/mlir/gemm_5120_16_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir deleted file mode 100644 index f5dfe26..0000000 --- a/gemm/mlir/gemm_5120_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir deleted file mode 
100644 index 71c7f1f..0000000 --- a/gemm/mlir/gemm_5120_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir deleted file mode 100644 index 20d9a68..0000000 --- a/gemm/mlir/gemm_5120_16_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_640_f16_tA.mlir b/gemm/mlir/gemm_5120_16_640_f16_tA.mlir deleted file mode 100644 index bf06141..0000000 --- a/gemm/mlir/gemm_5120_16_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir deleted file mode 100644 index 4ab4378..0000000 --- a/gemm/mlir/gemm_5120_16_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir deleted file mode 100644 index 476253e..0000000 --- a/gemm/mlir/gemm_5120_16_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir deleted file mode 100644 index af65c87..0000000 --- 
a/gemm/mlir/gemm_5120_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<5120x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<5120x16xbf16>) -> tensor<5120x16xbf16> - return %2 : tensor<5120x16xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir deleted file mode 100644 index 9acb611..0000000 --- a/gemm/mlir/gemm_5120_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x16xf16>) -> tensor<5120x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x16xf16>) outs(%1 : tensor<5120x16xf16>) -> tensor<5120x16xf16> - return %2 : tensor<5120x16xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir deleted file mode 100644 index fbad7cb..0000000 --- a/gemm/mlir/gemm_5120_1_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir deleted file mode 100644 index a7e29cd..0000000 --- a/gemm/mlir/gemm_5120_1_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir deleted file mode 100644 index d006ff7..0000000 --- a/gemm/mlir/gemm_5120_1_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir deleted file mode 100644 index a9fcf15..0000000 --- 
a/gemm/mlir/gemm_5120_1_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir deleted file mode 100644 index 9417831..0000000 --- a/gemm/mlir/gemm_5120_1_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir deleted file mode 100644 index 124f5a6..0000000 --- a/gemm/mlir/gemm_5120_1_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir deleted file mode 100644 index 3779817..0000000 --- a/gemm/mlir/gemm_5120_1_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir deleted file mode 100644 index 6258f4f..0000000 --- a/gemm/mlir/gemm_5120_1_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir deleted file mode 100644 index c2c0363..0000000 --- a/gemm/mlir/gemm_5120_1_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - 
-module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir deleted file mode 100644 index 27728e7..0000000 --- a/gemm/mlir/gemm_5120_1_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir deleted file mode 100644 index e8652a1..0000000 --- a/gemm/mlir/gemm_5120_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir deleted file mode 100644 index d36e54c..0000000 --- a/gemm/mlir/gemm_5120_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir deleted file mode 100644 index 3b414a8..0000000 --- a/gemm/mlir/gemm_5120_1_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_640_f16_tA.mlir b/gemm/mlir/gemm_5120_1_640_f16_tA.mlir deleted file mode 100644 index f8bbbe2..0000000 --- a/gemm/mlir/gemm_5120_1_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: 
tensor<640x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir deleted file mode 100644 index fdc2298..0000000 --- a/gemm/mlir/gemm_5120_1_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir deleted file mode 100644 index be5c109..0000000 --- a/gemm/mlir/gemm_5120_1_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir deleted file mode 100644 index 13e6f69..0000000 --- a/gemm/mlir/gemm_5120_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<5120x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<5120x1xbf16>) -> tensor<5120x1xbf16> - return %2 : tensor<5120x1xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir deleted file mode 100644 index 572ff85..0000000 --- a/gemm/mlir/gemm_5120_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x1xf16>) -> tensor<5120x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x1xf16>) outs(%1 : tensor<5120x1xf16>) -> tensor<5120x1xf16> - return %2 : tensor<5120x1xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir deleted file mode 100644 index 07b6e62..0000000 --- a/gemm/mlir/gemm_5120_2_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 
0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir deleted file mode 100644 index 70ad768..0000000 --- a/gemm/mlir/gemm_5120_2_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir deleted file mode 100644 index e83f65d..0000000 --- a/gemm/mlir/gemm_5120_2_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir deleted file mode 100644 index e30738c..0000000 --- a/gemm/mlir/gemm_5120_2_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir deleted file mode 100644 index 8a04fb2..0000000 --- a/gemm/mlir/gemm_5120_2_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir deleted file mode 100644 index 2c77846..0000000 --- a/gemm/mlir/gemm_5120_2_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : 
tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir deleted file mode 100644 index 25d142a..0000000 --- a/gemm/mlir/gemm_5120_2_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir deleted file mode 100644 index 414bd86..0000000 --- a/gemm/mlir/gemm_5120_2_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir deleted file mode 100644 index 3b81d86..0000000 --- a/gemm/mlir/gemm_5120_2_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir deleted file mode 100644 index fe954d2..0000000 --- a/gemm/mlir/gemm_5120_2_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir deleted file mode 100644 index 6599984..0000000 --- a/gemm/mlir/gemm_5120_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : 
tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir deleted file mode 100644 index f88163e..0000000 --- a/gemm/mlir/gemm_5120_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir deleted file mode 100644 index 8ade0ca..0000000 --- a/gemm/mlir/gemm_5120_2_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_640_f16_tA.mlir b/gemm/mlir/gemm_5120_2_640_f16_tA.mlir deleted file mode 100644 index 3c50f2f..0000000 --- a/gemm/mlir/gemm_5120_2_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir deleted file mode 100644 index 5f8b20a..0000000 --- a/gemm/mlir/gemm_5120_2_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir deleted file mode 100644 index 7fe73cd..0000000 --- a/gemm/mlir/gemm_5120_2_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, 
%arg1 : tensor<6912x5120xf16>, tensor<6912x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir deleted file mode 100644 index 4460592..0000000 --- a/gemm/mlir/gemm_5120_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<5120x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<5120x2xbf16>) -> tensor<5120x2xbf16> - return %2 : tensor<5120x2xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir deleted file mode 100644 index 6e9ac82..0000000 --- a/gemm/mlir/gemm_5120_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x2xf16>) -> tensor<5120x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x2xf16>) outs(%1 : tensor<5120x2xf16>) -> tensor<5120x2xf16> - return %2 : tensor<5120x2xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir deleted file mode 100644 index 256678e..0000000 --- a/gemm/mlir/gemm_5120_32_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir deleted file mode 100644 index e7f5580..0000000 --- a/gemm/mlir/gemm_5120_32_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir deleted file mode 100644 index d84ed24..0000000 --- a/gemm/mlir/gemm_5120_32_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, 
tensor<13824x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir deleted file mode 100644 index f50d0d0..0000000 --- a/gemm/mlir/gemm_5120_32_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir deleted file mode 100644 index a4af4b4..0000000 --- a/gemm/mlir/gemm_5120_32_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir deleted file mode 100644 index 16e7179..0000000 --- a/gemm/mlir/gemm_5120_32_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir deleted file mode 100644 index bea8cb5..0000000 --- a/gemm/mlir/gemm_5120_32_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir deleted file mode 100644 index d4d7491..0000000 --- a/gemm/mlir/gemm_5120_32_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, 
tensor<2560x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir deleted file mode 100644 index a1ec40e..0000000 --- a/gemm/mlir/gemm_5120_32_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir deleted file mode 100644 index 8f6301c..0000000 --- a/gemm/mlir/gemm_5120_32_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir deleted file mode 100644 index 4c72158..0000000 --- a/gemm/mlir/gemm_5120_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir deleted file mode 100644 index 027a09f..0000000 --- a/gemm/mlir/gemm_5120_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir deleted file mode 100644 index fec70cb..0000000 --- a/gemm/mlir/gemm_5120_32_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, 
tensor<640x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_640_f16_tA.mlir b/gemm/mlir/gemm_5120_32_640_f16_tA.mlir deleted file mode 100644 index d2e3949..0000000 --- a/gemm/mlir/gemm_5120_32_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir deleted file mode 100644 index 7e22180..0000000 --- a/gemm/mlir/gemm_5120_32_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir deleted file mode 100644 index 1d9947a..0000000 --- a/gemm/mlir/gemm_5120_32_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x32xf16>) outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir deleted file mode 100644 index 323437a..0000000 --- a/gemm/mlir/gemm_5120_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<5120x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<5120x32xbf16>) -> tensor<5120x32xbf16> - return %2 : tensor<5120x32xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir deleted file mode 100644 index 91e0026..0000000 --- a/gemm/mlir/gemm_5120_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x32xf16>) -> tensor<5120x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x32xf16>) 
outs(%1 : tensor<5120x32xf16>) -> tensor<5120x32xf16> - return %2 : tensor<5120x32xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir deleted file mode 100644 index b02b975..0000000 --- a/gemm/mlir/gemm_5120_4_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir deleted file mode 100644 index cdbe240..0000000 --- a/gemm/mlir/gemm_5120_4_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir deleted file mode 100644 index c024c59..0000000 --- a/gemm/mlir/gemm_5120_4_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir deleted file mode 100644 index 1b355e9..0000000 --- a/gemm/mlir/gemm_5120_4_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir deleted file mode 100644 index 77d316d..0000000 --- a/gemm/mlir/gemm_5120_4_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> 
tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir deleted file mode 100644 index b77fd46..0000000 --- a/gemm/mlir/gemm_5120_4_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir deleted file mode 100644 index b441065..0000000 --- a/gemm/mlir/gemm_5120_4_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir deleted file mode 100644 index 78af1ae..0000000 --- a/gemm/mlir/gemm_5120_4_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir deleted file mode 100644 index 65e3813..0000000 --- a/gemm/mlir/gemm_5120_4_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir deleted file mode 100644 index 055a56a..0000000 --- a/gemm/mlir/gemm_5120_4_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git 
a/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir deleted file mode 100644 index 133c6e2..0000000 --- a/gemm/mlir/gemm_5120_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir deleted file mode 100644 index 3b6cabf..0000000 --- a/gemm/mlir/gemm_5120_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir deleted file mode 100644 index 1e22dd9..0000000 --- a/gemm/mlir/gemm_5120_4_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_640_f16_tA.mlir b/gemm/mlir/gemm_5120_4_640_f16_tA.mlir deleted file mode 100644 index f7459f4..0000000 --- a/gemm/mlir/gemm_5120_4_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir deleted file mode 100644 index 9244683..0000000 --- a/gemm/mlir/gemm_5120_4_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir 
b/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir deleted file mode 100644 index f3c0b6a..0000000 --- a/gemm/mlir/gemm_5120_4_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir deleted file mode 100644 index 1e39bcc..0000000 --- a/gemm/mlir/gemm_5120_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<5120x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<5120x4xbf16>) -> tensor<5120x4xbf16> - return %2 : tensor<5120x4xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir deleted file mode 100644 index 59ff5c1..0000000 --- a/gemm/mlir/gemm_5120_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x4xf16>) -> tensor<5120x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x4xf16>) outs(%1 : tensor<5120x4xf16>) -> tensor<5120x4xf16> - return %2 : tensor<5120x4xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir deleted file mode 100644 index 090d0a3..0000000 --- a/gemm/mlir/gemm_5120_8_1280_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xbf16>, %arg1: tensor<1280x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xbf16>, tensor<1280x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir b/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir deleted file mode 100644 index 68c2973..0000000 --- a/gemm/mlir/gemm_5120_8_1280_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1280x5120xf16>, %arg1: tensor<1280x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1280x5120xf16>, tensor<1280x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir deleted file mode 100644 index 
b80c0d8..0000000 --- a/gemm/mlir/gemm_5120_8_13824_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xbf16>, %arg1: tensor<13824x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xbf16>, tensor<13824x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir b/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir deleted file mode 100644 index 77658a9..0000000 --- a/gemm/mlir/gemm_5120_8_13824_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<13824x5120xf16>, %arg1: tensor<13824x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<13824x5120xf16>, tensor<13824x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir deleted file mode 100644 index 3d405b3..0000000 --- a/gemm/mlir/gemm_5120_8_1728_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xbf16>, %arg1: tensor<1728x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xbf16>, tensor<1728x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir b/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir deleted file mode 100644 index 9717a1c..0000000 --- a/gemm/mlir/gemm_5120_8_1728_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1728x5120xf16>, %arg1: tensor<1728x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1728x5120xf16>, tensor<1728x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir deleted file mode 100644 index e20b534..0000000 --- a/gemm/mlir/gemm_5120_8_2560_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xbf16>, %arg1: tensor<2560x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xbf16>, tensor<2560x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir b/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir deleted file mode 100644 index fcb3692..0000000 --- 
a/gemm/mlir/gemm_5120_8_2560_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2560x5120xf16>, %arg1: tensor<2560x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2560x5120xf16>, tensor<2560x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir deleted file mode 100644 index e86a941..0000000 --- a/gemm/mlir/gemm_5120_8_3456_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xbf16>, %arg1: tensor<3456x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xbf16>, tensor<3456x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir b/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir deleted file mode 100644 index b81b946..0000000 --- a/gemm/mlir/gemm_5120_8_3456_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3456x5120xf16>, %arg1: tensor<3456x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3456x5120xf16>, tensor<3456x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir deleted file mode 100644 index b66fabd..0000000 --- a/gemm/mlir/gemm_5120_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir b/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir deleted file mode 100644 index b42ef4d..0000000 --- a/gemm/mlir/gemm_5120_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x5120xf16>, %arg1: tensor<5120x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x5120xf16>, tensor<5120x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir deleted file mode 100644 index 919f4aa..0000000 --- a/gemm/mlir/gemm_5120_8_640_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module 
{ - func.func @main(%arg0: tensor<640x5120xbf16>, %arg1: tensor<640x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xbf16>, tensor<640x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_640_f16_tA.mlir b/gemm/mlir/gemm_5120_8_640_f16_tA.mlir deleted file mode 100644 index 2667615..0000000 --- a/gemm/mlir/gemm_5120_8_640_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<640x5120xf16>, %arg1: tensor<640x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x5120xf16>, tensor<640x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir deleted file mode 100644 index 68be7d6..0000000 --- a/gemm/mlir/gemm_5120_8_6912_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xbf16>, %arg1: tensor<6912x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xbf16>, tensor<6912x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir b/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir deleted file mode 100644 index b423ad4..0000000 --- a/gemm/mlir/gemm_5120_8_6912_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<6912x5120xf16>, %arg1: tensor<6912x8xf16>) -> tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<6912x5120xf16>, tensor<6912x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir deleted file mode 100644 index 70c44a9..0000000 --- a/gemm/mlir/gemm_5120_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<5120x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<5120x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<5120x8xbf16>) -> tensor<5120x8xbf16> - return %2 : tensor<5120x8xbf16> - } -} diff --git a/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir b/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir deleted file mode 100644 index 79a3420..0000000 --- a/gemm/mlir/gemm_5120_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x5120xf16>, %arg1: tensor<8192x8xf16>) -> 
tensor<5120x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<5120x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x5120xf16>, tensor<8192x8xf16>) outs(%1 : tensor<5120x8xf16>) -> tensor<5120x8xf16> - return %2 : tensor<5120x8xf16> - } -} diff --git a/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir deleted file mode 100644 index c05fd42..0000000 --- a/gemm/mlir/gemm_57344_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<57344x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x16xbf16>) -> tensor<57344x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<57344x16xbf16>) -> tensor<57344x16xbf16> - return %2 : tensor<57344x16xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir deleted file mode 100644 index a27c3ca..0000000 --- a/gemm/mlir/gemm_57344_16_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x16xf16>) -> tensor<57344x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x16xf16>) -> tensor<57344x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x16xf16>) outs(%1 : tensor<57344x16xf16>) -> tensor<57344x16xf16> - return %2 : tensor<57344x16xf16> - } -} diff --git a/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir deleted file mode 100644 index 6b50b47..0000000 --- a/gemm/mlir/gemm_57344_1_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<57344x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x1xbf16>) -> tensor<57344x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<57344x1xbf16>) -> tensor<57344x1xbf16> - return %2 : tensor<57344x1xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir deleted file mode 100644 index a391e24..0000000 --- a/gemm/mlir/gemm_57344_1_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x1xf16>) -> tensor<57344x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x1xf16>) -> tensor<57344x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x1xf16>) outs(%1 : tensor<57344x1xf16>) -> tensor<57344x1xf16> - return %2 : tensor<57344x1xf16> - } -} diff --git a/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir deleted file mode 100644 index b176f2c..0000000 --- a/gemm/mlir/gemm_57344_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x2xbf16>) -> 
tensor<57344x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x2xbf16>) -> tensor<57344x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<57344x2xbf16>) -> tensor<57344x2xbf16> - return %2 : tensor<57344x2xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir deleted file mode 100644 index ffac68f..0000000 --- a/gemm/mlir/gemm_57344_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x2xf16>) -> tensor<57344x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x2xf16>) -> tensor<57344x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x2xf16>) outs(%1 : tensor<57344x2xf16>) -> tensor<57344x2xf16> - return %2 : tensor<57344x2xf16> - } -} diff --git a/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir deleted file mode 100644 index bbe0c75..0000000 --- a/gemm/mlir/gemm_57344_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<57344x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x32xbf16>) -> tensor<57344x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<57344x32xbf16>) -> tensor<57344x32xbf16> - return %2 : tensor<57344x32xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir deleted file mode 100644 index 34675d0..0000000 --- a/gemm/mlir/gemm_57344_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x32xf16>) -> tensor<57344x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x32xf16>) -> tensor<57344x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x32xf16>) outs(%1 : tensor<57344x32xf16>) -> tensor<57344x32xf16> - return %2 : tensor<57344x32xf16> - } -} diff --git a/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir deleted file mode 100644 index 2189c7e..0000000 --- a/gemm/mlir/gemm_57344_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<57344x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x4xbf16>) -> tensor<57344x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<57344x4xbf16>) -> tensor<57344x4xbf16> - return %2 : tensor<57344x4xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir deleted file mode 100644 index 5419137..0000000 --- a/gemm/mlir/gemm_57344_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: 
tensor<8192x4xf16>) -> tensor<57344x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x4xf16>) -> tensor<57344x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x4xf16>) outs(%1 : tensor<57344x4xf16>) -> tensor<57344x4xf16> - return %2 : tensor<57344x4xf16> - } -} diff --git a/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir deleted file mode 100644 index 84785f7..0000000 --- a/gemm/mlir/gemm_57344_8_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<57344x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<57344x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<57344x8xbf16>) -> tensor<57344x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<57344x8xbf16>) -> tensor<57344x8xbf16> - return %2 : tensor<57344x8xbf16> - } -} diff --git a/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir b/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir deleted file mode 100644 index 58d7ded..0000000 --- a/gemm/mlir/gemm_57344_8_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x57344xf16>, %arg1: tensor<8192x8xf16>) -> tensor<57344x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<57344x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<57344x8xf16>) -> tensor<57344x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x57344xf16>, tensor<8192x8xf16>) outs(%1 : tensor<57344x8xf16>) -> tensor<57344x8xf16> - return %2 : tensor<57344x8xf16> - } -} diff --git a/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir deleted file mode 100644 index a9180ad..0000000 --- a/gemm/mlir/gemm_6912_16_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<6912x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x16xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x16xbf16>) -> tensor<6912x16xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<6912x16xbf16>) -> tensor<6912x16xbf16> - return %2 : tensor<6912x16xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir deleted file mode 100644 index 47aaf92..0000000 --- a/gemm/mlir/gemm_6912_16_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x16xf16>) -> tensor<6912x16xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x16xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x16xf16>) -> tensor<6912x16xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x16xf16>) outs(%1 : tensor<6912x16xf16>) -> tensor<6912x16xf16> - return %2 : tensor<6912x16xf16> - } -} diff --git a/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir deleted file mode 100644 index f087893..0000000 --- a/gemm/mlir/gemm_6912_1_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x1xbf16>) 
-> tensor<6912x1xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x1xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x1xbf16>) -> tensor<6912x1xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<6912x1xbf16>) -> tensor<6912x1xbf16> - return %2 : tensor<6912x1xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir deleted file mode 100644 index beee00d..0000000 --- a/gemm/mlir/gemm_6912_1_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x1xf16>) -> tensor<6912x1xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x1xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x1xf16>) -> tensor<6912x1xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x1xf16>) outs(%1 : tensor<6912x1xf16>) -> tensor<6912x1xf16> - return %2 : tensor<6912x1xf16> - } -} diff --git a/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir deleted file mode 100644 index 441ec83..0000000 --- a/gemm/mlir/gemm_6912_2_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<6912x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x2xbf16>) -> tensor<6912x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<6912x2xbf16>) -> tensor<6912x2xbf16> - return %2 : tensor<6912x2xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir deleted file mode 100644 index 397c7b2..0000000 --- a/gemm/mlir/gemm_6912_2_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x2xf16>) -> tensor<6912x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x2xf16>) -> tensor<6912x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x2xf16>) outs(%1 : tensor<6912x2xf16>) -> tensor<6912x2xf16> - return %2 : tensor<6912x2xf16> - } -} diff --git a/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir deleted file mode 100644 index 926a24a..0000000 --- a/gemm/mlir/gemm_6912_32_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<6912x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x32xbf16>) -> tensor<6912x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<6912x32xbf16>) -> tensor<6912x32xbf16> - return %2 : tensor<6912x32xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir deleted file mode 100644 index 75888ec..0000000 --- a/gemm/mlir/gemm_6912_32_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x32xf16>) -> tensor<6912x32xf16> { - %cst = arith.constant 
0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x32xf16>) -> tensor<6912x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x32xf16>) outs(%1 : tensor<6912x32xf16>) -> tensor<6912x32xf16> - return %2 : tensor<6912x32xf16> - } -} diff --git a/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir deleted file mode 100644 index 105402a..0000000 --- a/gemm/mlir/gemm_6912_4_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<6912x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x4xbf16>) -> tensor<6912x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<6912x4xbf16>) -> tensor<6912x4xbf16> - return %2 : tensor<6912x4xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir deleted file mode 100644 index 2938490..0000000 --- a/gemm/mlir/gemm_6912_4_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x4xf16>) -> tensor<6912x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x4xf16>) -> tensor<6912x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x4xf16>) outs(%1 : tensor<6912x4xf16>) -> tensor<6912x4xf16> - return %2 : tensor<6912x4xf16> - } -} diff --git a/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir deleted file mode 100644 index c62dc28..0000000 --- a/gemm/mlir/gemm_6912_8_5120_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<6912x8xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<6912x8xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<6912x8xbf16>) -> tensor<6912x8xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<6912x8xbf16>) -> tensor<6912x8xbf16> - return %2 : tensor<6912x8xbf16> - } -} diff --git a/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir b/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir deleted file mode 100644 index 0fc7b88..0000000 --- a/gemm/mlir/gemm_6912_8_5120_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<5120x6912xf16>, %arg1: tensor<5120x8xf16>) -> tensor<6912x8xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<6912x8xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<6912x8xf16>) -> tensor<6912x8xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x6912xf16>, tensor<5120x8xf16>) outs(%1 : tensor<6912x8xf16>) -> tensor<6912x8xf16> - return %2 : tensor<6912x8xf16> - } -} diff --git a/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir deleted file mode 100644 index c7660f1..0000000 --- a/gemm/mlir/gemm_7168_16_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<7168x16xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<7168x16xbf16> 
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x16xbf16>) -> tensor<7168x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<7168x16xbf16>) -> tensor<7168x16xbf16>
-    return %2 : tensor<7168x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir
deleted file mode 100644
index 3b4e48c..0000000
--- a/gemm/mlir/gemm_7168_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x16xf16>) -> tensor<7168x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x16xf16>) -> tensor<7168x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x16xf16>) outs(%1 : tensor<7168x16xf16>) -> tensor<7168x16xf16>
-    return %2 : tensor<7168x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir
deleted file mode 100644
index 41d8ee8..0000000
--- a/gemm/mlir/gemm_7168_1_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<7168x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7168x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x1xbf16>) -> tensor<7168x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<7168x1xbf16>) -> tensor<7168x1xbf16>
-    return %2 : tensor<7168x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir
deleted file mode 100644
index 93b1d5e..0000000
--- a/gemm/mlir/gemm_7168_1_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x1xf16>) -> tensor<7168x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x1xf16>) -> tensor<7168x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x1xf16>) outs(%1 : tensor<7168x1xf16>) -> tensor<7168x1xf16>
-    return %2 : tensor<7168x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir
deleted file mode 100644
index 555cca9..0000000
--- a/gemm/mlir/gemm_7168_2_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<7168x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7168x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x2xbf16>) -> tensor<7168x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<7168x2xbf16>) -> tensor<7168x2xbf16>
-    return %2 : tensor<7168x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir
deleted file mode 100644
index 4ab13c2..0000000
--- a/gemm/mlir/gemm_7168_2_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x2xf16>) -> tensor<7168x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x2xf16>) -> tensor<7168x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x2xf16>) outs(%1 : tensor<7168x2xf16>) -> tensor<7168x2xf16>
-    return %2 : tensor<7168x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir
deleted file mode 100644
index e6b536c..0000000
--- a/gemm/mlir/gemm_7168_32_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<7168x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7168x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x32xbf16>) -> tensor<7168x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<7168x32xbf16>) -> tensor<7168x32xbf16>
-    return %2 : tensor<7168x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir
deleted file mode 100644
index 2d2744f..0000000
--- a/gemm/mlir/gemm_7168_32_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x32xf16>) -> tensor<7168x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x32xf16>) -> tensor<7168x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x32xf16>) outs(%1 : tensor<7168x32xf16>) -> tensor<7168x32xf16>
-    return %2 : tensor<7168x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir
deleted file mode 100644
index 98c5839..0000000
--- a/gemm/mlir/gemm_7168_4_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<7168x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7168x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x4xbf16>) -> tensor<7168x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<7168x4xbf16>) -> tensor<7168x4xbf16>
-    return %2 : tensor<7168x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir
deleted file mode 100644
index 1bf5e1c..0000000
--- a/gemm/mlir/gemm_7168_4_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x4xf16>) -> tensor<7168x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x4xf16>) -> tensor<7168x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x4xf16>) outs(%1 : tensor<7168x4xf16>) -> tensor<7168x4xf16>
-    return %2 : tensor<7168x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir
deleted file mode 100644
index c7dbcb9..0000000
--- a/gemm/mlir/gemm_7168_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<7168x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7168x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7168x8xbf16>) -> tensor<7168x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<7168x8xbf16>) -> tensor<7168x8xbf16>
-    return %2 : tensor<7168x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir b/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir
deleted file mode 100644
index f36208c..0000000
--- a/gemm/mlir/gemm_7168_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x7168xf16>, %arg1: tensor<8192x8xf16>) -> tensor<7168x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7168x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7168x8xf16>) -> tensor<7168x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x7168xf16>, tensor<8192x8xf16>) outs(%1 : tensor<7168x8xf16>) -> tensor<7168x8xf16>
-    return %2 : tensor<7168x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir
deleted file mode 100644
index db4ed5e..0000000
--- a/gemm/mlir/gemm_7680_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<7680x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x16xbf16>) -> tensor<7680x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<7680x16xbf16>) -> tensor<7680x16xbf16>
-    return %2 : tensor<7680x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir
deleted file mode 100644
index 884fae5..0000000
--- a/gemm/mlir/gemm_7680_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x16xf16>) -> tensor<7680x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x16xf16>) -> tensor<7680x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x16xf16>) outs(%1 : tensor<7680x16xf16>) -> tensor<7680x16xf16>
-    return %2 : tensor<7680x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir
deleted file mode 100644
index 3e9229a..0000000
--- a/gemm/mlir/gemm_7680_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<7680x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x1xbf16>) -> tensor<7680x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<7680x1xbf16>) -> tensor<7680x1xbf16>
-    return %2 : tensor<7680x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir
deleted file mode 100644
index 8852272..0000000
--- a/gemm/mlir/gemm_7680_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x1xf16>) -> tensor<7680x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x1xf16>) -> tensor<7680x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x1xf16>) outs(%1 : tensor<7680x1xf16>) -> tensor<7680x1xf16>
-    return %2 : tensor<7680x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir
deleted file mode 100644
index 91b162d..0000000
--- a/gemm/mlir/gemm_7680_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<7680x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x2xbf16>) -> tensor<7680x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<7680x2xbf16>) -> tensor<7680x2xbf16>
-    return %2 : tensor<7680x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir
deleted file mode 100644
index 0b11af3..0000000
--- a/gemm/mlir/gemm_7680_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x2xf16>) -> tensor<7680x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x2xf16>) -> tensor<7680x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x2xf16>) outs(%1 : tensor<7680x2xf16>) -> tensor<7680x2xf16>
-    return %2 : tensor<7680x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir
deleted file mode 100644
index a89c462..0000000
--- a/gemm/mlir/gemm_7680_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<7680x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x32xbf16>) -> tensor<7680x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<7680x32xbf16>) -> tensor<7680x32xbf16>
-    return %2 : tensor<7680x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir
deleted file mode 100644
index 6dd24ce..0000000
--- a/gemm/mlir/gemm_7680_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x32xf16>) -> tensor<7680x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x32xf16>) -> tensor<7680x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x32xf16>) outs(%1 : tensor<7680x32xf16>) -> tensor<7680x32xf16>
-    return %2 : tensor<7680x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir
deleted file mode 100644
index b0334e4..0000000
--- a/gemm/mlir/gemm_7680_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<7680x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x4xbf16>) -> tensor<7680x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<7680x4xbf16>) -> tensor<7680x4xbf16>
-    return %2 : tensor<7680x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir
deleted file mode 100644
index c927588..0000000
--- a/gemm/mlir/gemm_7680_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x4xf16>) -> tensor<7680x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x4xf16>) -> tensor<7680x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x4xf16>) outs(%1 : tensor<7680x4xf16>) -> tensor<7680x4xf16>
-    return %2 : tensor<7680x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir
deleted file mode 100644
index 4d799fa..0000000
--- a/gemm/mlir/gemm_7680_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<7680x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<7680x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<7680x8xbf16>) -> tensor<7680x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<7680x8xbf16>) -> tensor<7680x8xbf16>
-    return %2 : tensor<7680x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir b/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir
deleted file mode 100644
index f817f9d..0000000
--- a/gemm/mlir/gemm_7680_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x7680xf16>, %arg1: tensor<5120x8xf16>) -> tensor<7680x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<7680x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<7680x8xf16>) -> tensor<7680x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x7680xf16>, tensor<5120x8xf16>) outs(%1 : tensor<7680x8xf16>) -> tensor<7680x8xf16>
-    return %2 : tensor<7680x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir
deleted file mode 100644
index 50cb640..0000000
--- a/gemm/mlir/gemm_8000_16_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x16xbf16>) -> tensor<8000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x16xbf16>) outs(%1 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16>
-    return %2 : tensor<8000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir
deleted file mode 100644
index 53b5315..0000000
--- a/gemm/mlir/gemm_8000_16_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x16xf16>) -> tensor<8000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x16xf16>) -> tensor<8000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x16xf16>) outs(%1 : tensor<8000x16xf16>) -> tensor<8000x16xf16>
-    return %2 : tensor<8000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir
deleted file mode 100644
index d61ae44..0000000
--- a/gemm/mlir/gemm_8000_16_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<8000x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<8000x16xbf16>) -> tensor<8000x16xbf16>
-    return %2 : tensor<8000x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir
deleted file mode 100644
index 2fdaae0..0000000
--- a/gemm/mlir/gemm_8000_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8000x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x16xf16>) -> tensor<8000x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x16xf16>) outs(%1 : tensor<8000x16xf16>) -> tensor<8000x16xf16>
-    return %2 : tensor<8000x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir
deleted file mode 100644
index 2103508..0000000
--- a/gemm/mlir/gemm_8000_1_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x1xbf16>) -> tensor<8000x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x1xbf16>) outs(%1 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16>
-    return %2 : tensor<8000x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir
deleted file mode 100644
index d168465..0000000
--- a/gemm/mlir/gemm_8000_1_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x1xf16>) -> tensor<8000x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x1xf16>) -> tensor<8000x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x1xf16>) outs(%1 : tensor<8000x1xf16>) -> tensor<8000x1xf16>
-    return %2 : tensor<8000x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir
deleted file mode 100644
index 0f58095..0000000
--- a/gemm/mlir/gemm_8000_1_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8000x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<8000x1xbf16>) -> tensor<8000x1xbf16>
-    return %2 : tensor<8000x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir
deleted file mode 100644
index 52e5c03..0000000
--- a/gemm/mlir/gemm_8000_1_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8000x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x1xf16>) -> tensor<8000x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x1xf16>) outs(%1 : tensor<8000x1xf16>) -> tensor<8000x1xf16>
-    return %2 : tensor<8000x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir
deleted file mode 100644
index 668917b..0000000
--- a/gemm/mlir/gemm_8000_2_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x2xbf16>) -> tensor<8000x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x2xbf16>) outs(%1 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16>
-    return %2 : tensor<8000x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir
deleted file mode 100644
index d85abf9..0000000
--- a/gemm/mlir/gemm_8000_2_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x2xf16>) -> tensor<8000x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x2xf16>) -> tensor<8000x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x2xf16>) outs(%1 : tensor<8000x2xf16>) -> tensor<8000x2xf16>
-    return %2 : tensor<8000x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir
deleted file mode 100644
index fa3aeca..0000000
--- a/gemm/mlir/gemm_8000_2_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8000x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<8000x2xbf16>) -> tensor<8000x2xbf16>
-    return %2 : tensor<8000x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir
deleted file mode 100644
index 4d8cb01..0000000
--- a/gemm/mlir/gemm_8000_2_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8000x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x2xf16>) -> tensor<8000x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x2xf16>) outs(%1 : tensor<8000x2xf16>) -> tensor<8000x2xf16>
-    return %2 : tensor<8000x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir
deleted file mode 100644
index e76d224..0000000
--- a/gemm/mlir/gemm_8000_32_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x32xbf16>) -> tensor<8000x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x32xbf16>) outs(%1 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16>
-    return %2 : tensor<8000x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir
deleted file mode 100644
index 5c226af..0000000
--- a/gemm/mlir/gemm_8000_32_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x32xf16>) -> tensor<8000x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x32xf16>) -> tensor<8000x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x32xf16>) outs(%1 : tensor<8000x32xf16>) -> tensor<8000x32xf16>
-    return %2 : tensor<8000x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir
deleted file mode 100644
index 4df2655..0000000
--- a/gemm/mlir/gemm_8000_32_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8000x32xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x32xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<8000x32xbf16>) -> tensor<8000x32xbf16>
-    return %2 : tensor<8000x32xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir
deleted file mode 100644
index 656010a..0000000
--- a/gemm/mlir/gemm_8000_32_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x32xf16>) -> tensor<8000x32xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x32xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x32xf16>) -> tensor<8000x32xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x32xf16>) outs(%1 : tensor<8000x32xf16>) -> tensor<8000x32xf16>
-    return %2 : tensor<8000x32xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir
deleted file mode 100644
index f45eab2..0000000
--- a/gemm/mlir/gemm_8000_4_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x4xbf16>) -> tensor<8000x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x4xbf16>) outs(%1 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16>
-    return %2 : tensor<8000x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir
deleted file mode 100644
index a715200..0000000
--- a/gemm/mlir/gemm_8000_4_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x4xf16>) -> tensor<8000x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x4xf16>) -> tensor<8000x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x4xf16>) outs(%1 : tensor<8000x4xf16>) -> tensor<8000x4xf16>
-    return %2 : tensor<8000x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir
deleted file mode 100644
index a0bf7e7..0000000
--- a/gemm/mlir/gemm_8000_4_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8000x4xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x4xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<8000x4xbf16>) -> tensor<8000x4xbf16>
-    return %2 : tensor<8000x4xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir
deleted file mode 100644
index 4d5e8c5..0000000
--- a/gemm/mlir/gemm_8000_4_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8000x4xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x4xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x4xf16>) -> tensor<8000x4xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x4xf16>) outs(%1 : tensor<8000x4xf16>) -> tensor<8000x4xf16>
-    return %2 : tensor<8000x4xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir b/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir
deleted file mode 100644
index 5a7f7e8..0000000
--- a/gemm/mlir/gemm_8000_8_5120_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xbf16>, %arg1: tensor<5120x8xbf16>) -> tensor<8000x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xbf16>, tensor<5120x8xbf16>) outs(%1 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16>
-    return %2 : tensor<8000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir b/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir
deleted file mode 100644
index 5552aa7..0000000
--- a/gemm/mlir/gemm_8000_8_5120_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<5120x8000xf16>, %arg1: tensor<5120x8xf16>) -> tensor<8000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x8xf16>) -> tensor<8000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<5120x8000xf16>, tensor<5120x8xf16>) outs(%1 : tensor<8000x8xf16>) -> tensor<8000x8xf16>
-    return %2 : tensor<8000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir
deleted file mode 100644
index a79aac9..0000000
--- a/gemm/mlir/gemm_8000_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8000x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8000x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<8000x8xbf16>) -> tensor<8000x8xbf16>
-    return %2 : tensor<8000x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir b/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir
deleted file mode 100644
index 4f1ed4c..0000000
--- a/gemm/mlir/gemm_8000_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8000xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8000x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8000x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8000x8xf16>) -> tensor<8000x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8000xf16>, tensor<8192x8xf16>) outs(%1 : tensor<8000x8xf16>) -> tensor<8000x8xf16>
-    return %2 : tensor<8000x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir
deleted file mode 100644
index 665dc34..0000000
--- a/gemm/mlir/gemm_8192_16_1024_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir
deleted file mode 100644
index b37806e..0000000
--- a/gemm/mlir/gemm_8192_16_1024_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir
deleted file mode 100644
index 5a46495..0000000
--- a/gemm/mlir/gemm_8192_16_14336_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir
deleted file mode 100644
index e9fab90..0000000
--- a/gemm/mlir/gemm_8192_16_14336_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir
deleted file mode 100644
index d5390e1..0000000
--- a/gemm/mlir/gemm_8192_16_2048_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir
deleted file mode 100644
index 899b396..0000000
--- a/gemm/mlir/gemm_8192_16_2048_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir
deleted file mode 100644
index a052c9d..0000000
--- a/gemm/mlir/gemm_8192_16_28672_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir
deleted file mode 100644
index 937f6ca..0000000
--- a/gemm/mlir/gemm_8192_16_28672_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir
deleted file mode 100644
index 956f501..0000000
--- a/gemm/mlir/gemm_8192_16_3584_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir
deleted file mode 100644
index 5182fe9..0000000
--- a/gemm/mlir/gemm_8192_16_3584_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir
deleted file mode 100644
index ae4ae02..0000000
--- a/gemm/mlir/gemm_8192_16_4096_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir
deleted file mode 100644
index 1510fbb..0000000
--- a/gemm/mlir/gemm_8192_16_4096_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir
deleted file mode 100644
index ce83d31..0000000
--- a/gemm/mlir/gemm_8192_16_7168_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir
deleted file mode 100644
index 63da0a2..0000000
--- a/gemm/mlir/gemm_8192_16_7168_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir
deleted file mode 100644
index b269adb..0000000
--- a/gemm/mlir/gemm_8192_16_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x16xbf16>) -> tensor<8192x16xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x16xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x16xbf16>) outs(%1 : tensor<8192x16xbf16>) -> tensor<8192x16xbf16>
-    return %2 : tensor<8192x16xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir
deleted file mode 100644
index a00d5ed..0000000
--- a/gemm/mlir/gemm_8192_16_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x16xf16>) -> tensor<8192x16xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x16xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x16xf16>) outs(%1 : tensor<8192x16xf16>) -> tensor<8192x16xf16>
-    return %2 : tensor<8192x16xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir
deleted file mode 100644
index 4dbf154..0000000
--- a/gemm/mlir/gemm_8192_1_1024_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir
deleted file mode 100644
index cfe91b7..0000000
--- a/gemm/mlir/gemm_8192_1_1024_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir
deleted file mode 100644
index bb3ee2c..0000000
--- a/gemm/mlir/gemm_8192_1_14336_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir
deleted file mode 100644
index d1bf657..0000000
--- a/gemm/mlir/gemm_8192_1_14336_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir
deleted file mode 100644
index dcb9440..0000000
--- a/gemm/mlir/gemm_8192_1_2048_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir
deleted file mode 100644
index bb68ab7..0000000
--- a/gemm/mlir/gemm_8192_1_2048_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir
deleted file mode 100644
index 9d8ba4c..0000000
--- a/gemm/mlir/gemm_8192_1_28672_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir
deleted file mode 100644
index 7f24658..0000000
--- a/gemm/mlir/gemm_8192_1_28672_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir
deleted file mode 100644
index db96ed7..0000000
--- a/gemm/mlir/gemm_8192_1_3584_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir
deleted file mode 100644
index 0c6617c..0000000
--- a/gemm/mlir/gemm_8192_1_3584_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir
deleted file mode 100644
index fef2bfe..0000000
--- a/gemm/mlir/gemm_8192_1_4096_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir
deleted file mode 100644
index 76cdec0..0000000
--- a/gemm/mlir/gemm_8192_1_4096_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir
deleted file mode 100644
index 83f7005..0000000
--- a/gemm/mlir/gemm_8192_1_7168_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir
deleted file mode 100644
index 1eedb82..0000000
--- a/gemm/mlir/gemm_8192_1_7168_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir
deleted file mode 100644
index 2ea5414..0000000
--- a/gemm/mlir/gemm_8192_1_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x1xbf16>) -> tensor<8192x1xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x1xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x1xbf16>) outs(%1 : tensor<8192x1xbf16>) -> tensor<8192x1xbf16>
-    return %2 : tensor<8192x1xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir
deleted file mode 100644
index ea88565..0000000
--- a/gemm/mlir/gemm_8192_1_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x1xf16>) -> tensor<8192x1xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x1xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x1xf16>) outs(%1 : tensor<8192x1xf16>) -> tensor<8192x1xf16>
-    return %2 : tensor<8192x1xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2048_1024_f16.mlir b/gemm/mlir/gemm_8192_2048_1024_f16.mlir
deleted file mode 100644
index f42dfba..0000000
--- a/gemm/mlir/gemm_8192_2048_1024_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x2048xf16>) -> tensor<8192x2048xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2048xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x1024xf16>, tensor<1024x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    return %2 : tensor<8192x2048xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2048_65536_f16.mlir b/gemm/mlir/gemm_8192_2048_65536_f16.mlir
deleted file mode 100644
index 59ae1bc..0000000
--- a/gemm/mlir/gemm_8192_2048_65536_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x2048xf16>) -> tensor<8192x2048xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2048xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x65536xf16>, tensor<65536x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    return %2 : tensor<8192x2048xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2048_8192_f16.mlir b/gemm/mlir/gemm_8192_2048_8192_f16.mlir
deleted file mode 100644
index 8e0a0a6..0000000
--- a/gemm/mlir/gemm_8192_2048_8192_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2048xf16>) -> tensor<8192x2048xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2048xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x2048xf16>) outs(%1 : tensor<8192x2048xf16>) -> tensor<8192x2048xf16>
-    return %2 : tensor<8192x2048xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir
deleted file mode 100644
index 7b07b58..0000000
--- a/gemm/mlir/gemm_8192_2_1024_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir
deleted file mode 100644
index fad9863..0000000
--- a/gemm/mlir/gemm_8192_2_1024_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir
deleted file mode 100644
index c3ba4e7..0000000
--- a/gemm/mlir/gemm_8192_2_14336_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir
deleted file mode 100644
index ac6a2f1..0000000
--- a/gemm/mlir/gemm_8192_2_14336_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir
deleted file mode 100644
index 154421f..0000000
--- a/gemm/mlir/gemm_8192_2_2048_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir
deleted file mode 100644
index 531fb51..0000000
--- a/gemm/mlir/gemm_8192_2_2048_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir
deleted file mode 100644
index a26e286..0000000
--- a/gemm/mlir/gemm_8192_2_28672_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir
deleted file mode 100644
index fa64b0d..0000000
--- a/gemm/mlir/gemm_8192_2_28672_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir
deleted file mode 100644
index 6bfc9dc..0000000
--- a/gemm/mlir/gemm_8192_2_3584_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir
deleted file mode 100644
index 5891198..0000000
--- a/gemm/mlir/gemm_8192_2_3584_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir
deleted file mode 100644
index 5bad65e..0000000
--- a/gemm/mlir/gemm_8192_2_4096_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir
deleted file mode 100644
index 2ff588d..0000000
--- a/gemm/mlir/gemm_8192_2_4096_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    return %2 : tensor<8192x2xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir
deleted file mode 100644
index 6017644..0000000
--- a/gemm/mlir/gemm_8192_2_7168_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x2xbf16>) -> tensor<8192x2xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x2xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16>
-    return %2 : tensor<8192x2xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir
deleted file mode 100644
index ada61d1..0000000
--- a/gemm/mlir/gemm_8192_2_7168_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x2xf16>) -> tensor<8192x2xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x2xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16>
-    %2 = linalg.matmul_transpose_a
ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> - return %2 : tensor<8192x2xf16> - } -} diff --git a/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir deleted file mode 100644 index 4a5c210..0000000 --- a/gemm/mlir/gemm_8192_2_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x2xbf16>) -> tensor<8192x2xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x2xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x2xbf16>) outs(%1 : tensor<8192x2xbf16>) -> tensor<8192x2xbf16> - return %2 : tensor<8192x2xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir deleted file mode 100644 index 070a5ba..0000000 --- a/gemm/mlir/gemm_8192_2_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x2xf16>) -> tensor<8192x2xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x2xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x2xf16>) -> tensor<8192x2xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x2xf16>) outs(%1 : tensor<8192x2xf16>) -> tensor<8192x2xf16> - return %2 : tensor<8192x2xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir deleted file mode 100644 index 852e767..0000000 --- a/gemm/mlir/gemm_8192_32_1024_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir deleted file mode 100644 index c5f5846..0000000 --- a/gemm/mlir/gemm_8192_32_1024_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir deleted file mode 100644 index 7f3f684..0000000 --- a/gemm/mlir/gemm_8192_32_14336_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<14336x8192xbf16>, tensor<14336x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir deleted file mode 100644 index 4670ddd..0000000 --- a/gemm/mlir/gemm_8192_32_14336_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir deleted file mode 100644 index 9b7cb18..0000000 --- a/gemm/mlir/gemm_8192_32_2048_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir deleted file mode 100644 index ad5ff98..0000000 --- a/gemm/mlir/gemm_8192_32_2048_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir deleted file mode 100644 index 0e75daf..0000000 --- a/gemm/mlir/gemm_8192_32_28672_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir deleted file mode 100644 index 1809761..0000000 --- a/gemm/mlir/gemm_8192_32_28672_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, 
%arg1 : tensor<28672x8192xf16>, tensor<28672x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir deleted file mode 100644 index ddbba11..0000000 --- a/gemm/mlir/gemm_8192_32_3584_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir deleted file mode 100644 index 45b7ca4..0000000 --- a/gemm/mlir/gemm_8192_32_3584_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir deleted file mode 100644 index 7134984..0000000 --- a/gemm/mlir/gemm_8192_32_4096_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir deleted file mode 100644 index 7df2c92..0000000 --- a/gemm/mlir/gemm_8192_32_4096_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir deleted file mode 100644 index 672f613..0000000 --- a/gemm/mlir/gemm_8192_32_7168_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : 
tensor<7168x8192xbf16>, tensor<7168x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir deleted file mode 100644 index aa39da1..0000000 --- a/gemm/mlir/gemm_8192_32_7168_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir deleted file mode 100644 index beeb9f6..0000000 --- a/gemm/mlir/gemm_8192_32_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x32xbf16>) -> tensor<8192x32xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x32xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x32xbf16>) outs(%1 : tensor<8192x32xbf16>) -> tensor<8192x32xbf16> - return %2 : tensor<8192x32xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir deleted file mode 100644 index 538b2a5..0000000 --- a/gemm/mlir/gemm_8192_32_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x32xf16>) -> tensor<8192x32xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x32xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x32xf16>) outs(%1 : tensor<8192x32xf16>) -> tensor<8192x32xf16> - return %2 : tensor<8192x32xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir deleted file mode 100644 index dadcc8c..0000000 --- a/gemm/mlir/gemm_8192_4_1024_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir deleted file mode 100644 index ae01271..0000000 --- a/gemm/mlir/gemm_8192_4_1024_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, 
tensor<1024x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir deleted file mode 100644 index a91f9bf..0000000 --- a/gemm/mlir/gemm_8192_4_14336_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir deleted file mode 100644 index 925676f..0000000 --- a/gemm/mlir/gemm_8192_4_14336_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir deleted file mode 100644 index 63f589c..0000000 --- a/gemm/mlir/gemm_8192_4_2048_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir deleted file mode 100644 index 043dba2..0000000 --- a/gemm/mlir/gemm_8192_4_2048_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir deleted file mode 100644 index ffcc49d..0000000 --- a/gemm/mlir/gemm_8192_4_28672_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x4xbf16>) outs(%1 : 
tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir deleted file mode 100644 index 3e66079..0000000 --- a/gemm/mlir/gemm_8192_4_28672_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir deleted file mode 100644 index 13ea765..0000000 --- a/gemm/mlir/gemm_8192_4_3584_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir deleted file mode 100644 index b3a4aca..0000000 --- a/gemm/mlir/gemm_8192_4_3584_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir deleted file mode 100644 index 111e1b9..0000000 --- a/gemm/mlir/gemm_8192_4_4096_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir deleted file mode 100644 index e9059bf..0000000 --- a/gemm/mlir/gemm_8192_4_4096_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : 
tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir deleted file mode 100644 index 7c140f3..0000000 --- a/gemm/mlir/gemm_8192_4_7168_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir deleted file mode 100644 index 81e98c6..0000000 --- a/gemm/mlir/gemm_8192_4_7168_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir deleted file mode 100644 index 7ae31d7..0000000 --- a/gemm/mlir/gemm_8192_4_8192_bf16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x4xbf16>) -> tensor<8192x4xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x4xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x4xbf16>) outs(%1 : tensor<8192x4xbf16>) -> tensor<8192x4xbf16> - return %2 : tensor<8192x4xbf16> - } -} diff --git a/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir deleted file mode 100644 index 2378c0f..0000000 --- a/gemm/mlir/gemm_8192_4_8192_f16_tA.mlir +++ /dev/null @@ -1,10 +0,0 @@ - -module { - func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x4xf16>) -> tensor<8192x4xf16> { - %cst = arith.constant 0.000000e+00 : f16 - %0 = tensor.empty() : tensor<8192x4xf16> - %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x4xf16>) outs(%1 : tensor<8192x4xf16>) -> tensor<8192x4xf16> - return %2 : tensor<8192x4xf16> - } -} diff --git a/gemm/mlir/gemm_8192_5120_640_bf16.mlir b/gemm/mlir/gemm_8192_5120_640_bf16.mlir deleted file mode 100644 index 5f59098..0000000 --- a/gemm/mlir/gemm_8192_5120_640_bf16.mlir +++ /dev/null @@ -1,9 +0,0 @@ -module { - func.func @main(%arg0: tensor<8192x640xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> { - %cst = arith.constant 0.000000e+00 : bf16 - %0 = tensor.empty() : tensor<8192x5120xbf16> - %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16> - %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x640xbf16>, tensor<640x5120xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16> - return %2 : tensor<8192x5120xbf16> - } -} diff --git 
deleted file mode 100644
index 177684d..0000000
--- a/gemm/mlir/gemm_8192_5120_640_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<640x8192xbf16>, %arg1: tensor<640x5120xbf16>) -> tensor<8192x5120xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x5120xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x8192xbf16>, tensor<640x5120xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
-    return %2 : tensor<8192x5120xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir b/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
deleted file mode 100644
index 629f56f..0000000
--- a/gemm/mlir/gemm_8192_5120_640_bf16_tB.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x640xbf16>, %arg1: tensor<5120x640xbf16>) -> tensor<8192x5120xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x5120xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
-    %2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<8192x640xbf16>, tensor<5120x640xbf16>) outs(%1 : tensor<8192x5120xbf16>) -> tensor<8192x5120xbf16>
-    return %2 : tensor<8192x5120xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16.mlir b/gemm/mlir/gemm_8192_5120_640_f16.mlir
deleted file mode 100644
index 52be98c..0000000
--- a/gemm/mlir/gemm_8192_5120_640_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x640xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x5120xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x640xf16>, tensor<640x5120xf16>) outs(%1 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
-    return %2 : tensor<8192x5120xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir b/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
deleted file mode 100644
index 97875f6..0000000
--- a/gemm/mlir/gemm_8192_5120_640_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<640x8192xf16>, %arg1: tensor<640x5120xf16>) -> tensor<8192x5120xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x5120xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<640x8192xf16>, tensor<640x5120xf16>) outs(%1 : tensor<8192x5120xf16>) -> tensor<8192x5120xf16>
-    return %2 : tensor<8192x5120xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir b/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
deleted file mode 100644
index c512215..0000000
--- a/gemm/mlir/gemm_8192_5120_640_f16_tB.mlir
+++ /dev/null
@@ -1,145 +0,0 @@
-#translation = #iree_codegen.translation_info
-module attributes {transform.with_named_sequence} {
-  stream.executable private @gemm {
-    stream.executable.export public @gemm workgroups() -> (index, index, index) {
-      %c128 = arith.constant 128 : index
-      %c80 = arith.constant 80 : index
-      %c1 = arith.constant 1 : index
-      stream.return %c128, %c80, %c1 : index, index, index
-    }
-    builtin.module {
-      func.func @gemm(%arg0: !stream.binding, %arg1: !stream.binding, %arg2: !stream.binding) attributes {translation_info = #translation} {
-        %c19 = arith.constant 19 : index
-        %c18 = arith.constant 18 : index
-        %c17 = arith.constant 17 : index
-        %c3 = arith.constant 3 : index
-        %c2 = arith.constant 2 : index
-        %c16 = arith.constant 16 : index
-        %c8 = arith.constant 8 : index
-        %c4 = arith.constant 4 : index
-        %c32 = arith.constant 32 : index
-        %c64 = arith.constant 64 : index
-        %c1 = arith.constant 1 : index
-        %c20 = arith.constant 20 : index
-        %c0 = arith.constant 0 : index
-        %cst = arith.constant dense<0.000000e+00> : vector<4xf32>
-        %workgroup_id_0 = stream.dispatch.workgroup.id[0] : index
-        %workgroup_id_1 = stream.dispatch.workgroup.id[1] : index
-        %thread_id_x = gpu.thread_id x
-        %thread_id_y = gpu.thread_id y
-        %alloc = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>>
-        %alloc_0 = memref.alloc() : memref<64x32xf16, #gpu.address_space<workgroup>>
-        %0 = stream.binding.subspan %arg0[%c0] : !stream.binding -> memref<8192x640xf16, strided<[640, 1], offset: ?>>
-        %1 = stream.binding.subspan %arg1[%c0] : !stream.binding -> memref<5120x640xf16, strided<[640, 1], offset: ?>>
-        %2 = arith.muli %workgroup_id_0, %c64 : index
-        %3 = arith.muli %thread_id_y, %c32 : index
-        %4 = arith.divsi %thread_id_x, %c4 : index
-        %5 = arith.addi %4, %3 : index
-        %6 = arith.remsi %5, %c64 : index
-        %7 = arith.addi %6, %2 : index
-        %8 = arith.remsi %thread_id_x, %c4 : index
-        %9 = arith.muli %8, %c8 : index
-        %10 = arith.divsi %thread_id_x, %c64 : index
-        %11 = arith.muli %10, %c32 : index
-        %12 = arith.remsi %thread_id_x, %c16 : index
-        %13 = arith.addi %12, %11 : index
-        %14 = arith.remsi %thread_id_x, %c64 : index
-        %15 = arith.divsi %14, %c16 : index
-        %16 = arith.muli %15, %c4 : index
-        %17 = arith.addi %16, %c16 : index
-        %18 = arith.addi %13, %c16 : index
-        %19 = arith.muli %workgroup_id_1, %c64 : index
-        %20 = arith.addi %6, %19 : index
-        %21 = arith.addi %12, %3 : index
-        %22 = arith.addi %21, %c16 : index
-        %23:4 = scf.for %arg3 = %c0 to %c20 step %c1 iter_args(%arg4 = %cst, %arg5 = %cst, %arg6 = %cst, %arg7 = %cst) -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) {
-          %62 = arith.muli %arg3, %c32 : index
-          %63 = arith.addi %62, %9 : index
-          %64 = vector.load %0[%7, %63] : memref<8192x640xf16, strided<[640, 1], offset: ?>>, vector<8xf16>
-          vector.store %64, %alloc[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16>
-          amdgpu.lds_barrier
-          %65 = vector.load %alloc[%13, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %66 = vector.load %alloc[%13, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %67 = vector.load %alloc[%18, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %68 = vector.load %alloc[%18, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %69 = vector.load %1[%20, %63] : memref<5120x640xf16, strided<[640, 1], offset: ?>>, vector<8xf16>
-          amdgpu.lds_barrier
-          vector.store %69, %alloc_0[%6, %9] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<8xf16>
-          amdgpu.lds_barrier
-          %70 = vector.load %alloc_0[%21, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %71 = vector.load %alloc_0[%21, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %72 = vector.load %alloc_0[%22, %16] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %73 = vector.load %alloc_0[%22, %17] : memref<64x32xf16, #gpu.address_space<workgroup>>, vector<4xf16>
-          %74 = amdgpu.mfma %65 * %70 + %arg4 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %75 = amdgpu.mfma %66 * %71 + %74 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %76 = amdgpu.mfma %67 * %72 + %arg7 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %77 = amdgpu.mfma %68 * %73 + %76 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %78 = amdgpu.mfma %67 * %70 + %arg6 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %79 = amdgpu.mfma %68 * %71 + %78 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %80 = amdgpu.mfma %65 * %72 + %arg5 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          %81 = amdgpu.mfma %66 * %73 + %80 {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
-          scf.yield %75, %81, %79, %77 : vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>
-        }
-        %24 = vector.extract_strided_slice %23#0 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %25 = stream.binding.subspan %arg2[%c0] : !stream.binding -> memref<8192x5120xf32, strided<[5120, 1], offset: ?>>
-        %26 = arith.remsi %thread_id_x, %c64 : index
-        %27 = arith.divsi %26, %c16 : index
-        %28 = arith.muli %27, %c4 : index
-        %29 = arith.divsi %thread_id_x, %c64 : index
-        %30 = arith.muli %29, %c32 : index
-        %31 = arith.muli %workgroup_id_0, %c64 : index
-        %32 = arith.addi %31, %30 : index
-        %33 = arith.addi %32, %28 : index
-        %34 = arith.muli %thread_id_y, %c32 : index
-        %35 = arith.muli %workgroup_id_1, %c64 : index
-        %36 = arith.remsi %thread_id_x, %c16 : index
-        %37 = arith.addi %36, %35 : index
-        %38 = arith.addi %37, %34 : index
-        vector.store %24, %25[%33, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %39 = vector.extract_strided_slice %23#0 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %40 = arith.addi %33, %c1 : index
-        vector.store %39, %25[%40, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %41 = vector.extract_strided_slice %23#0 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %42 = arith.addi %33, %c2 : index
-        vector.store %41, %25[%42, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %43 = vector.extract_strided_slice %23#0 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %44 = arith.addi %33, %c3 : index
-        vector.store %43, %25[%44, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %45 = vector.extract_strided_slice %23#3 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %46 = arith.addi %33, %c16 : index
-        %47 = arith.addi %38, %c16 : index
-        vector.store %45, %25[%46, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %48 = vector.extract_strided_slice %23#3 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %49 = arith.addi %33, %c17 : index
-        vector.store %48, %25[%49, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %50 = vector.extract_strided_slice %23#3 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %51 = arith.addi %33, %c18 : index
-        vector.store %50, %25[%51, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %52 = vector.extract_strided_slice %23#3 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        %53 = arith.addi %33, %c19 : index
-        vector.store %52, %25[%53, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %54 = vector.extract_strided_slice %23#2 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %54, %25[%46, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %55 = vector.extract_strided_slice %23#2 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %55, %25[%49, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %56 = vector.extract_strided_slice %23#2 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %56, %25[%51, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %57 = vector.extract_strided_slice %23#2 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %57, %25[%53, %38] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %58 = vector.extract_strided_slice %23#1 {offsets = [0], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %58, %25[%33, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %59 = vector.extract_strided_slice %23#1 {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %59, %25[%40, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %60 = vector.extract_strided_slice %23#1 {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %60, %25[%42, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        %61 = vector.extract_strided_slice %23#1 {offsets = [3], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
-        vector.store %61, %25[%44, %47] : memref<8192x5120xf32, strided<[5120, 1], offset: ?>>, vector<1xf32>
-        return
-      }
-    }
-  }
-  func.func @isolated_benchmark(%arg0: tensor<8192x640xf16>, %arg1: tensor<5120x640xf16>) -> tensor<8192x5120xf32> {
-    %0 = flow.dispatch @gemm::@gemm(%arg0, %arg1) : (tensor<8192x640xf16>, tensor<5120x640xf16>) -> tensor<8192x5120xf32>
-    return %0 : tensor<8192x5120xf32>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8192_1024_f16.mlir b/gemm/mlir/gemm_8192_8192_1024_f16.mlir
deleted file mode 100644
index 8c5ec54..0000000
--- a/gemm/mlir/gemm_8192_8192_1024_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x1024xf16>, %arg1: tensor<1024x8192xf16>) -> tensor<8192x8192xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8192xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x1024xf16>, tensor<1024x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    return %2 : tensor<8192x8192xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8192_65536_f16.mlir b/gemm/mlir/gemm_8192_8192_65536_f16.mlir
deleted file mode 100644
index 04bdc92..0000000
--- a/gemm/mlir/gemm_8192_8192_65536_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x65536xf16>, %arg1: tensor<65536x8192xf16>) -> tensor<8192x8192xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8192xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x65536xf16>, tensor<65536x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    return %2 : tensor<8192x8192xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8192_8192_f16.mlir b/gemm/mlir/gemm_8192_8192_8192_f16.mlir
deleted file mode 100644
index 232fdb7..0000000
--- a/gemm/mlir/gemm_8192_8192_8192_f16.mlir
+++ /dev/null
@@ -1,9 +0,0 @@
-module {
-  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8192xf16>) -> tensor<8192x8192xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8192xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    %2 = linalg.matmul ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x8192xf16>) outs(%1 : tensor<8192x8192xf16>) -> tensor<8192x8192xf16>
-    return %2 : tensor<8192x8192xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
deleted file mode 100644
index 90fbed3..0000000
--- a/gemm/mlir/gemm_8192_8_1024_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xbf16>, %arg1: tensor<1024x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xbf16>, tensor<1024x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir b/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
deleted file mode 100644
index b3d0f26..0000000
--- a/gemm/mlir/gemm_8192_8_1024_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<1024x8192xf16>, %arg1: tensor<1024x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<1024x8192xf16>, tensor<1024x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
deleted file mode 100644
index 17fe727..0000000
--- a/gemm/mlir/gemm_8192_8_14336_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xbf16>, %arg1: tensor<14336x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xbf16>, tensor<14336x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir b/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
deleted file mode 100644
index bbf21b1..0000000
--- a/gemm/mlir/gemm_8192_8_14336_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<14336x8192xf16>, %arg1: tensor<14336x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<14336x8192xf16>, tensor<14336x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
deleted file mode 100644
index d46ec59..0000000
--- a/gemm/mlir/gemm_8192_8_2048_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xbf16>, tensor<2048x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir b/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
deleted file mode 100644
index 30f757f..0000000
--- a/gemm/mlir/gemm_8192_8_2048_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<2048x8192xf16>, %arg1: tensor<2048x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<2048x8192xf16>, tensor<2048x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
deleted file mode 100644
index 7fdd508..0000000
--- a/gemm/mlir/gemm_8192_8_28672_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xbf16>, %arg1: tensor<28672x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xbf16>, tensor<28672x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir b/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
deleted file mode 100644
index aafb576..0000000
--- a/gemm/mlir/gemm_8192_8_28672_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<28672x8192xf16>, %arg1: tensor<28672x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<28672x8192xf16>, tensor<28672x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
deleted file mode 100644
index caa3522..0000000
--- a/gemm/mlir/gemm_8192_8_3584_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xbf16>, %arg1: tensor<3584x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xbf16>, tensor<3584x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir b/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
deleted file mode 100644
index 9964378..0000000
--- a/gemm/mlir/gemm_8192_8_3584_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<3584x8192xf16>, %arg1: tensor<3584x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<3584x8192xf16>, tensor<3584x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
deleted file mode 100644
index ed9262d..0000000
--- a/gemm/mlir/gemm_8192_8_4096_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xbf16>, %arg1: tensor<4096x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xbf16>, tensor<4096x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir b/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
deleted file mode 100644
index 0fca3dc..0000000
--- a/gemm/mlir/gemm_8192_8_4096_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<4096x8192xf16>, %arg1: tensor<4096x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<4096x8192xf16>, tensor<4096x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
deleted file mode 100644
index b7f68ff..0000000
--- a/gemm/mlir/gemm_8192_8_7168_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xbf16>, %arg1: tensor<7168x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xbf16>, tensor<7168x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir b/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
deleted file mode 100644
index c143d7f..0000000
--- a/gemm/mlir/gemm_8192_8_7168_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<7168x8192xf16>, %arg1: tensor<7168x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<7168x8192xf16>, tensor<7168x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir b/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
deleted file mode 100644
index be2e86d..0000000
--- a/gemm/mlir/gemm_8192_8_8192_bf16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8xbf16>) -> tensor<8192x8xbf16> {
-    %cst = arith.constant 0.000000e+00 : bf16
-    %0 = tensor.empty() : tensor<8192x8xbf16>
-    %1 = linalg.fill ins(%cst : bf16) outs(%0 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xbf16>, tensor<8192x8xbf16>) outs(%1 : tensor<8192x8xbf16>) -> tensor<8192x8xbf16>
-    return %2 : tensor<8192x8xbf16>
-  }
-}
diff --git a/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir b/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir
deleted file mode 100644
index 62431ce..0000000
--- a/gemm/mlir/gemm_8192_8_8192_f16_tA.mlir
+++ /dev/null
@@ -1,10 +0,0 @@
-
-module {
-  func.func @main(%arg0: tensor<8192x8192xf16>, %arg1: tensor<8192x8xf16>) -> tensor<8192x8xf16> {
-    %cst = arith.constant 0.000000e+00 : f16
-    %0 = tensor.empty() : tensor<8192x8xf16>
-    %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    %2 = linalg.matmul_transpose_a ins(%arg0, %arg1 : tensor<8192x8192xf16>, tensor<8192x8xf16>) outs(%1 : tensor<8192x8xf16>) -> tensor<8192x8xf16>
-    return %2 : tensor<8192x8xf16>
-  }
-}