Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Loop tiling, shuffle and expansion passes #922

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
f817e32
Loop tiling, shuffle and expansion passes
KavithaTipturMadhu Jun 6, 2024
6ddd878
Gemm test case for tiling
KavithaTipturMadhu Jun 12, 2024
2180e4e
Loop shuffle test case
KavithaTipturMadhu Jun 12, 2024
8bc1882
Outer loop parallelization test
KavithaTipturMadhu Jun 12, 2024
b4c2c6b
Gemm correctness test
KavithaTipturMadhu Jun 12, 2024
63f75db
MLP correctness test
KavithaTipturMadhu Jun 12, 2024
7fee200
Warning fix
KavithaTipturMadhu Jun 13, 2024
3215a7f
Review comments
KavithaTipturMadhu Jun 13, 2024
fd35884
Updated pass description
KavithaTipturMadhu Jun 13, 2024
e44cb7a
BF16 correctness tests
KavithaTipturMadhu Jun 13, 2024
a81db87
Review comments
KavithaTipturMadhu Jun 13, 2024
7696d81
Changes to constant second argument
KavithaTipturMadhu Jun 14, 2024
ee1c6bf
Loop Shuffle bug fix
KavithaTipturMadhu Jun 14, 2024
aba5a7a
Support for 1 sized tile
KavithaTipturMadhu Jun 15, 2024
1df8b75
Benchmarks update
KavithaTipturMadhu Jun 15, 2024
51a2205
Cleanup
KavithaTipturMadhu Jun 15, 2024
fc5fcbf
Removed unnecessary includes
KavithaTipturMadhu Jun 18, 2024
eab1d18
Validation for shape of output
KavithaTipturMadhu Jun 18, 2024
bc96248
Unit tests for loop insertion
KavithaTipturMadhu Jun 18, 2024
3f4a624
shuffling pass changes and test cases
KavithaTipturMadhu Jun 19, 2024
776b7ec
Loop expansion pass changes and tests
KavithaTipturMadhu Jun 19, 2024
79f3632
Updated pass description
KavithaTipturMadhu Jun 19, 2024
c3432a0
Cleanup
KavithaTipturMadhu Jun 19, 2024
dbcd89b
Regex replacement of bf16 correctness test
KavithaTipturMadhu Jun 19, 2024
f9218ef
Pipeline changes
KavithaTipturMadhu Jun 21, 2024
1b760dd
MLP integration test for tiling
KavithaTipturMadhu Jun 21, 2024
d72ccc5
Single tiling factor argument with second tiling factor inferred from…
KavithaTipturMadhu Jun 21, 2024
961ef31
Review comments
KavithaTipturMadhu Jun 24, 2024
a24e2fb
Retire old 2d parallelization
KavithaTipturMadhu Jun 24, 2024
a61ebe7
Benchmarks update
KavithaTipturMadhu Jun 25, 2024
b9483e1
Code cleanup
KavithaTipturMadhu Jun 25, 2024
461322f
Canonicalize in Low level parallelization
KavithaTipturMadhu Jun 25, 2024
8a498bc
Fixes
KavithaTipturMadhu Jul 11, 2024
c024990
Review comments
KavithaTipturMadhu Jul 12, 2024
64af8ed
Presburger relation
KavithaTipturMadhu Jul 16, 2024
9a0ab09
In parallel op nesting
KavithaTipturMadhu Jul 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions benchmarks/config/omp/mlir-bf16.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2)" ]
}
}},
Expand All @@ -36,28 +36,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2)" ]
},
"bf16_dp2_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2)" ]
}
}},
Expand All @@ -67,28 +67,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(svebf16)" ]
}
}},
Expand All @@ -98,28 +98,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(svebf16)" ]
},
"bf16_dp4_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=4" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(svebf16)" ]
}
}}
Expand Down
16 changes: 8 additions & 8 deletions benchmarks/config/omp/mlir-fp32.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
Expand All @@ -36,28 +36,28 @@
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=2 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=8 -N-tile-shape=4 --loop-shuffle-order=2,0,1,3 --num-outer-parallel=1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=2 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8'" ],
"flags": [ "-n", "100", "-run-args='--def-parallel -M-tile-shape=4 -N-tile-shape=4 --loop-shuffle-order=0,2,1,3 --num-outer-parallel=2'" ],
"extensions": [ "(avx2|asimd)" ]
}
}}
Expand Down
Loading