Skip to content

Commit

Permalink
Vector-to-kernel pipeline bundle (#987)
Browse files Browse the repository at this point in the history
1) Bundles four passes — brgemm tiling, vectorization, hoisting of vector
transfers, and vector-to-FMA — into the vector-to-kernel pipeline.
2) New `json` files are added to benchmark folder for benchmarking the
`vector-to-kernel` pipeline.
  • Loading branch information
arun-thmn authored Dec 6, 2024
1 parent 4672763 commit f07997a
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ set(BENCH_OMP_CFGS
${CONFIG_DIR}/omp/dnn-bf16.json
${CONFIG_DIR}/omp/mlir-fp32.json
${CONFIG_DIR}/omp/mlir-bf16.json
${CONFIG_DIR}/omp/mlir-fp32-vector-to-kernel.json
${CONFIG_DIR}/omp/torch-dynamo.json
${CONFIG_DIR}/omp/torch-dynamo-vector-to-kernel.json
)
string(JOIN ',' BENCH_OMP_CFGS_STR ${BENCH_OMP_CFGS})
add_custom_target(benchmarks-omp ${BENCHMARK_DIR}/driver.py -v --build ${PROJECT_BINARY_DIR} -n 10
Expand Down
42 changes: 42 additions & 0 deletions benchmarks/config/base/base.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
"flags": [ "-n", "100" ],
"extensions": []
},
"gemm_fp32_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": []
},
"gemm_bf16_dp2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
Expand All @@ -57,6 +64,13 @@
"flags": [ "-n", "100" ],
"extensions": []
},
"mlp_fp32_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--def-parallel --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": []
},
"mlp_bf16_dp2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32 --vnni=2" ],
Expand All @@ -81,12 +95,26 @@
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_const_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_const_mlir": {
"type": "IR-GEN",
Expand All @@ -112,13 +140,27 @@
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_const_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--def-parallel --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_args_mlir_vector": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=args --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024" ],
"environment": {},
"flags": [ "-n", "100", "-run-args='--def-parallel --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"bf16_3x1024_const_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=bf16 --batch=256 --layers=1024,1024,1024,1024" ],
Expand Down
128 changes: 128 additions & 0 deletions benchmarks/config/omp/mlir-fp32-vector-to-kernel.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@

[
{
"gemm_fp32_mlir_vector_kernel_32": {
"fp32_3x1024_omp_2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
{
"mlp_fp32_mlir_vector_kernel_32": {
"fp32_3x1024_omp_2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=32,32,32" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
{
"gemm_fp32_mlir_vector_kernel_64": {
"fp32_3x1024_omp_2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=1,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
}
}},
{
"mlp_fp32_mlir_vector_kernel_64": {
"fp32_3x1024_omp_2_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "IR-GEN",
"benchmark": [ "mlir-gen", "--kernel=const --bias --relu --float-type=f32 --batch=256 --layers=1024,1024,1024,1024 --tiles=64,64,64" ],
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=1,4 --vector-to-kernels --lhsTile=16,64 --rhsTile=64,1'" ],
"extensions": [ "(avx2|asimd)" ]
}
}}
]

64 changes: 64 additions & 0 deletions benchmarks/config/omp/torch-dynamo-vector-to-kernel.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
[
{
"gemm_fp32_torch_vector_kernel" : {
"fp32_3x1024_omp_2_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-gemm-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
}
}},
{
"mlp_fp32_torch_vector_kernel" : {
"fp32_3x1024_omp_2_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "2", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,16 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_4_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "4", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=8,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_8_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "8", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=4,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
},
"fp32_3x1024_omp_16_mlir": {
"type": "MLIR",
"benchmark": "pytorch/torch-dynamo-mlp-fp32-3x1024.mlir",
"environment": { "OMP_NUM_THREADS": "16", "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" },
"flags": [ "-n", "100", "-run-args='--def-parallel --parallel-task-grid=2,8 --vector-to-kernels --lhsTile=4,32 --rhsTile=32,1'" ],
"extensions": [ ]
}
}}
]
5 changes: 5 additions & 0 deletions lib/TPP/DefaultPipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ llvm::cl::opt<bool> linalgToVector("linalg-to-vector",
llvm::cl::desc("Lower linalg to vector"),
llvm::cl::init(false));

// Command-line switch for the vector-to-kernel path: lowers vector ops to
// specialized micro-kernels instead of the default lowering. Off by default.
llvm::cl::opt<bool> vectorToKernel("vector-to-kernels",
llvm::cl::desc("Lower vector to micro-kernels"),
llvm::cl::init(false));

llvm::cl::opt<bool> lowerPackUnpackWithoutTranspose(
"lower-pack-unpack-without-transpose",
llvm::cl::desc("Lower packs and unpacks reverting any dim permutations"),
Expand Down Expand Up @@ -158,6 +162,7 @@ struct DefaultPipeline : public tpp::impl::DefaultPipelineBase<DefaultPipeline>,
lowerPackUnpackWithoutTranspose;
tppDefaultOptions.lhsTile = lhsTile;
tppDefaultOptions.rhsTile = rhsTile;
tppDefaultOptions.vectorToKernel = vectorToKernel;

pm.addPass(createDefaultTppPasses(tppDefaultOptions));
}
Expand Down
13 changes: 8 additions & 5 deletions lib/TPP/DefaultTppPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,14 @@ struct DefaultTppPasses
SmallVector<std::string> skipOperations;
// General "linalg-to-vector" choice needs to skip all XSMM matching at
// linalg level.
if (linalgToVector) {
if (linalgToVector || vectorToKernel) {
skipOperations.push_back("all");
}
if (vectorToXSMM) {
skipOperations.clear();
skipOperations.push_back("transpose");
skipOperations.push_back("vnni");
}
if (vectorToKernel)
skipOperations.clear();

// Pipeline building starts here.
pm.addPass(createFoldAddIntoDest());
Expand Down Expand Up @@ -141,8 +139,12 @@ struct DefaultTppPasses
BrgemmLinalgTilingOptions{lhsTile, rhsTile}));
pm.addNestedPass<func::FuncOp>(createLoopInvariantCodeMotionPass());
pm.addNestedPass<func::FuncOp>(createVectorizationPass());
pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
if (vectorToXSMM) {

// NOTE: the canonicalizer must run *after* the hoisting pass — it fuses the
// outer tiling loops, which leaves no pattern for the hoisting pass to
// match. It has therefore been moved inside the VectorToKernel path.

if (vectorToXSMM) {
pm.addPass(createVectorToXSMM());
}
if (vectorToKernel) {
Expand Down Expand Up @@ -187,3 +189,4 @@ struct DefaultTppPasses
};

} // namespace

6 changes: 4 additions & 2 deletions lib/TPP/PassBundles/VectorToKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "llvm/Support/Debug.h"
#include "mlir/Transforms/Passes.h"

#include "TPP/PassBundles.h"
#include "TPP/PassUtils.h"
Expand Down Expand Up @@ -48,7 +49,8 @@ struct VectorToKernel : public tpp::impl::VectorToKernelBase<VectorToKernel>,

private:
void constructPipeline() override {
// Hoist redundant vector transfer reads/writes out of the inner loops,
// then canonicalize (running the canonicalizer earlier would fuse the
// tiling loops and defeat the hoisting pattern match), and finally map
// vector.contract ops to FMA-based micro-kernel sequences.
pm.addNestedPass<func::FuncOp>(createHoistVectorTransfers());
pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
pm.addNestedPass<func::FuncOp>(createVectorContractToFMA());
}
};
2 changes: 1 addition & 1 deletion scripts/benchmarks/build_and_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ echo_run ./driver.py -vv \
--build "${BUILD_DIR}"

echo " ========= OpenMP Benchmarks ==========="
for cfg in dnn-fp32 dnn-bf16 mlir-fp32 mlir-bf16; do
for cfg in dnn-fp32 dnn-bf16 mlir-fp32 mlir-bf16 mlir-fp32-vector-to-kernel; do
echo_run ./driver.py -vv \
-n ${NUM_ITER} \
-c "${CONFIG_DIR}/omp/${cfg}.json" \
Expand Down
2 changes: 2 additions & 0 deletions scripts/github/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,10 @@ if [ "$BENCH_OMP" ]; then
benchmark omp/dnn-fp32.json "OpenMP XSMM-DNN FP32"
benchmark omp/dnn-bf16.json "OpenMP XSMM-DNN BF16"
benchmark omp/mlir-fp32.json "OpenMP TPP-MLIR FP32"
benchmark omp/mlir-fp32-vector-to-kernel.json "OpenMP TPP-MLIR VECTOR-TO-KERNEL FP32"
benchmark omp/mlir-bf16.json "OpenMP TPP-MLIR BF16"
benchmark omp/torch-dynamo.json "OpenMP TPP-MLIR PyTorch"
benchmark omp/torch-dynamo-vector-to-kernel.json "OpenMP TPP-MLIR VECTOR-TO-KERNEL PyTorch"
fi

# Matmul Benchmarks
Expand Down

0 comments on commit f07997a

Please sign in to comment.