From 6d14b4f01c12e9b1108ce414a7185c1932b3593f Mon Sep 17 00:00:00 2001
From: Adam Siemieniuk
Date: Mon, 19 Aug 2024 18:45:46 +0200
Subject: [PATCH] Bump LLVM

Fixes for upstream API changes and improved test checks.
---
 build_tools/llvm_version.txt                |  2 +-
 lib/TPP/GPU/LinalgToXeGPU.cpp               | 21 ++++++++++-----------
 lib/TPP/Transforms/ToBlockLayoutAndBack.cpp |  6 ++++--
 test/GPU/linalg-to-xegpu-dpas.mlir          |  4 ++--
 test/Passes/tile-and-fuse-default.mlir      |  2 +-
 test/Passes/tile-and-fuse.mlir              |  2 +-
 6 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/build_tools/llvm_version.txt b/build_tools/llvm_version.txt
index 385d4451a..f0b50cd20 100644
--- a/build_tools/llvm_version.txt
+++ b/build_tools/llvm_version.txt
@@ -1 +1 @@
-1846523bb77275de954ac573110171bd39bfa930
+79f6ae05c139d3d5b6446f8a265a3c6e3f5b18f8

diff --git a/lib/TPP/GPU/LinalgToXeGPU.cpp b/lib/TPP/GPU/LinalgToXeGPU.cpp
index 983c04728..33bf2e470 100644
--- a/lib/TPP/GPU/LinalgToXeGPU.cpp
+++ b/lib/TPP/GPU/LinalgToXeGPU.cpp
@@ -344,7 +344,7 @@ static std::optional lowerEltwiseOp(linalg::LinalgOp linalgOp,
             // Unhandled type. Bail out.
             return std::nullopt;
           })
-          .Case([&](linalg::NegfOp negfOp) -> std::optional {
+          .Case([&](linalg::NegFOp negfOp) -> std::optional {
             assert(operands.size() == 1 && "Invalid number of operands for negf");
             return rewriter.create(loc, resType, operands[0])
                 .getResult();
@@ -724,9 +724,9 @@ loadNdDescTiles(PatternRewriter &rewriter, Location loc, ValueRange loadTiles,
   VectorType vecLoadType =
       VectorType::get(tileType.getShape(), tileType.getElementType());
-  IntegerAttr vnniAxisAttr = nullptr;
+  UnitAttr vnniPackedAttr = nullptr;
   if (vnniConf) {
-    vnniAxisAttr = IntegerAttr::get(rewriter.getI64Type(), vnniConf->vnniAxis);
+    vnniPackedAttr = rewriter.getUnitAttr();
     vecLoadType = getVnniVector(tileType.getShape(), tileType.getElementType(),
                                 *vnniConf);
   }

@@ -734,7 +734,7 @@ loadNdDescTiles(PatternRewriter &rewriter, Location loc, ValueRange loadTiles,
   SmallVector loadVec;
   for (auto tile : loadTiles) {
     auto loadOp = rewriter.create(
-        loc, vecLoadType, tile, vnniAxisAttr, transpose,
+        loc, vecLoadType, tile, vnniPackedAttr, transpose,
         /*l1_hint=*/hint,
         /*l2_hint=*/hint, /*l3_hint=*/hint);
     loadVec.push_back(loadOp);
@@ -1043,12 +1043,11 @@ static LogicalResult createDPASKernel(linalg::LinalgOp linalgOp,
   if (vnniFactor == -1)
     return failure();

-  VnniConfig vnniConfA{.vnniFactor = vnniFactor, .vnniAxis = 1};
   VnniConfig vnniConfB{.vnniFactor = vnniFactor, .vnniAxis = 0};

   // Load A sub-tiles.
-  SmallVector loadVecA =
-      loadNdDescTiles(rewriter, loc, tilesA, readCacheHint, vnniConfA);
+  SmallVector loadVecA = loadNdDescTiles(
+      rewriter, loc, tilesA, readCacheHint, /*vnniConf=*/std::nullopt);
   auto tileTypeA = cast(tilesA[0].getType());

   // Load B sub-tiles.
@@ -1077,9 +1076,9 @@ static LogicalResult createDPASKernel(linalg::LinalgOp linalgOp,
   }

   // Extract DPAS tiles from loaded sub-tiles.
-  TilesArray dpasVecA = extractVecSubTiles(rewriter, loc, loadVecA,
-                                           {dimM, kTile}, tileTypeA.getShape(),
-                                           {dpasTileM, dpasTileK}, vnniConfA);
+  TilesArray dpasVecA = extractVecSubTiles(
+      rewriter, loc, loadVecA, {dimM, kTile}, tileTypeA.getShape(),
+      {dpasTileM, dpasTileK}, /*vnniConf=*/std::nullopt);
   TilesArray dpasVecB = extractVecSubTiles(rewriter, loc, loadVecB,
                                            {kTile, dimN}, tileTypeB.getShape(),
                                            {dpasTileK, dpasTileN}, vnniConfB);
@@ -1378,7 +1377,7 @@ void populateLinalgEltwiseToXeGPUPatterns(RewritePatternSet &patterns,
                ConvertNamedEltwiseToXeGPU,
                ConvertNamedEltwiseToXeGPU,
                ConvertNamedEltwiseToXeGPU,
-               ConvertNamedEltwiseToXeGPU<linalg::NegfOp>,
+               ConvertNamedEltwiseToXeGPU<linalg::NegFOp>,
                ConvertNamedEltwiseToXeGPU>(patterns.getContext(), options);
 }

diff --git a/lib/TPP/Transforms/ToBlockLayoutAndBack.cpp b/lib/TPP/Transforms/ToBlockLayoutAndBack.cpp
index bc678c07b..bb13c16a5 100644
--- a/lib/TPP/Transforms/ToBlockLayoutAndBack.cpp
+++ b/lib/TPP/Transforms/ToBlockLayoutAndBack.cpp
@@ -652,7 +652,7 @@ struct PropagatePackUnPack
     MLIRContext *ctx = getOperation().getContext();
     RewritePatternSet patterns(ctx);
     linalg::populateDataLayoutPropagationPatterns(
-        patterns, [](Operation *op) { return true; });
+        patterns, [](OpOperand *operand) { return true; });
     (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
   }
 };
@@ -813,7 +813,9 @@ void mlir::tpp::populateSimplifyPacking(RewritePatternSet &patterns) {
   // Propagate packs/unpacks only through expand shapes at this point.
   // This captures the transformation scope of the replaced downstream pass.
   linalg::populateDataLayoutPropagationPatterns(
-      patterns, [](Operation *op) { return isa(op); });
+      patterns, [](OpOperand *operand) {
+        return isa(operand->get().getDefiningOp());
+      });
   ctx->getLoadedDialect()->getCanonicalizationPatterns(
       patterns);
   patterns.add(ctx);

diff --git a/test/GPU/linalg-to-xegpu-dpas.mlir b/test/GPU/linalg-to-xegpu-dpas.mlir
index a64a93f7a..cd976e64f 100644
--- a/test/GPU/linalg-to-xegpu-dpas.mlir
+++ b/test/GPU/linalg-to-xegpu-dpas.mlir
@@ -63,9 +63,9 @@ func.func @matmul(%arg0: memref<32x32xf16>, %arg1: memref<32x32xf16>, %arg2: mem

 // Extract DPAS-sized chunks from larger loaded tile A.
 // Tile B is already in the correct shape.
-// CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x8x2xf16> to vector<512xf16>
+// CHECK: %[[vA_flat:.+]] = vector.shape_cast %[[vA]] : vector<32x16xf16> to vector<512xf16>
 // CHECK: %[[vA_dpas_flat:.+]] = vector.extract_strided_slice{{.*}}: vector<512xf16> to vector<128xf16>
-// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x8x2xf16>
+// CHECK: %[[vA_dpas:.+]] = vector.shape_cast %[[vA_dpas_flat]] : vector<128xf16> to vector<8x16xf16>
 // CHECK-COUNT-3: vector.extract_strided_slice

 // Perform DPAS computation.
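
Note: the CHECK updates above reflect that A sub-tiles are no longer loaded in VNNI-packed layout, so a 32x16 f16 tile is now checked as vector<32x16xf16> instead of the packed vector<32x8x2xf16>. As a rough, self-contained illustration of the packed shape (this helper is hypothetical and only mirrors the shapes in the old and new CHECK lines; it is not the project's getVnniVector):

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical sketch: VNNI packing splits the dimension at `vnniAxis` by
// `vnniFactor` and appends the factor as a trailing dimension.
// For f16 (factor 2): a 32x16 tile packed along axis 1 becomes 32x8x2,
// matching the old CHECK line; left unpacked it stays 32x16.
std::vector<int64_t> vnniShape(std::vector<int64_t> shape, unsigned vnniAxis,
                               int64_t vnniFactor) {
  assert(vnniAxis < shape.size() && shape[vnniAxis] % vnniFactor == 0);
  shape[vnniAxis] /= vnniFactor;
  shape.push_back(vnniFactor);
  return shape;
}
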
diff --git a/test/Passes/tile-and-fuse-default.mlir b/test/Passes/tile-and-fuse-default.mlir
index d1e5f1079..bb4f9dfd9 100644
--- a/test/Passes/tile-and-fuse-default.mlir
+++ b/test/Passes/tile-and-fuse-default.mlir
@@ -595,7 +595,7 @@ func.func @check_tile_propagation_to_eltwise_consumer(%arg0: tensor<2x2x2x4xf32>
 // CHECK-LABEL: check_tile_propagation_to_eltwise_consumer
 // CHECK-SAME: %[[ARG0:.+]]: tensor<2x2x2x4xf32>, %[[ARG1:.+]]: tensor<2x4x8x2xf32>,
 // CHECK-SAME: %[[ARG2:.+]]: tensor<2x2x8x2xf32>, %[[ARG3:.+]]: tensor<2x2x8x2xf32>
-// CHECK: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
 // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index

diff --git a/test/Passes/tile-and-fuse.mlir b/test/Passes/tile-and-fuse.mlir
index 7a95761d1..daf9ad426 100644
--- a/test/Passes/tile-and-fuse.mlir
+++ b/test/Passes/tile-and-fuse.mlir
@@ -342,7 +342,7 @@ func.func @mlp(%arg0: tensor<8x112x32x32xbf16>, %arg1: tensor<112x112x32x32xbf16
     %max = arith.maximumf %in, %cst : bf16
     linalg.yield %max : bf16
   } -> tensor<8x112x32x32xbf16>
-  // CHECK: %[[C112:.+]] = arith.constant 112 : index
+  // CHECK-DAG: %[[C112:.+]] = arith.constant 112 : index
   // CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
   // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
   // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
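
Note: the main upstream API change tracked by this patch is that linalg::populateDataLayoutPropagationPatterns now takes its control callback over OpOperand * rather than Operation *, as seen in the ToBlockLayoutAndBack.cpp hunks above. Below is a minimal sketch of the new-style call, assuming only that a RewritePatternSet is available; the function name addExpandShapePropagation is made up for the example, and tensor::ExpandShapeOp is an assumption inferred from the "expand shapes" comment in the hunk:

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Sketch of the post-bump control-function signature: the callback now
// receives the OpOperand through which propagation would happen and decides
// per operand instead of per operation.
static void addExpandShapePropagation(RewritePatternSet &patterns) {
  linalg::populateDataLayoutPropagationPatterns(
      patterns, [](OpOperand *operand) {
        // Restrict propagation to operands produced by expand_shape ops,
        // mirroring populateSimplifyPacking above; reject everything else.
        Operation *producer = operand->get().getDefiningOp();
        return producer && isa<tensor::ExpandShapeOp>(producer);
      });
}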