diff --git a/include/tpu_mlir/Dialect/Top/IR/TopOps.td b/include/tpu_mlir/Dialect/Top/IR/TopOps.td index 4e086f696..120e16991 100755 --- a/include/tpu_mlir/Dialect/Top/IR/TopOps.td +++ b/include/tpu_mlir/Dialect/Top/IR/TopOps.td @@ -729,7 +729,6 @@ def Top_AttentionOp: Top_Op<"Attention"> { ); let results = (outs AnyTensor:$output); - let hasCanonicalizer = 1; } def Top_PadOp:Top_Op<"Pad"> { diff --git a/lib/Dialect/Top/Canonicalize/Attention.cpp b/lib/Dialect/Top/Canonicalize/Attention.cpp deleted file mode 100644 index a6f84896d..000000000 --- a/lib/Dialect/Top/Canonicalize/Attention.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved. -// -// TPU-MLIR is licensed under the 2-Clause BSD License except for the -// third-party components. -// -//===----------------------------------------------------------------------===// - -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/Pass.h" -#include "tpu_mlir/Dialect/Top/IR/TopOps.h" -#include "tpu_mlir/Support/Module.h" - - -using namespace tpu_mlir::top; - - -struct TopFuseAttention : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(AttentionOp op, - PatternRewriter &rewriter) const override { - - return failure(); - } -}; - -void AttentionOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.insert(context); -} diff --git a/lib/Dialect/Top/Transforms/ChipOptimize/OptimizeBM1684X.cpp b/lib/Dialect/Top/Transforms/ChipOptimize/OptimizeBM1684X.cpp index eb281268d..9e05a6d00 100644 --- a/lib/Dialect/Top/Transforms/ChipOptimize/OptimizeBM1684X.cpp +++ b/lib/Dialect/Top/Transforms/ChipOptimize/OptimizeBM1684X.cpp @@ -102,8 +102,9 @@ class ConvertMatMul2Attention : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(top::MatMulOp op, PatternRewriter &rewriter) const override { + // sd_decoder_pt error in bm1684x/bm1686 + return failure(); auto filter = op.getRight(); - // return failure(); if (module::isWeight(filter) == false) { return failure(); } diff --git a/lib/Dialect/Tpu/Interfaces/BM1684X/Load.cpp b/lib/Dialect/Tpu/Interfaces/BM1684X/Load.cpp index 34b03b419..89f24a832 100644 --- a/lib/Dialect/Tpu/Interfaces/BM1684X/Load.cpp +++ b/lib/Dialect/Tpu/Interfaces/BM1684X/Load.cpp @@ -73,8 +73,8 @@ void tpu::LoadOp::codegen_local_bm1684x(int64_t n_step, int64_t c_step, gdma_format = BM168x::getGdmaFormat(data_type); auto fmt_bytes = BM168x::getFmtBytes(data_type); auto g_addr = module::getAddress(getInput()); - int64_t dhw = D * H * W; - int64_t eu_num = BM168x::eu_num(fmt_bytes); + // int64_t dhw = D * H * W; + // int64_t eu_num = BM168x::eu_num(fmt_bytes); int64_t use_3ic = getUse_3icOptimize(); if (use_3ic < 4 && use_3ic > 0) { auto g_stride = BM168x::getGlobalStride(N, C, H, W); @@ -103,7 +103,9 @@ void tpu::LoadOp::codegen_local_bm1684x(int64_t n_step, int64_t c_step, s_stride.N, s_stride.H, gdma_format, true, GDMA_VALUE_DIR_S2L, pid_node); } - } else if (dhw <= eu_num && (C & 0xff) == 0 && data_type == DTYPE_INT8 && + } +#if 0 + else if (dhw <= eu_num && (C & 0xff) == 0 && data_type == DTYPE_INT8 && real_dslice == D && real_hslice == H && real_wslice == W && real_cslice == C && N == 1) { // optimize coeff load shape @@ -125,7 +127,9 @@ void tpu::LoadOp::codegen_local_bm1684x(int64_t n_step, int64_t c_step, N, C, H, W, nstride, cstride, hstride, wstride, dst_nstride, dst_cstride, dst_hstride, dst_wstride, gdma_format, GDMA_VALUE_DIR_S2L, 0, pid_node); - } else { + } +#endif + else { int64_t c_num_local = ceiling_func(real_cslice, Arch::NPU_NUM); int64_t c_stride = gi.eu_align ? align_up(real_hslice * real_wslice, Arch::eu_num(fmt_bytes)) diff --git a/python/test/test_torch.py b/python/test/test_torch.py index 74a66d484..d8b033133 100755 --- a/python/test/test_torch.py +++ b/python/test/test_torch.py @@ -47,7 +47,7 @@ def __init__(self, "Addmm": (self.test_Addmm, Y, Y, Y), "Arange": (self.test_Arange, Y, Y, Y), "Attention": (self.test_Attention, Y, Y, Y), - "AttentionNew": (self.test_AttentionNew, Y, N, N), + "AttentionNew": (self.test_AttentionNew, N, N, N), "AvgPool1d": (self.test_AvgPool1d, Y, Y, Y), "AvgPool2d": (self.test_AvgPool2d, Y, Y, Y), "AvgPool3d": (self.test_AvgPool3d, Y, Y, Y), diff --git a/python/transform/TFLiteConverter.py b/python/transform/TFLiteConverter.py index 8c1264ac9..87e939c57 100644 --- a/python/transform/TFLiteConverter.py +++ b/python/transform/TFLiteConverter.py @@ -240,6 +240,8 @@ def __init__(self, else: self.output_names = output_names self.input_shapes = [x.shape for x in self.graph.inputs] + for x in self.graph.inputs: + self.addShape(x.name, x.shape) self.output_shapes = [] self.outputs = [] for op in self.graph.operators: @@ -248,6 +250,7 @@ def __init__(self, self.outputs.append(out) self.__nhwc2nchw(out) self.output_shapes.append(out.shape) + self.addShape(out.name, out.shape) self.mlir = MLIRImporter( self.input_shapes, diff --git a/third_party/nntoolchain/lib/libbackend_1686.so b/third_party/nntoolchain/lib/libbackend_1686.so index f9a54f4c4..4135e7242 100755 Binary files a/third_party/nntoolchain/lib/libbackend_1686.so and b/third_party/nntoolchain/lib/libbackend_1686.so differ