From 7051e794ceb6399429ab1b961a13e6876ea93943 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 15:21:13 -0800 Subject: [PATCH 001/318] Drop the 'git' suffix from various version variables --- libcxx/CMakeLists.txt | 2 +- libcxxabi/CMakeLists.txt | 2 +- libunwind/CMakeLists.txt | 2 +- llvm/CMakeLists.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 4e7e8f978546..9bf1a02f0908 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -29,7 +29,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL project(libcxx CXX C) set(PACKAGE_NAME libcxx) - set(PACKAGE_VERSION 12.0.0git) + set(PACKAGE_VERSION 12.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index b803347c2a8e..426c855288fc 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -28,7 +28,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B project(libcxxabi CXX C) set(PACKAGE_NAME libcxxabi) - set(PACKAGE_VERSION 11.0.0git) + set(PACKAGE_VERSION 11.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 8ae32fbccf4e..48cb8e004e08 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -24,7 +24,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B project(libunwind LANGUAGES C CXX ASM) set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.0git) + set(PACKAGE_VERSION 12.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 454ec561af9a..277d0fe54d7b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -14,7 
+14,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX git) + set(LLVM_VERSION_SUFFIX "") endif() if (NOT PACKAGE_VERSION) From f2a45d31b9c11f2b3e12f161391fe845025b5177 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 15:17:48 -0800 Subject: [PATCH 002/318] Import workflows from release/11.x branch --- .github/workflows/clang-tests.yml | 43 +++++++++++ .github/workflows/libclc-tests.yml | 53 +++++++++++++ .github/workflows/lld-tests.yml | 43 +++++++++++ .github/workflows/lldb-tests.yml | 48 ++++++++++++ .github/workflows/llvm-tests.yml | 116 +++++++++++++++++++++++++++++ 5 files changed, 303 insertions(+) create mode 100644 .github/workflows/clang-tests.yml create mode 100644 .github/workflows/libclc-tests.yml create mode 100644 .github/workflows/lld-tests.yml create mode 100644 .github/workflows/lldb-tests.yml create mode 100644 .github/workflows/llvm-tests.yml diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml new file mode 100644 index 000000000000..f8ca65e10726 --- /dev/null +++ b/.github/workflows/clang-tests.yml @@ -0,0 +1,43 @@ +name: Clang Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - '.github/workflows/clang-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - '.github/workflows/clang-tests.yml' + +jobs: + build_clang: + name: clang check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test clang + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" 
-DCMAKE_BUILD_TYPE=Release + build_target: check-clang diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml new file mode 100644 index 000000000000..4e8639b1c89a --- /dev/null +++ b/.github/workflows/libclc-tests.yml @@ -0,0 +1,53 @@ +name: libclc Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - 'libclc/**' + - '.github/workflows/libclc-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - 'libclc/**' + - '.github/workflows/libclc-tests.yml' + +jobs: + build_libclc: + name: libclc test + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + # Disable build on windows, because I can't figure out where llvm-config is. + #- windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Build clang + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release + build_target: "" + - name: Build and test libclc + run: | + mkdir libclc-build + cd libclc-build + cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config + ninja + ninja test diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml new file mode 100644 index 000000000000..9b4cbe95f231 --- /dev/null +++ b/.github/workflows/lld-tests.yml @@ -0,0 +1,43 @@ +name: LLD Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'lld/**' + - 'llvm/**' + - '.github/workflows/lld-tests.yml' + pull_request: + paths: + - 'lld/**' + - 'llvm/**' + - '.github/workflows/lld-tests.yml' + +jobs: + build_lld: + name: lld check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + 
steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test lld + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release + build_target: check-lld diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml new file mode 100644 index 000000000000..229e6deece6e --- /dev/null +++ b/.github/workflows/lldb-tests.yml @@ -0,0 +1,48 @@ +name: lldb Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - 'lldb/**' + - '.github/workflows/lldb-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - 'lldb/**' + - '.github/workflows/lldb-tests.yml' + +jobs: + build_lldb: + name: lldb build + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + # macOS build disabled due to: llvm.org/PR46190 + #- macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Build lldb + uses: llvm/actions/build-test-llvm-project@master + with: + # Mac OS requires that libcxx is enabled for lldb tests, so we need to disable them. + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF + # check-lldb is not consistent, so we only build lldb. 
+ build_target: "" diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml new file mode 100644 index 000000000000..67f318ad849f --- /dev/null +++ b/.github/workflows/llvm-tests.yml @@ -0,0 +1,116 @@ +name: LLVM Tests + +env: + release_major: 12 + +on: + push: + branches: + - 'release/**' + paths: + - 'llvm/**' + - '.github/workflows/llvm-tests.yml' + pull_request: + paths: + - 'llvm/**' + - '.github/workflows/llvm-tests.yml' + +jobs: + build_llvm: + name: llvm check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test llvm + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release + + abi-dump: + runs-on: ubuntu-latest + strategy: + matrix: + name: + - build-baseline + - build-latest + include: + - name: build-baseline + # FIXME: Referencing the env context does not work here + # ref: llvmorg-${{ env.release_major }}.0.0 + ref: llvmorg-12.0.0 + repo: llvm/llvm-project + - name: build-latest + ref: ${{ github.sha }} + repo: ${{ github.repository }} + steps: + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - name: Install abi-compliance-checker + run: | + sudo apt-get install abi-dumper autoconf pkg-config + - name: Install universal-ctags + run: | + git clone https://github.com/universal-ctags/ctags.git + cd ctags + ./autogen.sh + ./configure + sudo make install + - name: Download source code + uses: llvm/actions/get-llvm-project-src@master + with: + ref: ${{ matrix.ref }} + repo: ${{ matrix.repo }} + - name: Configure + run: | + mkdir build + cd build + cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" 
-DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm + - name: Build + run: ninja -C build libLLVM-${{ env.release_major }}.so + - name: Dump ABI + run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so + - name: Upload ABI file + uses: actions/upload-artifact@v1 + with: + name: ${{ matrix.name }} + path: ${{ matrix.ref }}.abi.tar.gz + + abi-compare: + runs-on: ubuntu-latest + needs: + - abi-dump + steps: + - name: Download baseline + uses: actions/download-artifact@v1 + with: + name: build-baseline + - name: Download latest + uses: actions/download-artifact@v1 + with: + name: build-latest + - name: Install abi-compliance-checker + run: sudo apt-get install abi-compliance-checker + - name: Compare ABI + run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz + - name: Upload ABI Comparison + if: always() + uses: actions/upload-artifact@v1 + with: + name: compat-report-${{ github.sha }} + path: compat_reports/ From d64226e8fab8fc7b4d947223c61036a60eb6a871 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 27 Jan 2021 15:32:05 +0100 Subject: [PATCH 003/318] [clangd] Work around GCC bug 66735 (cherry picked from commit 12de8e1399fecf691639ba430b3824acb1311e70) --- clang-tools-extra/clangd/ParsedAST.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 403d3fe3e64f..1020282f5ee8 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -316,8 +316,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, Check->registerMatchers(&CTFinder); } - ASTDiags.setLevelAdjuster([&, &Cfg(Config::current())]( - DiagnosticsEngine::Level DiagLevel, + const Config& Cfg = Config::current(); + 
ASTDiags.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel, const clang::Diagnostic &Info) { if (Cfg.Diagnostics.SuppressAll || isBuiltinDiagnosticSuppressed(Info.getID(), Cfg.Diagnostics.Suppress)) From ea99c885a63de9af673a5e5cd51f44fb70c83c1b Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 12:24:30 -0800 Subject: [PATCH 004/318] Permit __VA_OPT__ in all language modes and allow it to be detected with #ifdef. These changes are intended to give code a path to move away from the GNU ,##__VA_ARGS__ extension, which is non-conforming in some situations and which we'd like to disable in our conforming mode in those cases. (cherry picked from commit 0436ec2128c9775ba13b0308937238fc79673fdd) --- clang/include/clang/Lex/Preprocessor.h | 19 ++++++++++++ .../include/clang/Lex/VariadicMacroSupport.h | 10 ++---- clang/lib/Lex/PPDirectives.cpp | 5 +++ clang/lib/Lex/PPExpressions.cpp | 5 +++ clang/lib/Lex/PPMacroExpansion.cpp | 6 +++- clang/lib/Lex/Preprocessor.cpp | 19 +++++------- clang/test/Preprocessor/macro_vaopt_check.cpp | 31 ++++++++++++++++++- .../test/Preprocessor/macro_vaopt_expand.cpp | 4 ++- 8 files changed, 78 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 68139cb24b31..ba8bdaa23c4c 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -447,6 +447,25 @@ class Preprocessor { ElseLoc(ElseLoc) {} }; + class IfdefMacroNameScopeRAII { + Preprocessor &PP; + bool VAOPTWasPoisoned; + + public: + IfdefMacroNameScopeRAII(Preprocessor &PP) + : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) { + PP.Ident__VA_OPT__->setIsPoisoned(false); + } + IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete; + IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete; + ~IfdefMacroNameScopeRAII() { Exit(); } + + void Exit() { + if (VAOPTWasPoisoned) + PP.Ident__VA_OPT__->setIsPoisoned(true); + } 
+ }; + private: friend class ASTReader; friend class MacroArgs; diff --git a/clang/include/clang/Lex/VariadicMacroSupport.h b/clang/include/clang/Lex/VariadicMacroSupport.h index 989e0ac703c9..119f02201fc6 100644 --- a/clang/include/clang/Lex/VariadicMacroSupport.h +++ b/clang/include/clang/Lex/VariadicMacroSupport.h @@ -39,17 +39,14 @@ namespace clang { assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned " "outside an ISO C/C++ variadic " "macro definition!"); - assert( - !Ident__VA_OPT__ || - (Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!")); + assert(Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!"); } /// Client code should call this function just before the Preprocessor is /// about to Lex tokens from the definition of a variadic (ISO C/C++) macro. void enterScope() { Ident__VA_ARGS__->setIsPoisoned(false); - if (Ident__VA_OPT__) - Ident__VA_OPT__->setIsPoisoned(false); + Ident__VA_OPT__->setIsPoisoned(false); } /// Client code should call this function as soon as the Preprocessor has @@ -58,8 +55,7 @@ namespace clang { /// (might be explicitly called, and then reinvoked via the destructor). void exitScope() { Ident__VA_ARGS__->setIsPoisoned(true); - if (Ident__VA_OPT__) - Ident__VA_OPT__->setIsPoisoned(true); + Ident__VA_OPT__->setIsPoisoned(true); } ~VariadicMacroScopeGuard() { exitScope(); } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index d6b03d85913d..e2aa93455ea5 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2928,9 +2928,14 @@ void Preprocessor::HandleIfdefDirective(Token &Result, ++NumIf; Token DirectiveTok = Result; + // __VA_OPT__ is allowed as the operand of #if[n]def. + IfdefMacroNameScopeRAII IfdefMacroNameScope(*this); + Token MacroNameTok; ReadMacroName(MacroNameTok); + IfdefMacroNameScope.Exit(); + // Error reading macro name? If so, diagnostic already issued. 
if (MacroNameTok.is(tok::eod)) { // Skip code until we get to #endif. This helps with recovery by not diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 8c120c13d7d2..952fb8f121dc 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -104,6 +104,9 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, SourceLocation beginLoc(PeekTok.getLocation()); Result.setBegin(beginLoc); + // __VA_OPT__ is allowed as the operand of 'defined'. + Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP); + // Get the next token, don't expand it. PP.LexUnexpandedNonComment(PeekTok); @@ -122,6 +125,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexUnexpandedNonComment(PeekTok); } + IfdefMacroNameScope.Exit(); + // If we don't have a pp-identifier now, this is an error. if (PP.CheckMacroName(PeekTok, MU_Other)) return true; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 43d31d6c5732..f6ca04defeb9 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -323,13 +323,16 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) { /// RegisterBuiltinMacro - Register the specified identifier in the identifier /// table and mark it as a builtin macro to be expanded. -static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ +static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name, + bool Disabled = false) { // Get the identifier. IdentifierInfo *Id = PP.getIdentifierInfo(Name); // Mark it as being a macro that is builtin. 
MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); + if (Disabled) + MI->DisableMacro(); PP.appendDefMacroDirective(Id, MI); return Id; } @@ -343,6 +346,7 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__"); Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__"); Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma"); + Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true); // C++ Standing Document Extensions. if (getLangOpts().CPlusPlus) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 94f1ce91f884..9baba204b324 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -115,23 +115,20 @@ Preprocessor::Preprocessor(std::shared_ptr PPOpts, BuiltinInfo = std::make_unique(); - // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of - // a macro. They get unpoisoned where it is allowed. - (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); - if (getLangOpts().CPlusPlus20) { - (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); - } else { - Ident__VA_OPT__ = nullptr; - } - // Initialize the pragma handlers. RegisterBuiltinPragmas(); // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); + // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of + // a macro. They get unpoisoned where it is allowed. Note that we model + // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it. 
+ (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use); + Ident__VA_OPT__->setIsPoisoned(); + SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use); + if(LangOpts.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp index fb52e9946af3..84f3b85871dd 100644 --- a/clang/test/Preprocessor/macro_vaopt_check.cpp +++ b/clang/test/Preprocessor/macro_vaopt_check.cpp @@ -1,4 +1,20 @@ -// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++2a +// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++20 +// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11 +// RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99 + +// Check that support for __VA_OPT__ can be detected by #ifdef. +#ifndef __VA_OPT__ +#error should be defined +#endif + +#ifdef __VA_OPT__ +#else +#error should be defined +#endif + +#if !defined(__VA_OPT__) +#error should be defined +#endif //expected-error@+1{{missing '('}} #define V1(...) __VA_OPT__ @@ -62,3 +78,16 @@ #define V1(...) __VA_OPT__ ((()) #undef V1 +// __VA_OPT__ can't appear anywhere else. +#if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} +#endif + +#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} + +// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned. +#define Z(...) 
(0 __VA_OPT__(|| 1)) +#if defined(__VA_OPT__) && Z(hello) +// OK +#else +#error bad +#endif diff --git a/clang/test/Preprocessor/macro_vaopt_expand.cpp b/clang/test/Preprocessor/macro_vaopt_expand.cpp index 7ec4f6128cfa..5eb0facb83f7 100644 --- a/clang/test/Preprocessor/macro_vaopt_expand.cpp +++ b/clang/test/Preprocessor/macro_vaopt_expand.cpp @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -E %s -pedantic -std=c++2a | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E %s -pedantic -std=c++20 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E %s -pedantic -std=c++11 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E -x c %s -pedantic -std=c99 | FileCheck -strict-whitespace %s #define LPAREN ( #define RPAREN ) From 9ea2a107ca4055a3a4960cb6dffb84b7f43bd8ea Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 13:14:02 -0800 Subject: [PATCH 005/318] Don't allow __VA_OPT__ to be detected by #ifdef. More study has discovered this to not actually be useful: because current C++20 implementations reject `#ifdef __VA_OPT__`, this can't really be used as a feature-test mechanism. And it's not too hard to detect __VA_OPT__ without this, for example: #define THIRD_ARG(a, b, c, ...) c #define HAS_VA_OPT(...) THIRD_ARG(__VA_OPT__(,), 1, 0, ) #if HAS_VA_OPT(?) Partially reverts 0436ec2128c9775ba13b0308937238fc79673fdd. 
(cherry picked from commit 5dfa37a76153f2a18ac7fe30721cc1332b672ea2) --- clang/include/clang/Lex/Preprocessor.h | 19 -------------- clang/lib/Lex/PPDirectives.cpp | 5 ---- clang/lib/Lex/PPExpressions.cpp | 5 ---- clang/lib/Lex/PPMacroExpansion.cpp | 6 +---- clang/lib/Lex/Preprocessor.cpp | 15 ++++++----- clang/test/Preprocessor/macro_vaopt_check.cpp | 25 +++---------------- 6 files changed, 11 insertions(+), 64 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index ba8bdaa23c4c..68139cb24b31 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -447,25 +447,6 @@ class Preprocessor { ElseLoc(ElseLoc) {} }; - class IfdefMacroNameScopeRAII { - Preprocessor &PP; - bool VAOPTWasPoisoned; - - public: - IfdefMacroNameScopeRAII(Preprocessor &PP) - : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) { - PP.Ident__VA_OPT__->setIsPoisoned(false); - } - IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete; - IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete; - ~IfdefMacroNameScopeRAII() { Exit(); } - - void Exit() { - if (VAOPTWasPoisoned) - PP.Ident__VA_OPT__->setIsPoisoned(true); - } - }; - private: friend class ASTReader; friend class MacroArgs; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e2aa93455ea5..d6b03d85913d 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2928,14 +2928,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, ++NumIf; Token DirectiveTok = Result; - // __VA_OPT__ is allowed as the operand of #if[n]def. - IfdefMacroNameScopeRAII IfdefMacroNameScope(*this); - Token MacroNameTok; ReadMacroName(MacroNameTok); - IfdefMacroNameScope.Exit(); - // Error reading macro name? If so, diagnostic already issued. if (MacroNameTok.is(tok::eod)) { // Skip code until we get to #endif. 
This helps with recovery by not diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 952fb8f121dc..8c120c13d7d2 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -104,9 +104,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, SourceLocation beginLoc(PeekTok.getLocation()); Result.setBegin(beginLoc); - // __VA_OPT__ is allowed as the operand of 'defined'. - Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP); - // Get the next token, don't expand it. PP.LexUnexpandedNonComment(PeekTok); @@ -125,8 +122,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexUnexpandedNonComment(PeekTok); } - IfdefMacroNameScope.Exit(); - // If we don't have a pp-identifier now, this is an error. if (PP.CheckMacroName(PeekTok, MU_Other)) return true; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index f6ca04defeb9..43d31d6c5732 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -323,16 +323,13 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) { /// RegisterBuiltinMacro - Register the specified identifier in the identifier /// table and mark it as a builtin macro to be expanded. -static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name, - bool Disabled = false) { +static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ // Get the identifier. IdentifierInfo *Id = PP.getIdentifierInfo(Name); // Mark it as being a macro that is builtin. 
MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); - if (Disabled) - MI->DisableMacro(); PP.appendDefMacroDirective(Id, MI); return Id; } @@ -346,7 +343,6 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__"); Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__"); Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma"); - Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true); // C++ Standing Document Extensions. if (getLangOpts().CPlusPlus) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 9baba204b324..177786d90390 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -115,20 +115,19 @@ Preprocessor::Preprocessor(std::shared_ptr PPOpts, BuiltinInfo = std::make_unique(); + // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of + // a macro. They get unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); + (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); + // Initialize the pragma handlers. RegisterBuiltinPragmas(); // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); - // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of - // a macro. They get unpoisoned where it is allowed. Note that we model - // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it. 
- (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use); - Ident__VA_OPT__->setIsPoisoned(); - SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use); - if(LangOpts.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp index 84f3b85871dd..c5c0ac518bc0 100644 --- a/clang/test/Preprocessor/macro_vaopt_check.cpp +++ b/clang/test/Preprocessor/macro_vaopt_check.cpp @@ -2,20 +2,6 @@ // RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11 // RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99 -// Check that support for __VA_OPT__ can be detected by #ifdef. -#ifndef __VA_OPT__ -#error should be defined -#endif - -#ifdef __VA_OPT__ -#else -#error should be defined -#endif - -#if !defined(__VA_OPT__) -#error should be defined -#endif - //expected-error@+1{{missing '('}} #define V1(...) __VA_OPT__ #undef V1 @@ -82,12 +68,7 @@ #if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} #endif -#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} - -// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned. -#define Z(...) 
(0 __VA_OPT__(|| 1)) -#if defined(__VA_OPT__) && Z(hello) -// OK -#else -#error bad +#ifdef __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} #endif + +#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} From 9df2b64fc5fa911ca59b3f646806ca3fd6787c2d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 16:07:51 -0800 Subject: [PATCH 006/318] [cxx_status] Mark P0732R2 as only 'partial', not 'Clang 12', as some of the changes were reverted. (cherry picked from commit 727fc31a9898dfb89610ca1bc05ff86204a77177) --- clang/www/cxx_status.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 685f32dbe0d3..fc3340ec9d96 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1005,7 +1005,7 @@

C++20 implementation status

Class types as non-type template parameters P0732R2 - Clang 12 + Partial P1907R1 From 8d22f25d155113f9cfdf3952dc49088c820f2a77 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 27 Jan 2021 16:28:04 -0800 Subject: [PATCH 007/318] [llvm-c] Move LLVMX86_AMXTypeKind & LLVMPoisonValueValueKind to the bottom to avoid value changes compared with LLVM<=11 Fixes PR48905 (cherry picked from commit 6612c2bb68becda5504099b48082c844503c6d4c) --- llvm/include/llvm-c/Core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 8274213aa839..a78df16ca404 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -160,10 +160,10 @@ typedef enum { LLVMVectorTypeKind, /**< Fixed width SIMD vector type */ LLVMMetadataTypeKind, /**< Metadata */ LLVMX86_MMXTypeKind, /**< X86 MMX */ - LLVMX86_AMXTypeKind, /**< X86 AMX */ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ - LLVMBFloatTypeKind /**< 16 bit brain floating point type */ + LLVMBFloatTypeKind, /**< 16 bit brain floating point type */ + LLVMX86_AMXTypeKind /**< X86 AMX */ } LLVMTypeKind; typedef enum { @@ -270,7 +270,6 @@ typedef enum { LLVMConstantVectorValueKind, LLVMUndefValueValueKind, - LLVMPoisonValueValueKind, LLVMConstantAggregateZeroValueKind, LLVMConstantDataArrayValueKind, LLVMConstantDataVectorValueKind, @@ -283,6 +282,7 @@ typedef enum { LLVMInlineAsmValueKind, LLVMInstructionValueKind, + LLVMPoisonValueValueKind } LLVMValueKind; typedef enum { From 8364f5369eeeb2da8db2bae7716c549930d8df93 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 27 Jan 2021 10:59:28 -0800 Subject: [PATCH 008/318] Revert "Suppress non-conforming GNU paste extension in all standard-conforming modes" This reverts commit f4537935dcdbf390c863591cf556e76c3abab9c1. This reverts commit b43c26d036dcbf7a6881f39e4434cf059364022a. This GNU and MSVC extension turns out to be very popular. 
Most projects are not using C++20, so cannot use the new __VA_OPT__ feature to be standards conformant. The other workaround, using -std=gnu*, enables too many language extensions and isn't viable. Until there is a way for users to get the behavior provided by the `, ## __VA_ARGS__` extension in the -std=c++17 and earlier language modes, we need to revert this. (cherry picked from commit 61a66e4b5ec18e9e73c2f6334f6b7f7dd4bca77e) --- clang/lib/Lex/TokenLexer.cpp | 10 +++++----- clang/test/Preprocessor/macro_fn_comma_swallow2.c | 5 ----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 97cb2cf0bb8c..da5681aaf478 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -148,12 +148,12 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( return false; // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if - // __VA_ARGS__ is empty, but not in strict mode where there are no - // named arguments, where it remains. With GNU extensions, it is removed - // regardless of named arguments. + // __VA_ARGS__ is empty, but not in strict C99 mode where there are no + // named arguments, where it remains. In all other modes, including C99 + // with GNU extensions, it is removed regardless of named arguments. // Microsoft also appears to support this extension, unofficially. - if (!PP.getLangOpts().GNUMode && !PP.getLangOpts().MSVCCompat && - Macro->getNumParams() < 2) + if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode + && Macro->getNumParams() < 2) return false; // Is a comma available to be removed? diff --git a/clang/test/Preprocessor/macro_fn_comma_swallow2.c b/clang/test/Preprocessor/macro_fn_comma_swallow2.c index 4e4960ca7f18..93ab2b83664a 100644 --- a/clang/test/Preprocessor/macro_fn_comma_swallow2.c +++ b/clang/test/Preprocessor/macro_fn_comma_swallow2.c @@ -1,16 +1,11 @@ // Test the __VA_ARGS__ comma swallowing extensions of various compiler dialects. 
// RUN: %clang_cc1 -E %s | FileCheck -check-prefix=GCC -strict-whitespace %s -// RUN: %clang_cc1 -E -std=c90 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=c99 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=c11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -x c++ %s | FileCheck -check-prefix=GCC -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++03 %s | FileCheck -check-prefix=C99 -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=gnu99 %s | FileCheck -check-prefix=GCC -strict-whitespace %s // RUN: %clang_cc1 -E -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++11 -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s // RUN: %clang_cc1 -E -DNAMED %s | FileCheck -check-prefix=GCC -strict-whitespace %s // RUN: %clang_cc1 -E -std=c99 -DNAMED %s | FileCheck -check-prefix=C99 -strict-whitespace %s From b0085d205b3063c332a080599830ef0500cb6924 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Mon, 7 Dec 2020 10:26:49 -0500 Subject: [PATCH 009/318] Itanium Mangling: Mangle `__alignof__` differently than `alignof`. The two operations have acted differently since Clang 8, but were unfortunately mangled the same. The new mangling uses new "vendor extended expression" syntax proposed in https://github.com/itanium-cxx-abi/cxx-abi/issues/112 GCC had the same mangling problem, https://gcc.gnu.org/PR88115, and will hopefully be switching to the same mangling as implemented here. Additionally, fix the mangling of `__uuidof` to use the new extension syntax, instead of its previous nonstandard special-case. Adjusts the demangler accordingly. 
Differential Revision: https://reviews.llvm.org/D93922 (cherry picked from commit 9c7aeaebb3ac1b94200b59b111742cb6b8f090c2) --- clang/lib/AST/ItaniumMangle.cpp | 103 ++++++++++++------ clang/test/CodeGenCXX/mangle-alignof.cpp | 25 +++++ .../CodeGenCXX/microsoft-uuidof-mangling.cpp | 44 +++++--- libcxxabi/src/demangle/ItaniumDemangle.h | 68 ++++++------ libcxxabi/test/test_demangle.pass.cpp | 14 ++- llvm/include/llvm/Demangle/ItaniumDemangle.h | 68 ++++++------ 6 files changed, 211 insertions(+), 111 deletions(-) create mode 100644 clang/test/CodeGenCXX/mangle-alignof.cpp diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 6c8d5687c64a..668733a4be34 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -558,6 +558,7 @@ class CXXNameMangler { unsigned NumTemplateArgs); void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL); void mangleTemplateArg(TemplateArgument A, bool NeedExactType); + void mangleTemplateArgExpr(const Expr *E); void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel, bool NeedExactType = false); @@ -3528,8 +3529,8 @@ void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) { Out << "u" << VendorQualifier.size() << VendorQualifier; Out << "I"; - mangleTemplateArg(T->getRowExpr(), false); - mangleTemplateArg(T->getColumnExpr(), false); + mangleTemplateArgExpr(T->getRowExpr()); + mangleTemplateArgExpr(T->getColumnExpr()); mangleType(T->getElementType()); Out << "E"; } @@ -3916,6 +3917,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= ds # expr.*expr // ::= sZ # size of a parameter pack // ::= sZ # size of a function parameter pack + // ::= u * E # vendor extended expression // ::= // ::= L E # integer literal // ::= L E # floating literal @@ -4007,14 +4009,26 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case Expr::CXXUuidofExprClass: { const CXXUuidofExpr *UE = cast(E); - if 
(UE->isTypeOperand()) { - QualType UuidT = UE->getTypeOperand(Context.getASTContext()); - Out << "u8__uuidoft"; - mangleType(UuidT); + // As of clang 12, uuidof uses the vendor extended expression + // mangling. Previously, it used a special-cased nonstandard extension. + if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + Out << "u8__uuidof"; + if (UE->isTypeOperand()) + mangleType(UE->getTypeOperand(Context.getASTContext())); + else + mangleTemplateArgExpr(UE->getExprOperand()); + Out << 'E'; } else { - Expr *UuidExp = UE->getExprOperand(); - Out << "u8__uuidofz"; - mangleExpression(UuidExp, Arity); + if (UE->isTypeOperand()) { + QualType UuidT = UE->getTypeOperand(Context.getASTContext()); + Out << "u8__uuidoft"; + mangleType(UuidT); + } else { + Expr *UuidExp = UE->getExprOperand(); + Out << "u8__uuidofz"; + mangleExpression(UuidExp, Arity); + } } break; } @@ -4312,13 +4326,39 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } + auto MangleAlignofSizeofArg = [&] { + if (SAE->isArgumentType()) { + Out << 't'; + mangleType(SAE->getArgumentType()); + } else { + Out << 'z'; + mangleExpression(SAE->getArgumentExpr()); + } + }; + switch(SAE->getKind()) { case UETT_SizeOf: Out << 's'; + MangleAlignofSizeofArg(); break; case UETT_PreferredAlignOf: + // As of clang 12, we mangle __alignof__ differently than alignof. (They + // have acted differently since Clang 8, but were previously mangled the + // same.) 
+ if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + Out << "u11__alignof__"; + if (SAE->isArgumentType()) + mangleType(SAE->getArgumentType()); + else + mangleTemplateArgExpr(SAE->getArgumentExpr()); + Out << 'E'; + break; + } + LLVM_FALLTHROUGH; case UETT_AlignOf: Out << 'a'; + MangleAlignofSizeofArg(); break; case UETT_VecStep: { DiagnosticsEngine &Diags = Context.getDiags(); @@ -4336,13 +4376,6 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { return; } } - if (SAE->isArgumentType()) { - Out << 't'; - mangleType(SAE->getArgumentType()); - } else { - Out << 'z'; - mangleExpression(SAE->getArgumentExpr()); - } break; } @@ -4971,23 +5004,7 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { mangleType(A.getAsTemplateOrTemplatePattern()); break; case TemplateArgument::Expression: { - // It's possible to end up with a DeclRefExpr here in certain - // dependent cases, in which case we should mangle as a - // declaration. - const Expr *E = A.getAsExpr()->IgnoreParenImpCasts(); - if (const DeclRefExpr *DRE = dyn_cast(E)) { - const ValueDecl *D = DRE->getDecl(); - if (isa(D) || isa(D)) { - Out << 'L'; - mangle(D); - Out << 'E'; - break; - } - } - - Out << 'X'; - mangleExpression(E); - Out << 'E'; + mangleTemplateArgExpr(A.getAsExpr()); break; } case TemplateArgument::Integral: @@ -5044,6 +5061,26 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { } } +void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { + // It's possible to end up with a DeclRefExpr here in certain + // dependent cases, in which case we should mangle as a + // declaration. 
+ E = E->IgnoreParenImpCasts(); + if (const DeclRefExpr *DRE = dyn_cast(E)) { + const ValueDecl *D = DRE->getDecl(); + if (isa(D) || isa(D)) { + Out << 'L'; + mangle(D); + Out << 'E'; + return; + } + } + + Out << 'X'; + mangleExpression(E); + Out << 'E'; +} + /// Determine whether a given value is equivalent to zero-initialization for /// the purpose of discarding a trailing portion of a 'tl' mangling. /// diff --git a/clang/test/CodeGenCXX/mangle-alignof.cpp b/clang/test/CodeGenCXX/mangle-alignof.cpp new file mode 100644 index 000000000000..0a65c7e87a2d --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-alignof.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple | FileCheck %s --check-prefix=CHECK-NEW +// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple -fclang-abi-compat=11 | FileCheck %s --check-prefix=CHECK-OLD + +// Verify the difference in mangling for alignof and __alignof__ in a new ABI +// compat mode. 
+ +template void f1(decltype(alignof(T))) {} +template void f1(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f1IiEvDTatT_E +// CHECK-NEW: void @_Z2f1IiEvDTatT_E + +template void f2(decltype(__alignof__(T))) {} +template void f2(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f2IiEvDTatT_E +// CHECK-NEW: void @_Z2f2IiEvDTu11__alignof__T_E + +template void f3(decltype(alignof(T(0)))) {} +template void f3(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f3IiEvDTazcvT_Li0EE +// CHECK-NEW: void @_Z2f3IiEvDTazcvT_Li0EE + +template void f4(decltype(__alignof__(T(0)))) {} +template void f4(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f4IiEvDTazcvT_Li0EE +// CHECK-NEW: void @_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp index ec26be292acc..321f65cacc71 100644 --- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp +++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s --check-prefixes=CHECK,CHECK-V12 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions -fclang-abi-compat=11 | FileCheck %s --check-prefixes=CHECK,CHECK-V11 // rdar://17784718 typedef struct _GUID @@ -24,11 +25,16 @@ struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) TestStruct struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) OtherStruct {}; -template void test_uuidofType(void *arg[sizeof(__uuidof(T))] = 0) {} +template void test_uuidofType(decltype(__uuidof(T)) arg) {} -template void test_uuidofExpr(void *arg[sizeof(__uuidof(typename T::member))] = 0) {} +template void test_uuidofExpr(decltype(__uuidof(T::member)) arg) {} -struct HasMember { typedef TestStruct member; }; +struct HasMember { + TestStruct member; +}; + +// Ensure that mangling an 
"expr-primary" argument is handled properly. +template void test_uuidofExpr2(decltype(T{}, __uuidof(HasMember::member)) arg) {} template struct UUIDTestTwo { UUIDTestTwo(); }; @@ -39,19 +45,29 @@ int main(int argc, const char * argv[]) // type had better not mention TestStruct or OtherStruct! UUIDTestTwo<__uuidof(TestStruct)> uuidof_test2; UUIDTestTwo<__uuidof(OtherStruct)> uuidof_test3; - test_uuidofType(); - test_uuidofExpr(); + test_uuidofType(GUID{}); + test_uuidofExpr(GUID{}); + test_uuidofExpr2(GUID{}); return 0; } // CHECK: define{{.*}} i32 @main -// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_Z15test_uuidofTypeI10TestStructEvPPv(i8** null) -// CHECK: call void @_Z15test_uuidofExprI9HasMemberEvPPv(i8** null) - +// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK-V11: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( +// CHECK-V12: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( +// CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( +// CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( +// CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( +// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( +// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ // CHECK: define linkonce_odr void 
@_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvPPv -// CHECK: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvPPv +// CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( +// CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( +// CHECK-V11: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( +// CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( +// CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( +// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( +// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 6bfc02d15379..e5fca98f9271 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -96,7 +96,6 @@ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ - X(UUIDOfExpr) \ X(BoolExpr) \ X(StringLiteral) \ X(LambdaExpr) \ @@ -2035,21 +2034,6 @@ class ThrowExpr : public Node { } }; -// MSVC __uuidof extension, generated by clang in -fms-extensions mode. 
-class UUIDOfExpr : public Node { - Node *Operand; -public: - UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {} - - template void match(Fn F) const { F(Operand); } - - void printLeft(OutputStream &S) const override { - S << "__uuidof("; - Operand->print(S); - S << ")"; - } -}; - class BoolExpr : public Node { bool Value; @@ -5013,6 +4997,43 @@ Node *AbstractManglingParser::parseExpr() { } } return nullptr; + case 'u': { + ++First; + Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); + if (!Name) + return nullptr; + // Special case legacy __uuidof mangling. The 't' and 'z' appear where the + // standard encoding expects a , and would be otherwise be + // interpreted as node 'short' or 'ellipsis'. However, neither + // __uuidof(short) nor __uuidof(...) can actually appear, so there is no + // actual conflict here. + if (Name->getBaseName() == "__uuidof") { + if (numLeft() < 2) + return nullptr; + if (*First == 't') { + ++First; + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + return make(Name, makeNodeArray(&Ty, &Ty + 1)); + } + if (*First == 'z') { + ++First; + Node *Ex = getDerived().parseExpr(); + if (!Ex) + return nullptr; + return make(Name, makeNodeArray(&Ex, &Ex + 1)); + } + } + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } + return make(Name, popTrailingNodeArray(ExprsBegin)); + } case '1': case '2': case '3': @@ -5024,21 +5045,6 @@ Node *AbstractManglingParser::parseExpr() { case '9': return getDerived().parseUnresolvedName(); } - - if (consumeIf("u8__uuidoft")) { - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make(Ty); - } - - if (consumeIf("u8__uuidofz")) { - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make(Ex); - } - return nullptr; } diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp 
index 3954fdba048e..512cc3928fdd 100644 --- a/libcxxabi/test/test_demangle.pass.cpp +++ b/libcxxabi/test/test_demangle.pass.cpp @@ -29776,8 +29776,18 @@ const char* cases[][2] = // Vendor extension types are substitution candidates. {"_Z1fu3fooS_", "f(foo, foo)"}, - {"_ZN3FooIXu8__uuidofzdeL_Z3sucEEEC1Ev", "Foo<__uuidof(*(suc))>::Foo()"}, - {"_ZN3FooIXu8__uuidoft13SomeUUIDClassEEC1Ev", "Foo<__uuidof(SomeUUIDClass)>::Foo()"}, + // alignof with type and expression, and __alignof__ with the same. + {"_Z2f1IiEvDTatT_E", "void f1(decltype(alignof (int)))"}, + {"_Z2f3IiEvDTazcvT_Li0EE", "void f3(decltype(alignof ((int)(0))))"}, + {"_Z2f2IiEvDTu11__alignof__T_EE", "void f2(decltype(__alignof__(int)))"}, + {"_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE", "void f4(decltype(__alignof__((int)(0))))"}, + + // Legacy nonstandard mangling for __uuidof. + {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E", "void test_uuidofType(decltype(__uuidof(TestStruct)))"}, + {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE", "void test_uuidofExpr(decltype(__uuidof(HasMember::member)))"}, + // Current __uuidof mangling using vendor extended expression. + {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE", "void test_uuidofType(decltype(__uuidof(TestStruct)))"}, + {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE", "void test_uuidofExpr(decltype(__uuidof(HasMember::member)))"}, // C++2a char8_t: {"_ZTSPDu", "typeinfo name for char8_t*"}, diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6bfc02d15379..e5fca98f9271 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -96,7 +96,6 @@ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ - X(UUIDOfExpr) \ X(BoolExpr) \ X(StringLiteral) \ X(LambdaExpr) \ @@ -2035,21 +2034,6 @@ class ThrowExpr : public Node { } }; -// MSVC __uuidof extension, generated by clang in -fms-extensions mode. 
-class UUIDOfExpr : public Node { - Node *Operand; -public: - UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {} - - template void match(Fn F) const { F(Operand); } - - void printLeft(OutputStream &S) const override { - S << "__uuidof("; - Operand->print(S); - S << ")"; - } -}; - class BoolExpr : public Node { bool Value; @@ -5013,6 +4997,43 @@ Node *AbstractManglingParser::parseExpr() { } } return nullptr; + case 'u': { + ++First; + Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); + if (!Name) + return nullptr; + // Special case legacy __uuidof mangling. The 't' and 'z' appear where the + // standard encoding expects a , and would be otherwise be + // interpreted as node 'short' or 'ellipsis'. However, neither + // __uuidof(short) nor __uuidof(...) can actually appear, so there is no + // actual conflict here. + if (Name->getBaseName() == "__uuidof") { + if (numLeft() < 2) + return nullptr; + if (*First == 't') { + ++First; + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + return make(Name, makeNodeArray(&Ty, &Ty + 1)); + } + if (*First == 'z') { + ++First; + Node *Ex = getDerived().parseExpr(); + if (!Ex) + return nullptr; + return make(Name, makeNodeArray(&Ex, &Ex + 1)); + } + } + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } + return make(Name, popTrailingNodeArray(ExprsBegin)); + } case '1': case '2': case '3': @@ -5024,21 +5045,6 @@ Node *AbstractManglingParser::parseExpr() { case '9': return getDerived().parseUnresolvedName(); } - - if (consumeIf("u8__uuidoft")) { - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make(Ty); - } - - if (consumeIf("u8__uuidofz")) { - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make(Ex); - } - return nullptr; } From 7da92afbf08e90960f7e5dee00bbf6ef8f323a5c Mon Sep 17 00:00:00 2001 From: James Y Knight 
Date: Sun, 24 Jan 2021 15:50:15 -0500 Subject: [PATCH 010/318] Itanium Mangling: Fix handling of <expr-primary> in <template-arg>. Previously, we were emitting an extraneous X .. E in <template-arg> around an <expr-primary> if the template argument was constructed from an expression (rather than an already-evaluated literal value). In such a case, we would then e.g. emit 'XLi0EE' instead of 'Li0E'. We had one special-case for DeclRefExpr expressions, in particular, to emit them in the mangled name without the surrounding X/E. However, unfortunately, that special case also triggered for ParmVarDecl (a subtype of VarDecl), and _incorrectly_ emitted 'L_Z .. E' instead of the proper 'Xfp_E'. This change causes mangleExpression itself to be responsible for emitting X/E around non-primary expressions, which removes the special-case, and corrects both these problems. Differential Revision: https://reviews.llvm.org/D95487 (cherry picked from commit 8ca33605ff0cfc536f5c6710fb5f6378bf11959a) --- clang/lib/AST/ItaniumMangle.cpp | 223 +++++++++++++----- clang/test/CodeGenCXX/clang-abi-compat.cpp | 94 +++++++- clang/test/CodeGenCXX/mangle-abi-tag.cpp | 2 +- clang/test/CodeGenCXX/mangle-concept.cpp | 4 +- clang/test/CodeGenCXX/mangle-template.cpp | 4 +- clang/test/CodeGenCXX/mangle.cpp | 2 +- clang/test/CodeGenCXX/matrix-type.cpp | 16 +- .../CodeGenCXX/microsoft-uuidof-mangling.cpp | 6 +- 8 files changed, 259 insertions(+), 92 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 668733a4be34..54e2f361a9f1 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -546,8 +546,8 @@ class CXXNameMangler { unsigned knownArity); void mangleCastExpression(const Expr *E, StringRef CastEncoding); void mangleInitListElements(const InitListExpr *InitList); - void mangleDeclRefExpr(const NamedDecl *D); - void mangleExpression(const Expr *E, unsigned Arity = UnknownArity); + void mangleExpression(const Expr *E, unsigned Arity = UnknownArity, + bool AsTemplateArg = false); void
mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom); void mangleCXXDtorType(CXXDtorType T); @@ -3872,33 +3872,8 @@ void CXXNameMangler::mangleInitListElements(const InitListExpr *InitList) { mangleExpression(InitList->getInit(i)); } -void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) { - switch (D->getKind()) { - default: - // ::= L E # external name - Out << 'L'; - mangle(D); - Out << 'E'; - break; - - case Decl::ParmVar: - mangleFunctionParam(cast(D)); - break; - - case Decl::EnumConstant: { - const EnumConstantDecl *ED = cast(D); - mangleIntegerLiteral(ED->getType(), ED->getInitVal()); - break; - } - - case Decl::NonTypeTemplateParm: - const NonTypeTemplateParmDecl *PD = cast(D); - mangleTemplateParameter(PD->getDepth(), PD->getIndex()); - break; - } -} - -void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { +void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, + bool AsTemplateArg) { // ::= // ::= // ::= @@ -3912,6 +3887,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= at # alignof (a type) // ::= // ::= + // ::= fpT # 'this' expression (part of ) // ::= sr # dependent name // ::= sr # dependent template-id // ::= ds # expr.*expr @@ -3920,11 +3896,55 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= u * E # vendor extended expression // ::= // ::= L E # integer literal - // ::= L E # floating literal + // ::= L E # floating literal + // ::= L E # string literal + // ::= L E # nullptr literal "LDnE" + // ::= L 0 E # null pointer template argument + // ::= L _ E # complex floating point literal (C99); not used by clang // ::= L E # external name - // ::= fpT # 'this' expression QualType ImplicitlyConvertedToType; + // A top-level expression that's not needs to be wrapped in + // X...E in a template arg. 
+ bool IsPrimaryExpr = true; + auto NotPrimaryExpr = [&] { + if (AsTemplateArg && IsPrimaryExpr) + Out << 'X'; + IsPrimaryExpr = false; + }; + + auto MangleDeclRefExpr = [&](const NamedDecl *D) { + switch (D->getKind()) { + default: + // ::= L E # external name + Out << 'L'; + mangle(D); + Out << 'E'; + break; + + case Decl::ParmVar: + NotPrimaryExpr(); + mangleFunctionParam(cast(D)); + break; + + case Decl::EnumConstant: { + // + const EnumConstantDecl *ED = cast(D); + mangleIntegerLiteral(ED->getType(), ED->getInitVal()); + break; + } + + case Decl::NonTypeTemplateParm: + NotPrimaryExpr(); + const NonTypeTemplateParmDecl *PD = cast(D); + mangleTemplateParameter(PD->getDepth(), PD->getIndex()); + break; + } + }; + + // 'goto recurse' is used when handling a simple "unwrapping" node which + // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need + // to be preserved. recurse: switch (E->getStmtClass()) { case Expr::NoStmtClass: @@ -3996,6 +4016,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case Expr::SourceLocExprClass: case Expr::BuiltinBitCastExprClass: { + NotPrimaryExpr(); if (!NullOut) { // As bad as this diagnostic is, it's better than crashing. DiagnosticsEngine &Diags = Context.getDiags(); @@ -4003,11 +4024,13 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { "cannot yet mangle expression type %0"); Diags.Report(E->getExprLoc(), DiagID) << E->getStmtClassName() << E->getSourceRange(); + return; } break; } case Expr::CXXUuidofExprClass: { + NotPrimaryExpr(); const CXXUuidofExpr *UE = cast(E); // As of clang 12, uuidof uses the vendor extended expression // mangling. Previously, it used a special-cased nonstandard extension. 
@@ -4027,7 +4050,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } else { Expr *UuidExp = UE->getExprOperand(); Out << "u8__uuidofz"; - mangleExpression(UuidExp, Arity); + mangleExpression(UuidExp); } } break; @@ -4035,13 +4058,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // Even gcc-4.5 doesn't mangle this. case Expr::BinaryConditionalOperatorClass: { + NotPrimaryExpr(); DiagnosticsEngine &Diags = Context.getDiags(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "?: operator with omitted middle operand cannot be mangled"); Diags.Report(E->getExprLoc(), DiagID) << E->getStmtClassName() << E->getSourceRange(); - break; + return; } // These are used for internal purposes and cannot be meaningfully mangled. @@ -4049,6 +4073,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { llvm_unreachable("cannot mangle opaque value; mangling wrong thing?"); case Expr::InitListExprClass: { + NotPrimaryExpr(); Out << "il"; mangleInitListElements(cast(E)); Out << "E"; @@ -4056,6 +4081,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::DesignatedInitExprClass: { + NotPrimaryExpr(); auto *DIE = cast(E); for (const auto &Designator : DIE->designators()) { if (Designator.isFieldDesignator()) { @@ -4077,27 +4103,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDefaultArgExprClass: - mangleExpression(cast(E)->getExpr(), Arity); - break; + E = cast(E)->getExpr(); + goto recurse; case Expr::CXXDefaultInitExprClass: - mangleExpression(cast(E)->getExpr(), Arity); - break; + E = cast(E)->getExpr(); + goto recurse; case Expr::CXXStdInitializerListExprClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::SubstNonTypeTemplateParmExprClass: - mangleExpression(cast(E)->getReplacement(), - Arity); - break; + E = cast(E)->getReplacement(); + goto 
recurse; case Expr::UserDefinedLiteralClass: // We follow g++'s approach of mangling a UDL as a call to the literal // operator. case Expr::CXXMemberCallExprClass: // fallthrough case Expr::CallExprClass: { + NotPrimaryExpr(); const CallExpr *CE = cast(E); // ::= cp * E @@ -4128,6 +4154,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXNewExprClass: { + NotPrimaryExpr(); const CXXNewExpr *New = cast(E); if (New->isGlobalNew()) Out << "gs"; Out << (New->isArray() ? "na" : "nw"); @@ -4163,6 +4190,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXPseudoDestructorExprClass: { + NotPrimaryExpr(); const auto *PDE = cast(E); if (const Expr *Base = PDE->getBase()) mangleMemberExprBase(Base, PDE->isArrow()); @@ -4189,6 +4217,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::MemberExprClass: { + NotPrimaryExpr(); const MemberExpr *ME = cast(E); mangleMemberExpr(ME->getBase(), ME->isArrow(), ME->getQualifier(), nullptr, @@ -4199,6 +4228,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnresolvedMemberExprClass: { + NotPrimaryExpr(); const UnresolvedMemberExpr *ME = cast(E); mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(), ME->isArrow(), ME->getQualifier(), nullptr, @@ -4209,6 +4239,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDependentScopeMemberExprClass: { + NotPrimaryExpr(); const CXXDependentScopeMemberExpr *ME = cast(E); mangleMemberExpr(ME->isImplicitAccess() ? 
nullptr : ME->getBase(), @@ -4221,6 +4252,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnresolvedLookupExprClass: { + NotPrimaryExpr(); const UnresolvedLookupExpr *ULE = cast(E); mangleUnresolvedName(ULE->getQualifier(), ULE->getName(), ULE->getTemplateArgs(), ULE->getNumTemplateArgs(), @@ -4229,6 +4261,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXUnresolvedConstructExprClass: { + NotPrimaryExpr(); const CXXUnresolvedConstructExpr *CE = cast(E); unsigned N = CE->getNumArgs(); @@ -4239,7 +4272,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { mangleType(CE->getType()); mangleInitListElements(IL); Out << "E"; - return; + break; } Out << "cv"; @@ -4251,14 +4284,17 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXConstructExprClass: { + // An implicit cast is silent, thus may contain . const auto *CE = cast(E); if (!CE->isListInitialization() || CE->isStdInitListInitialization()) { assert( CE->getNumArgs() >= 1 && (CE->getNumArgs() == 1 || isa(CE->getArg(1))) && "implicit CXXConstructExpr must have one argument"); - return mangleExpression(cast(E)->getArg(0)); + E = cast(E)->getArg(0); + goto recurse; } + NotPrimaryExpr(); Out << "il"; for (auto *E : CE->arguments()) mangleExpression(E); @@ -4267,6 +4303,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXTemporaryObjectExprClass: { + NotPrimaryExpr(); const auto *CE = cast(E); unsigned N = CE->getNumArgs(); bool List = CE->isListInitialization(); @@ -4296,17 +4333,20 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXScalarValueInitExprClass: + NotPrimaryExpr(); Out << "cv"; mangleType(E->getType()); Out << "_E"; break; case Expr::CXXNoexceptExprClass: + NotPrimaryExpr(); Out << "nx"; mangleExpression(cast(E)->getOperand()); break; case Expr::UnaryExprOrTypeTraitExprClass: { + // 
Non-instantiation-dependent traits are an integer literal. const UnaryExprOrTypeTraitExpr *SAE = cast(E); if (!SAE->isInstantiationDependent()) { @@ -4326,6 +4366,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } + NotPrimaryExpr(); // But otherwise, they are not. + auto MangleAlignofSizeofArg = [&] { if (SAE->isArgumentType()) { Out << 't'; @@ -4380,6 +4422,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXThrowExprClass: { + NotPrimaryExpr(); const CXXThrowExpr *TE = cast(E); // ::= tw # throw expression // ::= tr # rethrow @@ -4393,6 +4436,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXTypeidExprClass: { + NotPrimaryExpr(); const CXXTypeidExpr *TIE = cast(E); // ::= ti # typeid (type) // ::= te # typeid (expression) @@ -4407,6 +4451,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDeleteExprClass: { + NotPrimaryExpr(); const CXXDeleteExpr *DE = cast(E); // ::= [gs] dl # [::] delete expr // ::= [gs] da # [::] delete [] expr @@ -4417,6 +4462,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnaryOperatorClass: { + NotPrimaryExpr(); const UnaryOperator *UO = cast(E); mangleOperatorName(UnaryOperator::getOverloadedOperator(UO->getOpcode()), /*Arity=*/1); @@ -4425,6 +4471,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ArraySubscriptExprClass: { + NotPrimaryExpr(); const ArraySubscriptExpr *AE = cast(E); // Array subscript is treated as a syntactically weird form of @@ -4436,6 +4483,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::MatrixSubscriptExprClass: { + NotPrimaryExpr(); const MatrixSubscriptExpr *ME = cast(E); Out << "ixix"; mangleExpression(ME->getBase()); @@ -4446,6 +4494,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case 
Expr::CompoundAssignOperatorClass: // fallthrough case Expr::BinaryOperatorClass: { + NotPrimaryExpr(); const BinaryOperator *BO = cast(E); if (BO->getOpcode() == BO_PtrMemD) Out << "ds"; @@ -4458,6 +4507,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXRewrittenBinaryOperatorClass: { + NotPrimaryExpr(); // The mangled form represents the original syntax. CXXRewrittenBinaryOperator::DecomposedForm Decomposed = cast(E)->getDecomposedForm(); @@ -4469,6 +4519,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ConditionalOperatorClass: { + NotPrimaryExpr(); const ConditionalOperator *CO = cast(E); mangleOperatorName(OO_Conditional, /*Arity=*/3); mangleExpression(CO->getCond()); @@ -4484,19 +4535,22 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ObjCBridgedCastExprClass: { + NotPrimaryExpr(); // Mangle ownership casts as a vendor extended operator __bridge, // __bridge_transfer, or __bridge_retain. StringRef Kind = cast(E)->getBridgeKindName(); Out << "v1U" << Kind.size() << Kind; + mangleCastExpression(E, "cv"); + break; } - // Fall through to mangle the cast itself. - LLVM_FALLTHROUGH; case Expr::CStyleCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "cv"); break; case Expr::CXXFunctionalCastExprClass: { + NotPrimaryExpr(); auto *Sub = cast(E)->getSubExpr()->IgnoreImplicit(); // FIXME: Add isImplicit to CXXConstructExpr. 
if (auto *CCE = dyn_cast(Sub)) @@ -4516,22 +4570,28 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXStaticCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "sc"); break; case Expr::CXXDynamicCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "dc"); break; case Expr::CXXReinterpretCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "rc"); break; case Expr::CXXConstCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "cc"); break; case Expr::CXXAddrspaceCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "ac"); break; case Expr::CXXOperatorCallExprClass: { + NotPrimaryExpr(); const CXXOperatorCallExpr *CE = cast(E); unsigned NumArgs = CE->getNumArgs(); // A CXXOperatorCallExpr for OO_Arrow models only semantics, not syntax @@ -4545,9 +4605,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ParenExprClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; - + E = cast(E)->getSubExpr(); + goto recurse; case Expr::ConceptSpecializationExprClass: { // ::= L E # external name @@ -4561,10 +4620,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::DeclRefExprClass: - mangleDeclRefExpr(cast(E)->getDecl()); + // MangleDeclRefExpr helper handles primary-vs-nonprimary + MangleDeclRefExpr(cast(E)->getDecl()); break; case Expr::SubstNonTypeTemplateParmPackExprClass: + NotPrimaryExpr(); // FIXME: not clear how to mangle this! // template class A { // template void foo(U (&x)[N]...); @@ -4573,14 +4634,16 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; case Expr::FunctionParmPackExprClass: { + NotPrimaryExpr(); // FIXME: not clear how to mangle this! 
const FunctionParmPackExpr *FPPE = cast(E); Out << "v110_SUBSTPACK"; - mangleDeclRefExpr(FPPE->getParameterPack()); + MangleDeclRefExpr(FPPE->getParameterPack()); break; } case Expr::DependentScopeDeclRefExprClass: { + NotPrimaryExpr(); const DependentScopeDeclRefExpr *DRE = cast(E); mangleUnresolvedName(DRE->getQualifier(), DRE->getDeclName(), DRE->getTemplateArgs(), DRE->getNumTemplateArgs(), @@ -4589,24 +4652,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXBindTemporaryExprClass: - mangleExpression(cast(E)->getSubExpr()); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::ExprWithCleanupsClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::FloatingLiteralClass: { + // const FloatingLiteral *FL = cast(E); mangleFloatLiteral(FL->getType(), FL->getValue()); break; } case Expr::FixedPointLiteralClass: + // Currently unimplemented -- might be in future? mangleFixedPointLiteral(); break; case Expr::CharacterLiteralClass: + // Out << 'L'; mangleType(E->getType()); Out << cast(E)->getValue(); @@ -4615,18 +4681,21 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // FIXME. __objc_yes/__objc_no are mangled same as true/false case Expr::ObjCBoolLiteralExprClass: + // Out << "Lb"; Out << (cast(E)->getValue() ? '1' : '0'); Out << 'E'; break; case Expr::CXXBoolLiteralExprClass: + // Out << "Lb"; Out << (cast(E)->getValue() ? '1' : '0'); Out << 'E'; break; case Expr::IntegerLiteralClass: { + // llvm::APSInt Value(cast(E)->getValue()); if (E->getType()->isSignedIntegerType()) Value.setIsSigned(true); @@ -4635,6 +4704,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ImaginaryLiteralClass: { + // const ImaginaryLiteral *IE = cast(E); // Mangle as if a complex literal. // Proposal from David Vandevoorde, 2010.06.30. 
@@ -4658,6 +4728,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::StringLiteralClass: { + // // Revised proposal from David Vandervoorde, 2010.07.15. Out << 'L'; assert(isa(E->getType())); @@ -4667,21 +4738,25 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::GNUNullExprClass: + // // Mangle as if an integer literal 0. mangleIntegerLiteral(E->getType(), llvm::APSInt(32)); break; case Expr::CXXNullPtrLiteralExprClass: { + // Out << "LDnE"; break; } case Expr::PackExpansionExprClass: + NotPrimaryExpr(); Out << "sp"; mangleExpression(cast(E)->getPattern()); break; case Expr::SizeOfPackExprClass: { + NotPrimaryExpr(); auto *SPE = cast(E); if (SPE->isPartiallySubstituted()) { Out << "sP"; @@ -4706,12 +4781,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } - case Expr::MaterializeTemporaryExprClass: { - mangleExpression(cast(E)->getSubExpr()); - break; - } + case Expr::MaterializeTemporaryExprClass: + E = cast(E)->getSubExpr(); + goto recurse; case Expr::CXXFoldExprClass: { + NotPrimaryExpr(); auto *FE = cast(E); if (FE->isLeftFold()) Out << (FE->getInit() ? "fL" : "fl"); @@ -4733,27 +4808,34 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXThisExprClass: + NotPrimaryExpr(); Out << "fpT"; break; case Expr::CoawaitExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_await"; mangleExpression(cast(E)->getOperand()); break; case Expr::DependentCoawaitExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_await"; mangleExpression(cast(E)->getOperand()); break; case Expr::CoyieldExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_yield"; mangleExpression(cast(E)->getOperand()); break; } + + if (AsTemplateArg && !IsPrimaryExpr) + Out << 'E'; } /// Mangle an expression which refers to a parameter variable. 
@@ -5003,10 +5085,9 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { Out << "Dp"; mangleType(A.getAsTemplateOrTemplatePattern()); break; - case TemplateArgument::Expression: { + case TemplateArgument::Expression: mangleTemplateArgExpr(A.getAsExpr()); break; - } case TemplateArgument::Integral: mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral()); break; @@ -5062,9 +5143,22 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { } void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { - // It's possible to end up with a DeclRefExpr here in certain - // dependent cases, in which case we should mangle as a - // declaration. + ASTContext &Ctx = Context.getASTContext(); + if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver11) { + mangleExpression(E, UnknownArity, /*AsTemplateArg=*/true); + return; + } + + // Prior to Clang 12, we didn't omit the X .. E around <expr-primary> + // correctly in cases where the template argument was + // constructed from an expression rather than an already-evaluated + // literal. In such a case, we would then e.g. emit 'XLi0EE' instead of + // 'Li0E'. + // + // We did special-case DeclRefExpr to attempt to DTRT for that one + // expression-kind, but while doing so, unfortunately handled ParmVarDecl + // (subtype of VarDecl) _incorrectly_, and emitted 'L_Z .. E' instead of + // the proper 'Xfp_E'. 
E = E->IgnoreParenImpCasts(); if (const DeclRefExpr *DRE = dyn_cast(E)) { const ValueDecl *D = DRE->getDecl(); @@ -5075,7 +5169,6 @@ void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { return; } } - Out << 'X'; mangleExpression(E); Out << 'E'; diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp index 46e7ed812cbc..caf06bd5f9f6 100644 --- a/clang/test/CodeGenCXX/clang-abi-compat.cpp +++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp @@ -1,12 +1,23 @@ -// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.8 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.9 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=4.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=5 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=11 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s -// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s -// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17 %s +// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s 
-emit-llvm -o - -Wno-c++11-extensions \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.8 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.9 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=4.0 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=5 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE12-CXX20 %s +// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - -Wno-c++11-extensions \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20 %s typedef __attribute__((vector_size(8))) long long v1xi64; void clang39(v1xi64) {} @@ -55,3 +66,68 @@ template void clang12_b(); // CHECK: @_Z9clang12_cIXadL_Z3arrEEEvv template void clang12_c() {} template void 
clang12_c<&arr>(); + + +/// Tests for changes in clang12: +namespace expr_primary { +struct A { + template struct Int {}; + template struct Ref {}; +}; + +/// Check various DeclRefExpr manglings + +// PRE12: @_ZN12expr_primary5test1INS_1AEEEvNT_3IntIXLi1EEEE +// V12: @_ZN12expr_primary5test1INS_1AEEEvNT_3IntILi1EEE +template void test1(typename T::template Int<1> a) {} +template void test1(typename A::template Int<1> a); + +enum Enum { EnumVal = 4 }; +int Global; + +// PRE12: @_ZN12expr_primary5test2INS_1AEEEvNT_3IntIXLNS_4EnumE4EEEE +// V12: @_ZN12expr_primary5test2INS_1AEEEvNT_3IntILNS_4EnumE4EEE +template void test2(typename T::template Int a) {} +template void test2(typename A::template Int<4> a); + +// CHECK: @_ZN12expr_primary5test3ILi3EEEvNS_1A3IntIXT_EEE +template void test3(typename A::template Int a) {} +template void test3<3>(A::Int<3> a); + +#if __cplusplus >= 202002L +// CHECK-CXX20: @_ZN12expr_primary5test4INS_1AEEEvNT_3RefIL_ZNS_6GlobalEEEE +template void test4(typename T::template Ref<(Global)> a) {} +template void test4(typename A::template Ref a); + +struct B { + struct X { + constexpr X(double) {} + constexpr X(int&) {} + }; + template struct Y {}; +}; + +// PRE12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YIXLd3ff0000000000000EEEE +// V12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YILd3ff0000000000000EEE +template void test5(typename T::template Y<1.0>) { } +template void test5(typename B::Y<1.0>); + +// PRE12-CXX20: @_ZN12expr_primary5test6INS_1BEEENT_1YIL_ZZNS_5test6EiE1bEEEi +// V12-CXX20: @_ZN12expr_primary5test6INS_1BEEENT_1YIXfp_EEEi +template auto test6(int b) -> typename T::template Y { return {}; } +template auto test6(int b) -> B::Y; +#endif + +/// Verify non-dependent type-traits within a dependent template arg. 
+ +// PRE12: @_ZN12expr_primary5test7INS_1AEEEvNT_3IntIXLm1EEEE +// V12: @_ZN12expr_primary5test7INS_1AEEEvNT_3IntILm1EEE +template void test7(typename T::template Int a) {} +template void test7(A::Int<1>); + +// PRE12: @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeIXLi1EEXT_EiE +// V12: @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeILi1EXT_EiE +template using matrix1xN = int __attribute__((matrix_type(1, N))); +template void test8(matrix1xN a) {} +template void test8<2>(matrix1xN<2> a); +} diff --git a/clang/test/CodeGenCXX/mangle-abi-tag.cpp b/clang/test/CodeGenCXX/mangle-abi-tag.cpp index 5d84096d24cd..9e26604a2c44 100644 --- a/clang/test/CodeGenCXX/mangle-abi-tag.cpp +++ b/clang/test/CodeGenCXX/mangle-abi-tag.cpp @@ -225,7 +225,7 @@ namespace pr30440 { template void g(F); template auto h(A ...a)->decltype (g (0, g < a > (a) ...)) { } -// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIL_ZZNS_1hEDpT_E1aEEfp_EEES2_( +// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIXfp_EEfp_EEEDpT_( void pr30440_test () { h(); diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp index b0fcd586727e..e60e6348a5f6 100644 --- a/clang/test/CodeGenCXX/mangle-concept.cpp +++ b/clang/test/CodeGenCXX/mangle-concept.cpp @@ -6,11 +6,11 @@ template struct S {}; template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); -// CHECK: @_ZN5test12f0IiEENS_1SIXL_ZNS_1CIT_EEEEEEv( +// CHECK: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( } template struct S {}; template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); -// CHECK: @_Z2f0IiE1SIXL_Z1CIT_EEEEv( +// CHECK: @_Z2f0IiE1SIL_Z1CIT_EEEv( diff --git a/clang/test/CodeGenCXX/mangle-template.cpp b/clang/test/CodeGenCXX/mangle-template.cpp index 40688de7e12e..9b80a6d64695 100644 --- a/clang/test/CodeGenCXX/mangle-template.cpp +++ b/clang/test/CodeGenCXX/mangle-template.cpp @@ -270,7 +270,7 @@ namespace test17 { // Note: there is no J...E 
here, because we can't form a pack argument, and // the 5u and 6u are mangled with the original type 'j' (unsigned int) not // with the resolved type 'i' (signed int). - // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_EXLj5EEXT_EXLj6EEEE + // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_ELj5EXT_ELj6EEE template void h(X) {} void i() { h<4, 1, 2, 3>({}); } @@ -323,7 +323,7 @@ namespace partially_dependent_template_args { // callee is unresolved, the rest mangle the converted argument Lj0E // because the callee is resolved. void h() { - // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fIXLi0EEEcvT__EEE + // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fILi0EEcvT__EEE g1({}); // CHECK: @_ZN33partially_dependent_template_args5test22g2IiEEvDTplclL_ZNS0_1fILj0EEEiNS0_1XEEilEEcvT__EE g2({}); diff --git a/clang/test/CodeGenCXX/mangle.cpp b/clang/test/CodeGenCXX/mangle.cpp index f8ea9960a5c5..6cec33e3758e 100644 --- a/clang/test/CodeGenCXX/mangle.cpp +++ b/clang/test/CodeGenCXX/mangle.cpp @@ -1123,7 +1123,7 @@ namespace test56 { namespace test57 { struct X { template int f(); } x; template void f(decltype(x.f<0>() + N)) {} - // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fIXLi0EEEET_E + // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fILi0EEET_E template void f<0>(int); } diff --git a/clang/test/CodeGenCXX/matrix-type.cpp b/clang/test/CodeGenCXX/matrix-type.cpp index 9bde12e13b86..9e715e10ce1c 100644 --- a/clang/test/CodeGenCXX/matrix-type.cpp +++ b/clang/test/CodeGenCXX/matrix-type.cpp @@ -215,14 +215,14 @@ void test_template_deduction() { // CHECK-NEXT: %m4 = alloca [144 x float], align 4 // CHECK-NEXT: %v = alloca %struct.selector.3, align 1 // CHECK-NEXT: %undef.agg.tmp4 = alloca %struct.selector.3, align 1 - // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0) + 
// CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0) // CHECK-NEXT: call void @_Z10use_matrixIiE8selectorILi2EERu11matrix_typeILm10ELm10ET_E([100 x i32]* nonnull align 4 dereferenceable(400) %m1) - // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2) + // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2) // CHECK-NEXT: call void @_Z10use_matrixIiLm12ELm12EE8selectorILi0EERu11matrix_typeIXT0_EXT1_ET_E([144 x i32]* nonnull align 4 dereferenceable(576) %m3) // CHECK-NEXT: call void @_Z10use_matrixILm12ELm12EE8selectorILi4EERu11matrix_typeIXT_EXT0_EfE([144 x float]* nonnull align 4 dereferenceable(576) %m4) // CHECK-NEXT: ret void - // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) + // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [120 x i32]*, align 8 // CHECK-NEXT: store [120 x i32]* %m, [120 x i32]** %m.addr, align 8 @@ -236,7 +236,7 @@ void test_template_deduction() { // CHECK-NEXT: call void @llvm.trap() // CHECK-NEXT: unreachable - // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) + // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [120 x i32]*, align 8 // CHECK-NEXT: store [120 x i32]* %m, [120 x i32]** %m.addr, align 8 @@ 
-277,10 +277,10 @@ void test_auto_t() { // CHECK-LABEL: define{{.*}} void @_Z11test_auto_tv() // CHECK-NEXT: entry: // CHECK-NEXT: %m = alloca [130 x i32], align 4 - // CHECK-NEXT: call void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) + // CHECK-NEXT: call void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) // CHECK-NEXT: ret void - // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) + // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [130 x i32]*, align 8 // CHECK-NEXT: store [130 x i32]* %m, [130 x i32]** %m.addr, align 8 @@ -326,7 +326,7 @@ void test_use_matrix_2() { // CHECK-NEXT: store <40 x float> %call, <40 x float>* %0, align 4 // CHECK-NEXT: call void @_Z12use_matrix_2ILm2ELm12EE8selectorILi0EERu11matrix_typeIXplT_Li2EEXdvT0_Li2EEiERu11matrix_typeIXT_EXT0_EfE([24 x i32]* nonnull align 4 dereferenceable(96) %m1, [24 x float]* nonnull align 4 dereferenceable(96) %m2) // CHECK-NEXT: call void @_Z12use_matrix_2ILm5ELm8EE8selectorILi1EERu11matrix_typeIXplT_T0_EXT0_EiERu11matrix_typeIXT_EXmiT0_T_EfE([104 x i32]* nonnull align 4 dereferenceable(416) %m3, [15 x float]* nonnull align 4 dereferenceable(60) %m4) - // CHECK-NEXT: %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5) + // CHECK-NEXT: %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5) // CHECK-NEXT: %1 = bitcast [20 x float]* %r4 to <20 x float>* // CHECK-NEXT: store <20 x float> %call2, <20 x float>* %1, align 4 // CHECK-NEXT: call void 
@_Z12use_matrix_3ILm6EE8selectorILi2EERu11matrix_typeIXmiT_Li2EEXT_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m1) @@ -357,7 +357,7 @@ void test_use_matrix_2() { // CHECK-NEXT: call void @llvm.trap() // CHECK-NEXT: unreachable - // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1) + // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1) // CHECK-NEXT: entry: // CHECK-NEXT: %m1.addr = alloca [50 x i32]*, align 8 // CHECK-NEXT: store [50 x i32]* %m1, [50 x i32]** %m1.addr, align 8 diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp index 321f65cacc71..5c02b1eb014c 100644 --- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp +++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp @@ -60,14 +60,12 @@ int main(int argc, const char * argv[]) // CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( // CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( // CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( -// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( -// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ +// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE( // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev // CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( // CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( // CHECK-V11: define linkonce_odr 
void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( // CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( // CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( -// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( -// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ +// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE( // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev From 0b7b698fecd37415a635a586e5ca159ab0b8872f Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Sun, 24 Jan 2021 16:23:58 -0500 Subject: [PATCH 011/318] Itanium Mangling: In 'enable_if', omit X/E around <expr-primary>. The Clang enable_if extension is mangled as an <extended-qualifier>, which is supposed to contain <template-args>. However, we were unconditionally emitting X/E around its arguments, neglecting the fact that <expr-primary> should be emitted directly without the surrounding X/E. 
Differential Revision: https://reviews.llvm.org/D95488 (cherry picked from commit a7246ba02a8923f316419a62d836dbe1c0b437bd) --- clang/lib/AST/ItaniumMangle.cpp | 14 +++++++-- clang/test/CodeGen/enable_if.c | 34 +++++++++++----------- clang/test/CodeGenCXX/clang-abi-compat.cpp | 5 ++++ clang/test/CodeGenCXX/enable_if.cpp | 2 +- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 54e2f361a9f1..4420f6a2c1c3 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -727,9 +727,17 @@ void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) { EnableIfAttr *EIA = dyn_cast(*I); if (!EIA) continue; - Out << 'X'; - mangleExpression(EIA->getCond()); - Out << 'E'; + if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + mangleTemplateArgExpr(EIA->getCond()); + } else { + // Prior to Clang 12, we hardcoded the X/E around enable-if's argument, + // even though <template-arg> should not include an X/E around + // <expr-primary>. 
+ Out << 'X'; + mangleExpression(EIA->getCond()); + Out << 'E'; + } } Out << 'E'; FunctionTypeDepth.pop(Saved); diff --git a/clang/test/CodeGen/enable_if.c b/clang/test/CodeGen/enable_if.c index 14550b9e2db9..327a201cdeba 100644 --- a/clang/test/CodeGen/enable_if.c +++ b/clang/test/CodeGen/enable_if.c @@ -31,22 +31,22 @@ void bar(int m) __attribute__((overloadable, enable_if(m > 0, ""))); void bar(int m) __attribute__((overloadable, enable_if(1, ""))); // CHECK-LABEL: define{{.*}} void @test2 void test2() { - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi void (*p)(int) = bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi void (*p2)(int) = &bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi p = bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi p = &bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) void *vp1 = (void*)&bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) void *vp2 = (void*)bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) vp1 = (void*)&bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) vp1 = (void*)bar; } @@ -54,13 +54,13 @@ void baz(int m) __attribute__((overloadable, enable_if(1, ""))); void baz(int m) __attribute__((overloadable)); // CHECK-LABEL: define{{.*}} void @test3 void test3() { - // CHECK: store void (i32)* 
@_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi void (*p)(int) = baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi void (*p2)(int) = &baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi p = baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi p = &baz; } @@ -71,13 +71,13 @@ void qux(int m) __attribute__((overloadable, enable_if(1, ""), void qux(int m) __attribute__((overloadable, enable_if(1, ""))); // CHECK-LABEL: define{{.*}} void @test4 void test4() { - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi void (*p)(int) = qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi void (*p2)(int) = &qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi p = qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi p = &qux; } @@ -90,6 +90,6 @@ void test5() { int foo(char *i __attribute__((pass_object_size(0)))) __attribute__((enable_if(1, ""), overloadable)); - // CHECK: call i32 @_Z3fooUa9enable_ifIXLi1EEEPcU17pass_object_size0 + // CHECK: call i32 @_Z3fooUa9enable_ifILi1EEPcU17pass_object_size0 foo((void*)0); } diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp index caf06bd5f9f6..80311aa320fe 100644 --- a/clang/test/CodeGenCXX/clang-abi-compat.cpp +++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp @@ -130,4 +130,9 @@ template void test7(A::Int<1>); template using matrix1xN = int __attribute__((matrix_type(1, N))); template void test8(matrix1xN a) {} template void test8<2>(matrix1xN<2> 
a); + +// PRE12: @_ZN12expr_primary5test9EUa9enable_ifIXLi1EEEv +// V12: @_ZN12expr_primary5test9EUa9enable_ifILi1EEv +void test9(void) __attribute__((enable_if(1, ""))) {} + } diff --git a/clang/test/CodeGenCXX/enable_if.cpp b/clang/test/CodeGenCXX/enable_if.cpp index 4e7707aaeed9..70386b87fcee 100644 --- a/clang/test/CodeGenCXX/enable_if.cpp +++ b/clang/test/CodeGenCXX/enable_if.cpp @@ -5,7 +5,7 @@ int test5(int); template T test5(T) __attribute__((enable_if(1, "better than non-template"))); -// CHECK: @_Z5test5IiEUa9enable_ifIXLi1EEET_S0_ +// CHECK: @_Z5test5IiEUa9enable_ifILi1EET_S0_ int (*Ptr)(int) = &test5; // Test itanium mangling for attribute enable_if From de3396d89d998769c3310c23bdd49babade9d874 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 28 Jan 2021 15:30:21 -0800 Subject: [PATCH 012/318] workflows: Update branch names Also remove main-branch-sync workflow that was removed from the main branch. --- .github/workflows/clang-tests.yml | 6 +++--- .github/workflows/libclc-tests.yml | 6 +++--- .github/workflows/lld-tests.yml | 6 +++--- .github/workflows/lldb-tests.yml | 6 +++--- .github/workflows/llvm-tests.yml | 10 +++++----- .github/workflows/main-branch-sync.yml | 25 ------------------------- 6 files changed, 17 insertions(+), 42 deletions(-) delete mode 100644 .github/workflows/main-branch-sync.yml diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml index f8ca65e10726..af0b5eabeeda 100644 --- a/.github/workflows/clang-tests.yml +++ b/.github/workflows/clang-tests.yml @@ -28,16 +28,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test clang - uses: llvm/actions/build-test-llvm-project@master + uses: 
llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: check-clang diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml index 4e8639b1c89a..2f1eb2939ea2 100644 --- a/.github/workflows/libclc-tests.yml +++ b/.github/workflows/libclc-tests.yml @@ -31,16 +31,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Build clang - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: "" diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml index 9b4cbe95f231..bdf0c2fcd886 100644 --- a/.github/workflows/lld-tests.yml +++ b/.github/workflows/lld-tests.yml @@ -28,16 +28,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test lld - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release build_target: check-lld diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml index 229e6deece6e..93fddc2de8c6 100644 --- a/.github/workflows/lldb-tests.yml +++ b/.github/workflows/lldb-tests.yml @@ -31,16 +31,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: 
llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Build lldb - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: # Mac OS requries that libcxx is enabled for lldb tests, so we need to disable them. cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 67f318ad849f..675383407d64 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -29,16 +29,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test llvm - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release @@ -60,7 +60,7 @@ jobs: repo: ${{ github.repository }} steps: - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - name: Install abi-compliance-checker run: | sudo apt-get install abi-dumper autoconf pkg-config @@ -72,7 +72,7 @@ jobs: ./configure sudo make install - name: Download source code - uses: llvm/actions/get-llvm-project-src@master + uses: llvm/actions/get-llvm-project-src@main with: ref: ${{ matrix.ref }} repo: ${{ matrix.repo }} diff --git a/.github/workflows/main-branch-sync.yml b/.github/workflows/main-branch-sync.yml deleted file mode 100644 index 5ea360e281d6..000000000000 --- a/.github/workflows/main-branch-sync.yml +++ /dev/null @@ -1,25 
+0,0 @@ -name: main branch sync - -on: - push: - branches: - - 'main' - -jobs: - branch_sync: - runs-on: ubuntu-latest - steps: - - name: Checkout Code - uses: actions/checkout@v2 - with: - # persist-credentials: false allows us to use our own credentials for - # pushing to the repository. Otherwise, the default github actions token - # is used. - persist-credentials: false - fetch-depth: 0 - - - name: Update branch - env: - LLVMBOT_TOKEN: ${{ secrets.LLVMBOT_MAIN_SYNC }} - run: | - git push https://$LLVMBOT_TOKEN@github.com/${{ github.repository }} HEAD:master From 0a32d93bd95b7ad0a4c7f91955c6c815150df84c Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Wed, 27 Jan 2021 09:14:22 +0100 Subject: [PATCH 013/318] [clang-format] Avoid considering include directive as a template closer. This fixes a bug [[ http://llvm.org/PR48891 | PR48891 ]] introduced in D93839 where: ``` #include <stdint.h> namespace rep {} ``` got formatted as ``` #include <stdint.h> namespace rep { } ``` Reviewed By: MyDeveloperDay, leonardchan Differential Revision: https://reviews.llvm.org/D95479 (cherry picked from commit e3713f156b8cb65a2b74f150afb824ce1e2a2fab) --- clang/lib/Format/UnwrappedLineFormatter.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index d1138bbc9c36..5dd0ccdfa6fd 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -371,7 +371,7 @@ class LineJoiner { if (Previous->is(tok::comment)) Previous = Previous->getPreviousNonComment(); if (Previous) { - if (Previous->is(tok::greater)) + if (Previous->is(tok::greater) && !I[-1]->InPPDirective) return 0; if (Previous->is(tok::identifier)) { const FormatToken *PreviousPrevious = diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 855cf0242fe9..c1f88b9ae17a 100644 --- 
a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10248,6 +10248,21 @@ TEST_F(FormatTest, SplitEmptyClass) { "{\n" "};", Style); + + verifyFormat("#include \"stdint.h\"\n" + "namespace rep {}", + Style); + verifyFormat("#include \n" + "namespace rep {}", + Style); + verifyFormat("#include \n" + "namespace rep {}", + "#include \n" + "namespace rep {\n" + "\n" + "\n" + "}", + Style); } TEST_F(FormatTest, SplitEmptyStruct) { From 8c5d184ef714dcf435784e21e66b4b5e25b2dffb Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 27 Jan 2021 16:51:27 -0500 Subject: [PATCH 014/318] clang: Fix static_assert in a few contexts in microsoft mode Follow-up to D17444. Fixes PR48904. See bug for details. Differential Revision: https://reviews.llvm.org/D95559 (cherry picked from commit 764a7a2155c6747ec8d0b38d8edbb65960eae874) --- clang/lib/Parse/ParseDecl.cpp | 3 ++- clang/test/Sema/static-assert.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 571164139630..347d992b1643 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4216,7 +4216,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, } // Parse _Static_assert declaration. - if (Tok.is(tok::kw__Static_assert)) { + if (Tok.isOneOf(tok::kw__Static_assert, tok::kw_static_assert)) { SourceLocation DeclEnd; ParseStaticAssertDeclaration(DeclEnd); continue; @@ -5180,6 +5180,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_friend: // static_assert-declaration + case tok::kw_static_assert: case tok::kw__Static_assert: // GNU typeof support. 
diff --git a/clang/test/Sema/static-assert.c b/clang/test/Sema/static-assert.c index f08e557fc8ea..9105f2366985 100644 --- a/clang/test/Sema/static-assert.c +++ b/clang/test/Sema/static-assert.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c11 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fms-compatibility -DMS -fsyntax-only -verify %s // RUN: %clang_cc1 -std=c99 -pedantic -fsyntax-only -verify=expected,ext %s // RUN: %clang_cc1 -xc++ -std=c++11 -pedantic -fsyntax-only -verify=expected,ext,cxx %s @@ -11,10 +12,17 @@ _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 ex _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS +static_assert(1, "1 is nonzero"); +#endif + void foo(void) { _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}} _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS + static_assert(1, "1 is nonzero"); +#endif } _Static_assert(1, invalid); // expected-error {{expected string literal for diagnostic message in static_assert}} \ @@ -25,6 +33,9 @@ struct A { _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}} _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS + static_assert(1, "1 is nonzero"); +#endif }; #ifdef __cplusplus From 1edbbf9d20d9f859f7ff2a146a501aeb1423141e Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 20 Jan 2021 12:38:32 +0100 Subject: [PATCH 015/318] [clangd] Log warning when using legacy (theia) semantic highlighting. The legacy protocol will be removed on trunk after the 12 branch cut, and gone in clangd 13. 
Differential Revision: https://reviews.llvm.org/D95031 (cherry picked from commit 29472bb76915c4929aecc938300f6df31f63ac29) --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index dc89ebd59fe2..35aed2166f03 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -510,6 +510,11 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, "semanticTokens request, choosing the latter (no notifications)."); Opts.TheiaSemanticHighlighting = false; } + if (Opts.TheiaSemanticHighlighting) { + log("Using legacy semanticHighlights notification, which will be removed " + "in clangd 13. Clients should use the standard semanticTokens " + "request instead."); + } if (Params.rootUri && *Params.rootUri) Opts.WorkspaceRoot = std::string(Params.rootUri->file()); From 61e05d1bc1af737c5f24fd5cd765f1a9914cbd13 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 25 Jan 2021 16:16:22 +0100 Subject: [PATCH 016/318] [clangd] Parse Diagnostics block, and nest ClangTidy block under it. 
(ClangTidy configuration block hasn't been in any release, so we should be OK to move it around like this) Differential Revision: https://reviews.llvm.org/D95362 (cherry picked from commit c3df9d58c75e0f89ca95e947804d65e79a491adc) --- clang-tools-extra/clangd/Config.h | 15 +++--- clang-tools-extra/clangd/ConfigCompile.cpp | 14 +++--- clang-tools-extra/clangd/ConfigFragment.h | 47 +++++++++---------- clang-tools-extra/clangd/ConfigYAML.cpp | 14 +++++- clang-tools-extra/clangd/TidyProvider.cpp | 2 +- .../clangd/unittests/ConfigCompileTests.cpp | 32 +++++++------ .../clangd/unittests/ConfigYAMLTests.cpp | 11 +++-- 7 files changed, 75 insertions(+), 60 deletions(-) diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 44ca283b6a0e..391632cb086a 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -90,6 +90,13 @@ struct Config { struct { bool SuppressAll = false; llvm::StringSet<> Suppress; + + /// Configures what clang-tidy checks to run and options to use with them. + struct { + // A comma-seperated list of globs specify which clang-tidy checks to run. + std::string Checks; + llvm::StringMap CheckOptions; + } ClangTidy; } Diagnostics; /// Style of the codebase. @@ -99,14 +106,6 @@ struct Config { // ::). All nested namespaces are affected as well. std::vector FullyQualifiedNamespaces; } Style; - - /// Configures what clang-tidy checks to run and options to use with them. - struct { - // A comma-seperated list of globs to specify which clang-tidy checks to - // run. 
- std::string Checks; - llvm::StringMap CheckOptions; - } ClangTidy; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index e82c6e159421..8682cae36f26 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -189,7 +189,6 @@ struct FragmentCompiler { compile(std::move(F.CompileFlags)); compile(std::move(F.Index)); compile(std::move(F.Diagnostics)); - compile(std::move(F.ClangTidy)); } void compile(Fragment::IfBlock &&F) { @@ -379,6 +378,8 @@ struct FragmentCompiler { for (llvm::StringRef N : Normalized) C.Diagnostics.Suppress.insert(N); }); + + compile(std::move(F.ClangTidy)); } void compile(Fragment::StyleBlock &&F) { @@ -422,7 +423,7 @@ struct FragmentCompiler { CurSpec += Str; } - void compile(Fragment::ClangTidyBlock &&F) { + void compile(Fragment::DiagnosticsBlock::ClangTidyBlock &&F) { std::string Checks; for (auto &CheckGlob : F.Add) appendTidyCheckSpec(Checks, CheckGlob, true); @@ -433,8 +434,9 @@ struct FragmentCompiler { if (!Checks.empty()) Out.Apply.push_back( [Checks = std::move(Checks)](const Params &, Config &C) { - C.ClangTidy.Checks.append( - Checks, C.ClangTidy.Checks.empty() ? /*skip comma*/ 1 : 0, + C.Diagnostics.ClangTidy.Checks.append( + Checks, + C.Diagnostics.ClangTidy.Checks.empty() ? 
/*skip comma*/ 1 : 0, std::string::npos); }); if (!F.CheckOptions.empty()) { @@ -445,8 +447,8 @@ struct FragmentCompiler { Out.Apply.push_back( [CheckOptions = std::move(CheckOptions)](const Params &, Config &C) { for (auto &StringPair : CheckOptions) - C.ClangTidy.CheckOptions.insert_or_assign(StringPair.first, - StringPair.second); + C.Diagnostics.ClangTidy.CheckOptions.insert_or_assign( + StringPair.first, StringPair.second); }); } } diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 5b67c49fe154..c36b07f5e8e2 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -203,6 +203,29 @@ struct Fragment { /// (e.g. by disabling a clang-tidy check, or the -Wunused compile flag). /// This often has other advantages, such as skipping some analysis. std::vector> Suppress; + + /// Controls how clang-tidy will run over the code base. + /// + /// The settings are merged with any settings found in .clang-tidy + /// configiration files with these ones taking precedence. + struct ClangTidyBlock { + std::vector> Add; + /// List of checks to disable. + /// Takes precedence over Add. To enable all llvm checks except include + /// order: + /// Add: llvm-* + /// Remove: llvm-include-onder + std::vector> Remove; + + /// A Key-Value pair list of options to pass to clang-tidy checks + /// These take precedence over options specified in clang-tidy + /// configuration files. Example: + /// CheckOptions: + /// readability-braces-around-statements.ShortStatementLines: 2 + std::vector, Located>> + CheckOptions; + }; + ClangTidyBlock ClangTidy; }; DiagnosticsBlock Diagnostics; @@ -215,30 +238,6 @@ struct Fragment { std::vector> FullyQualifiedNamespaces; }; StyleBlock Style; - - /// Controls how clang-tidy will run over the code base. - /// - /// The settings are merged with any settings found in .clang-tidy - /// configiration files with these ones taking precedence. 
- // FIXME: move this to Diagnostics.Tidy. - struct ClangTidyBlock { - std::vector> Add; - /// List of checks to disable. - /// Takes precedence over Add. To enable all llvm checks except include - /// order: - /// Add: llvm-* - /// Remove: llvm-include-onder - std::vector> Remove; - - /// A Key-Value pair list of options to pass to clang-tidy checks - /// These take precedence over options specified in clang-tidy configuration - /// files. Example: - /// CheckOptions: - /// readability-braces-around-statements.ShortStatementLines: 2 - std::vector, Located>> - CheckOptions; - }; - ClangTidyBlock ClangTidy; }; } // namespace config diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index 7aaff5565497..348ee9dd1f75 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -62,7 +62,7 @@ class Parser { Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); }); Dict.handle("Index", [&](Node &N) { parse(F.Index, N); }); Dict.handle("Style", [&](Node &N) { parse(F.Style, N); }); - Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); }); + Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); }); Dict.parse(N); return !(N.failed() || HadError); } @@ -110,7 +110,17 @@ class Parser { Dict.parse(N); } - void parse(Fragment::ClangTidyBlock &F, Node &N) { + void parse(Fragment::DiagnosticsBlock &F, Node &N) { + DictParser Dict("Diagnostics", this); + Dict.handle("Suppress", [&](Node &N) { + if (auto Values = scalarValues(N)) + F.Suppress = std::move(*Values); + }); + Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); }); + Dict.parse(N); + } + + void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) { DictParser Dict("ClangTidy", this); Dict.handle("Add", [&](Node &N) { if (auto Values = scalarValues(N)) diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index 
0a9f12221287..c26c59fd347d 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -255,7 +255,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { TidyProviderRef provideClangdConfig() { return [](tidy::ClangTidyOptions &Opts, llvm::StringRef) { - const auto &CurTidyConfig = Config::current().ClangTidy; + const auto &CurTidyConfig = Config::current().Diagnostics.ClangTidy; if (!CurTidyConfig.Checks.empty()) mergeCheckList(Opts.Checks, CurTidyConfig.Checks); diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index ef24b5d8417f..4b1da2035727 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -259,32 +259,36 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) { } TEST_F(ConfigCompileTests, Tidy) { - Frag.ClangTidy.Add.emplace_back("bugprone-use-after-move"); - Frag.ClangTidy.Add.emplace_back("llvm-*"); - Frag.ClangTidy.Remove.emplace_back("llvm-include-order"); - Frag.ClangTidy.Remove.emplace_back("readability-*"); - Frag.ClangTidy.CheckOptions.emplace_back( + auto &Tidy = Frag.Diagnostics.ClangTidy; + Tidy.Add.emplace_back("bugprone-use-after-move"); + Tidy.Add.emplace_back("llvm-*"); + Tidy.Remove.emplace_back("llvm-include-order"); + Tidy.Remove.emplace_back("readability-*"); + Tidy.CheckOptions.emplace_back( std::make_pair(std::string("StrictMode"), std::string("true"))); - Frag.ClangTidy.CheckOptions.emplace_back(std::make_pair( + Tidy.CheckOptions.emplace_back(std::make_pair( std::string("example-check.ExampleOption"), std::string("0"))); EXPECT_TRUE(compileAndApply()); EXPECT_EQ( - Conf.ClangTidy.Checks, + Conf.Diagnostics.ClangTidy.Checks, "bugprone-use-after-move,llvm-*,-llvm-include-order,-readability-*"); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.size(), 2U); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("StrictMode"), 
"true"); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("example-check.ExampleOption"), + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.size(), 2U); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup("StrictMode"), + "true"); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup( + "example-check.ExampleOption"), "0"); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); } TEST_F(ConfigCompileTests, TidyBadChecks) { - Frag.ClangTidy.Add.emplace_back("unknown-check"); - Frag.ClangTidy.Remove.emplace_back("*"); - Frag.ClangTidy.Remove.emplace_back("llvm-includeorder"); + auto &Tidy = Frag.Diagnostics.ClangTidy; + Tidy.Add.emplace_back("unknown-check"); + Tidy.Remove.emplace_back("*"); + Tidy.Remove.emplace_back("llvm-includeorder"); EXPECT_TRUE(compileAndApply()); // Ensure bad checks are stripped from the glob. - EXPECT_EQ(Conf.ClangTidy.Checks, "-*"); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.Checks, "-*"); EXPECT_THAT( Diags.Diagnostics, ElementsAre( diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp index 25d468ba604a..e1c81344de20 100644 --- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp @@ -60,10 +60,11 @@ CompileFlags: { Add: [foo, bar] } Index: Background: Skip --- -ClangTidy: - CheckOptions: - IgnoreMacros: true - example-check.ExampleOption: 0 +Diagnostics: + ClangTidy: + CheckOptions: + IgnoreMacros: true + example-check.ExampleOption: 0 )yaml"; auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback()); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); @@ -77,7 +78,7 @@ CompileFlags: { Add: [foo, bar] } ASSERT_TRUE(Results[2].Index.Background); EXPECT_EQ("Skip", *Results[2].Index.Background.getValue()); - EXPECT_THAT(Results[3].ClangTidy.CheckOptions, + EXPECT_THAT(Results[3].Diagnostics.ClangTidy.CheckOptions, ElementsAre(PairVal("IgnoreMacros", "true"), PairVal("example-check.ExampleOption", "0"))); 
} From 074ad6de6fae20ff7ff720f79df1d6c1a7845157 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Fri, 29 Jan 2021 13:16:41 +0300 Subject: [PATCH 017/318] [OpenMP] libomp: fix build by cl with vs2019 Replace VLA with dynamic allocation using alloca(). This fixes https://bugs.llvm.org/show_bug.cgi?id=48919. Differential Revision: https://reviews.llvm.org/D95627 (cherry picked from commit 7f5ad0e07162e0c19e569986ee37a17c147c9a27) --- openmp/runtime/src/kmp_settings.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index a8522130f972..b477edbbfb42 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -3355,7 +3355,8 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value, ntraits++; } } - omp_alloctrait_t traits[ntraits]; + omp_alloctrait_t *traits = + (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t)); // Helper macros #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) From 99f43f598907a9cc1a613c691ffbce7c8bd4ec75 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 28 Jan 2021 14:37:33 +0100 Subject: [PATCH 018/318] Relax test expectations in debug-info-gline-tables-only-codeview.cpp To make it pass also on 32-bit Windows, see PR48920. 
(cherry picked from commit 0024efc69ea6cd0b630cd11cef5991b7edb73ffc) --- clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp index 27ac682c10f5..409b62da62c1 100644 --- a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp +++ b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp @@ -25,6 +25,6 @@ void test() { // CHECK: ![[C]] = !DICompositeType(tag: DW_TAG_structure_type, name: "C", // CHECK-SAME: flags: DIFlagFwdDecl // CHECK-NOT: identifier - // CHECK: ![[MTYPE]] = !DISubroutineType(types: !{{.*}}) + // CHECK: ![[MTYPE]] = !DISubroutineType({{.*}}types: !{{.*}}) c.m(); } From c5a1eb9b0a76eef7e3025b7333a0d256b8562360 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Wed, 27 Jan 2021 16:02:49 +0100 Subject: [PATCH 019/318] [AMDGPU] Avoid an illegal operand in si-shrink-instructions Before the patch it was possible to trigger a constant bus violation when folding immediates into a shrunk instruction. The patch adds a check to enforce the legality of the new operand. 
Differential Revision: https://reviews.llvm.org/D95527 (cherry picked from commit fc8e7411218c846386650cfba111b62827c71da0) --- .../Target/AMDGPU/SIShrinkInstructions.cpp | 24 ++++++++++--------- .../shrink-instructions-illegal-fold.mir | 23 ++++++++++++++++++ 2 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 2628070f219c..cdb78aae1c4f 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineOperand &MovSrc = Def->getOperand(1); bool ConstantFolded = false; - if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || - isUInt<32>(MovSrc.getImm()))) { - Src0.ChangeToImmediate(MovSrc.getImm()); - ConstantFolded = true; - } else if (MovSrc.isFI()) { - Src0.ChangeToFrameIndex(MovSrc.getIndex()); - ConstantFolded = true; - } else if (MovSrc.isGlobal()) { - Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), - MovSrc.getTargetFlags()); - ConstantFolded = true; + if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) { + if (MovSrc.isImm() && + (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) { + Src0.ChangeToImmediate(MovSrc.getImm()); + ConstantFolded = true; + } else if (MovSrc.isFI()) { + Src0.ChangeToFrameIndex(MovSrc.getIndex()); + ConstantFolded = true; + } else if (MovSrc.isGlobal()) { + Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), + MovSrc.getTargetFlags()); + ConstantFolded = true; + } } if (ConstantFolded) { diff --git a/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir new file mode 100644 index 000000000000..7889f437facf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir @@ -0,0 +1,23 @@ +# RUN: 
llc -march=amdgcn -mcpu=gfx900 -run-pass=si-shrink-instructions --verify-machineinstrs %s -o - | FileCheck %s + +# Make sure immediate folding into V_CNDMASK respects constant bus restrictions. +--- + +name: shrink_cndmask_illegal_imm_folding +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: shrink_cndmask_illegal_imm_folding + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec + ; CHECK: V_CMP_EQ_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $exec + ; CHECK: V_CNDMASK_B32_e32 [[MOV]], killed [[COPY]], implicit $vcc, implicit $exec + + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec + V_CMP_EQ_U32_e32 0, %0:vgpr_32, implicit-def $vcc, implicit $exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, %1:vgpr_32, 0, killed %0:vgpr_32, $vcc, implicit $exec + S_NOP 0 + +... From b2710e7535bd43d9fd6f9792644fe2c207079c42 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 28 Jan 2021 23:53:45 +0100 Subject: [PATCH 020/318] [sanitizer] Fix msan test build on FreeBSD after 7afdc89c2054 This commit accidentally enabled fgetgrent_r() in the msan tests under FreeBSD, but this function is not supported. Also remove FreeBSD from the SANITIZER_INTERCEPT_FGETGRENT_R macro. 
(cherry picked from commit e056fc6cb676f72d5b7dfe7ca540b3275bd1a46f) --- compiler-rt/lib/msan/tests/msan_test.cpp | 2 ++ .../lib/sanitizer_common/sanitizer_platform_interceptors.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 7378b237a711..5dc9090f36c0 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -3707,7 +3707,9 @@ TEST(MemorySanitizer, getgrent_r) { EXPECT_NOT_POISONED(grp.gr_gid); EXPECT_NOT_POISONED(grpres); } +#endif +#ifdef __GLIBC__ TEST(MemorySanitizer, fgetgrent_r) { FILE *fp = fopen("/etc/group", "r"); struct group grp; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 7f7b38d4215b..068fc9829e57 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -226,7 +226,7 @@ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) #define SANITIZER_INTERCEPT_GETPWENT \ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) -#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS) +#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_GLIBC || SI_SOLARIS) #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_GETPWENT_R \ (SI_FREEBSD || SI_NETBSD || SI_GLIBC || SI_SOLARIS) From 4e20d9c03d9acc9ee5a78cbba82b08d51ecbaf3f Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Thu, 28 Jan 2021 19:01:41 -0800 Subject: [PATCH 021/318] Make the profile-filter.c test compatible with 32-bit systems This addresses PR48930. 
Differential Revision: https://reviews.llvm.org/D95658 (cherry picked from commit 0217f1c7a31ba44715bc083a60cddc2192ffed96) --- clang/test/CodeGen/profile-filter.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/test/CodeGen/profile-filter.c b/clang/test/CodeGen/profile-filter.c index 5415ff96cb14..dc5a31e872a1 100644 --- a/clang/test/CodeGen/profile-filter.c +++ b/clang/test/CodeGen/profile-filter.c @@ -28,11 +28,11 @@ unsigned i; // EXCLUDE: noprofile // EXCLUDE: @test1 unsigned test1() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) return i + 1; } @@ -47,10 +47,10 @@ unsigned test1() { // EXCLUDE-NOT: noprofile // EXCLUDE: @test2 unsigned test2() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 
8 - // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) return i - 1; } From 07f8d437134c0b229104241a621db05013da0049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 29 Jan 2021 02:14:47 +0100 Subject: [PATCH 022/318] [clang-tidy] Fix linking tests to LLVMTestingSupport LLVMTestingSupport is not part of libLLVM, and therefore can not be linked to via LLVM_LINK_COMPONENTS. Instead, it needs to be specified explicitly to ensure that it is linked explicitly even if LLVM_LINK_LLVM_DYLIB is used. This is consistent with handling in clangd. 
Fixes PR#48931 Differential Revision: https://reviews.llvm.org/D95653 (cherry picked from commit 632545e8ce846ccaeca8df15a3dc5e36d01a1275) --- clang-tools-extra/unittests/clang-tidy/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index be35b71d15cf..05d330dd8033 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -1,7 +1,6 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP Support - TestingSupport ) get_filename_component(CLANG_LINT_SOURCE_DIR @@ -46,4 +45,5 @@ target_link_libraries(ClangTidyTests clangTidyObjCModule clangTidyReadabilityModule clangTidyUtils + LLVMTestingSupport ) From f54cf61ad8e1cc6592074ddd7ad07908623ead6b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 27 Jan 2021 17:06:05 -0500 Subject: [PATCH 023/318] [OpenMP][NVPTX] Disable building NVPTX deviceRTL by default on a non-CUDA system D95466 dropped CUDA to build NVPTX deviceRTL and enabled it by default. However, the building requires some libraries that are not available on non-CUDA system by default, which could break the compilation. This patch disabled the build by default. It can be enabled with `LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`. 
Reviewed By: kparzysz Differential Revision: https://reviews.llvm.org/D95556 (cherry picked from commit fb12df4a8e33d759938057718273dfb434b2d9c4) --- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 4661bf08af1c..23efbba29d66 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -10,6 +10,15 @@ # ##===----------------------------------------------------------------------===## +# By default we will not build NVPTX deviceRTL on a non-CUDA +set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL + "Whether build NVPTX deviceRTL on non-CUDA system.") + +if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB)) + libomptarget_say("Not building NVPTX deviceRTL by default on non-CUDA system.") + return() +endif() + # Check if we can create an LLVM bitcode implementation of the runtime library # that could be inlined in the user application. For that we need to find # a Clang compiler capable of compiling our CUDA files to LLVM bitcode and From 07dc51637cc419cbd61383eb4e26713a8f931806 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 30 Jan 2021 13:30:48 +0000 Subject: [PATCH 024/318] [LoopUnswitch] Properly update MSSA if header has non-clobbering stores. This patch fixes updating MemorySSA if the header contains memory defs that do not clobber a duplicated instruction. We need to find the first defining access outside the loop body and use that as defining access of the duplicated instruction. This fixes a crash caused by bee486851c1a. 
(Cherry-picked on the 12.x release branch from 10c57268c074c3ad48f76da38fa2ba575ee3d1f9) --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 10 ++- .../partial-unswitch-update-memoryssa.ll | 76 +++++++++++++++++++ .../LoopUnswitch/partial-unswitch.ll | 36 --------- 3 files changed, 83 insertions(+), 39 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 18717394d384..822a786fc7c7 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition( Loop *L = LI->getLoopFor(I->getParent()); auto *DefiningAccess = MemA->getDefiningAccess(); - // If the defining access is a MemoryPhi in the header, get the incoming - // value for the pre-header as defining access. - if (DefiningAccess->getBlock() == I->getParent()) { + // Get the first defining access before the loop. + while (L->contains(DefiningAccess->getBlock())) { + // If the defining access is a MemoryPhi, get the incoming + // value for the pre-header as defining access. 
if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { DefiningAccess = MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } else { + DefiningAccess = + cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); } } MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll new file mode 100644 index 000000000000..ec1e8eeeb070 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll @@ -0,0 +1,76 @@ +; RUN: opt -loop-unswitch -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s +; RUN: opt -loop-unswitch -memssa-check-limit=3 -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s + +declare void @clobber() + +; Check that MemorySSA updating can deal with a clobbering access of a +; duplicated load being a MemoryPHI outside the loop. +define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { +; CHECK-LABEL: @partial_unswitch_memssa_update( +; CHECK-LABEL: loop.ph: +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +entry: + br i1 %c, label %loop.ph, label %outside.clobber + +outside.clobber: + call void @clobber() + br label %loop.ph + +loop.ph: + br label %loop.header + +loop.header: + %lv = load i32, i32* %ptr, align 4 + %hc = icmp eq i32 %lv, 0 + br i1 %hc, label %if, label %then + +if: + br label %loop.latch + +then: + br label %loop.latch + +loop.latch: + br i1 true, label %loop.header, label %exit + +exit: + ret void +} + +; Check that MemorySSA updating can deal with skipping defining accesses in the +; loop body until it finds the first defining access outside the loop.
+define void @partial_unswitch_inloop_stores_beteween_outside_defining_access(i64* noalias %ptr, i16* noalias %src) { +; CHECK-LABEL: @partial_unswitch_inloop_stores_beteween_outside_defining_access +; CHECK-LABEL: entry: +; CHECK-NEXT: store i64 0, i64* %ptr, align 1 +; CHECK-NEXT: store i64 1, i64* %ptr, align 1 +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i16, i16* %src, align 1 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i16 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +; +entry: + store i64 0, i64* %ptr, align 1 + store i64 1, i64* %ptr, align 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + store i64 2, i64* %ptr, align 1 + %lv = load i16, i16* %src, align 1 + %invar.cond = icmp eq i16 %lv, 0 + br i1 %invar.cond, label %noclobber, label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll index 9f0e5d6f6c35..96a6b0f4e2b5 100644 --- a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll @@ -575,42 +575,6 @@ exit: ret i32 10 } -; Check that MemorySSA updating can deal with a clobbering access of a -; duplicated load being a MemoryPHI outside the loop. 
-define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { -; CHECK-LABEL: @partial_unswitch_memssa_update( -; CHECK-LABEL: loop.ph: -; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 -; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 -; CHECK-NEXT: br i1 [[C]] -entry: - br i1 %c, label %loop.ph, label %outside.clobber - -outside.clobber: - call void @clobber() - br label %loop.ph - -loop.ph: - br label %loop.header - -loop.header: - %lv = load i32, i32* %ptr, align 4 - %hc = icmp eq i32 %lv, 0 - br i1 %hc, label %if, label %then - -if: - br label %loop.latch - -then: - br label %loop.latch - -loop.latch: - br i1 true, label %loop.header, label %exit - -exit: - ret void -} - ; Make sure the duplicated instructions are moved to a preheader that always ; executes when the loop body also executes. Do not check the unswitched code, ; because it is already checked in the @partial_unswitch_true_successor test From c5fd87eaddaad87b28530e5272b7cf0c788dc1f9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 03:09:20 +0000 Subject: [PATCH 025/318] workflows: Fix LLVM ABI checks to work for X.0.0 releases --- .github/workflows/llvm-tests.yml | 84 +++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 675383407d64..1cffc3ef4d97 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -1,8 +1,5 @@ name: LLVM Tests -env: - release_major: 12 - on: push: branches: @@ -42,7 +39,38 @@ jobs: with: cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release + abi-dump-setup: + runs-on: ubuntu-latest + outputs: + BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + LLVM_VERSION_MINOR: ${{ 
steps.version.outputs.LLVM_VERSION_MINOR }} + LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} + steps: + - name: Checkout source + uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - name: Get LLVM version + id: version + uses: tstellar/actions/get-llvm-version@get-version + + - name: Setup Variables + id: vars + run: | + if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then + echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + echo ::set-output name=ABI_HEADERS::llvm-c + else + echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=ABI_HEADERS::. + fi + abi-dump: + needs: abi-dump-setup runs-on: ubuntu-latest strategy: matrix: @@ -51,11 +79,11 @@ jobs: - build-latest include: - name: build-baseline - # FIXME: Referencing the env context does not work here - # ref: llvmorg-${{ env.release_major }}.0.0 - ref: llvmorg-12.0.0 + llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} + ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 repo: llvm/llvm-project - name: build-latest + llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} ref: ${{ github.sha }} repo: ${{ github.repository }} steps: @@ -78,22 +106,44 @@ jobs: repo: ${{ matrix.repo }} - name: Configure run: | - mkdir build - cd build - cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm + mkdir install + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build - run: ninja -C build libLLVM-${{ env.release_major }}.so + # Need to run install-LLVM twice to 
ensure the symlink is installed (this is a bug). + run: | + ninja -C build install-LLVM + ninja -C build install-LLVM + ninja -C build install-llvm-headers - name: Dump ABI - run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so + run: | + if [ "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" ]; then + nm ./install/lib/libLLVM.so | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" | cut -d ' ' -f 3 > llvm.symbols + # Even though the -symbols-list option doesn't seem to filter out the symbols, I believe it speeds up processing, so I'm leaving it in. + export EXTRA_ARGS="-symbols-list llvm.symbols" + else + touch llvm.symbols + fi + abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so + # Remove symbol versioning from dumps, so we can compare across major versions. 
+ sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi + tar -czf ${{ matrix.ref }}.abi.tar.gz ${{ matrix.ref }}.abi - name: Upload ABI file uses: actions/upload-artifact@v1 with: name: ${{ matrix.name }} path: ${{ matrix.ref }}.abi.tar.gz + - name: Upload symbol list file + if: matrix.name == 'build-baseline' + uses: actions/upload-artifact@v1 + with: + name: symbol-list + path: llvm.symbols + abi-compare: runs-on: ubuntu-latest needs: + - abi-dump-setup - abi-dump steps: - name: Download baseline @@ -104,10 +154,20 @@ jobs: uses: actions/download-artifact@v1 with: name: build-latest + - name: Download symbol list + uses: actions/download-artifact@v1 + with: + name: symbol-list + - name: Install abi-compliance-checker run: sudo apt-get install abi-compliance-checker - name: Compare ABI - run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz + run: | + if [ -s symbol-list/llvm.symbols ]; then + # This option doesn't seem to work with the ABI dumper, so passing it here. + export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" + fi + abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" - name: Upload ABI Comparison if: always() uses: actions/upload-artifact@v1 From b6d2402e319be00592908b2c9cb63fccdb481008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 2 Feb 2021 15:08:17 +0200 Subject: [PATCH 026/318] [docs] Add release notes for things I've done for the 12.x release branch. 
--- clang/docs/ReleaseNotes.rst | 3 +++ lld/docs/ReleaseNotes.rst | 20 ++++++++++++++++++-- llvm/docs/ReleaseNotes.rst | 19 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3001d6feb631..a34cd512ca59 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -150,6 +150,9 @@ Attribute Changes in Clang Windows Support --------------- +- Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling. + (This matches a change from GCC 8.) + C Language Changes in Clang --------------------------- diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index e0b17ca3e030..ea1403888eba 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -35,12 +35,28 @@ Breaking changes COFF Improvements ----------------- -* ... +* Error out clearly if creating a DLL with too many exported symbols. + (`D86701 <https://reviews.llvm.org/D86701>`_) MinGW Improvements ------------------ -* ... +* Enabled dynamicbase by default. (`D86654 <https://reviews.llvm.org/D86654>`_) + +* Tolerate mismatches between COMDAT section sizes with different amount of + padding (produced by binutils) by inspecting the aux section definition. + (`D86659 <https://reviews.llvm.org/D86659>`_) + +* Support setting the subsystem version via the subsystem argument. + (`D88804 <https://reviews.llvm.org/D88804>`_) + +* Implemented the GNU -wrap option. + (`D89004 <https://reviews.llvm.org/D89004>`_, + `D91689 <https://reviews.llvm.org/D91689>`_) + +* Handle the ``--demangle`` and ``--no-demangle`` options. + (`D93950 <https://reviews.llvm.org/D93950>`_) + MachO Improvements ------------------ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index de8431fe3908..f2eb53778406 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -92,6 +92,25 @@ Changes to TableGen uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax is deprecated. +Changes to the AArch64 Backend +-------------------------- + +During this release ...
+ +* Lots of improvements to generation of Windows unwind data; the unwind + data is optimized and written in packed form where possible, reducing + the size of unwind data (pdata and xdata sections) by around 60% + compared with LLVM 11. The generation of prologs/epilogs is tweaked + when targeting Windows, to increase the chances of being able to use + the packed unwind info format. + +* Support for creating Windows unwind data using ``.seh_*`` assembler + directives. + +* Produce proper assembly output for the Windows target, including + ``:lo12:`` relocation specifiers, to allow the assembly output + to actually be assembled. + Changes to the ARM Backend -------------------------- From 0db882a0f59afcd7f76d716ca2e04f2d6d92aa03 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Feb 2021 10:48:29 -0800 Subject: [PATCH 027/318] workflows: Fix libclc tests --- .github/workflows/libclc-tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml index 2f1eb2939ea2..188eecfc3b89 100644 --- a/.github/workflows/libclc-tests.yml +++ b/.github/workflows/libclc-tests.yml @@ -45,9 +45,9 @@ jobs: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: "" - name: Build and test libclc + # spirv targets require llvm-spirv, so skip building them until we figure out + # how to install this tool. 
run: | - mkdir libclc-build - cd libclc-build - cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config - ninja - ninja test + cmake -G Ninja -S libclc -B libclc-build -DLLVM_CONFIG=`pwd`/build/bin/llvm-config -DLIBCLC_TARGETS_TO_BUILD="amdgcn--;amdgcn--amdhsa;r600--;nvptx--;nvptx64--;nvptx--nvidiacl;nvptx64--nvidiacl" + ninja -C libclc-build + ninja -C libclc-build test From c0097c784179e6f927ed8ae6b28796faee2fea61 Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Sun, 31 Jan 2021 19:18:41 -0500 Subject: [PATCH 028/318] [OpenMP][Libomptarget] Remove possible harmful copy constructor call for RTLsTy From https://bugs.llvm.org/show_bug.cgi?id=48973, we know that `std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs)` causes compile time problems in libstdc++v3 5.3.1. This is because there was a defect in the standard regarding the `call_once` (LWG 2442). This was fixed in libstdc++ soon thereafter, but there are likely other standard libraries where this will fail. By matching this function call with the other one, we fix this bug. 
Differential Revision: https://reviews.llvm.org/D95769 --- openmp/libomptarget/src/interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 239570935cb2..cf6d36960c75 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -94,7 +94,7 @@ EXTERN void __tgt_register_requires(int64_t flags) { /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { TIMESCOPE(); - std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs); + std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); for (auto &RTL : PM->RTLs.AllRTLs) { if (RTL.register_lib) { if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { From 162642bec0df760b27e66cfff046b40f1dfd2713 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 4 Feb 2021 00:07:04 +0900 Subject: [PATCH 029/318] Revert "[ConstantFold] Fold more operations to poison" This reverts commit 53040a968dc2ff20931661e55f05da2ef8b964a0 due to its bad interaction with select i1 -> and/or i1 transformation. 
This fixes: https://bugs.llvm.org/show_bug.cgi?id=49005 https://bugs.llvm.org/show_bug.cgi?id=48435 (cherry picked from commit 06829034ca64b8c83a5b20d8abe5ddbfe7af0004) --- clang/test/Frontend/fixed_point_unary.c | 4 +- llvm/lib/IR/ConstantFold.cpp | 59 ++++++------- ...amdgpu-codegenprepare-fold-binop-select.ll | 2 +- .../Transforms/InstCombine/apint-shift.ll | 2 +- .../canonicalize-ashr-shl-to-masking.ll | 2 +- .../canonicalize-lshr-shl-to-masking.ll | 2 +- .../canonicalize-shl-lshr-to-masking.ll | 2 +- llvm/test/Transforms/InstCombine/icmp.ll | 4 +- ...nput-masking-after-truncation-variant-a.ll | 4 +- ...nput-masking-after-truncation-variant-b.ll | 4 +- ...nput-masking-after-truncation-variant-c.ll | 4 +- ...nput-masking-after-truncation-variant-d.ll | 4 +- ...nput-masking-after-truncation-variant-e.ll | 4 +- ...dant-left-shift-input-masking-variant-a.ll | 4 +- ...dant-left-shift-input-masking-variant-b.ll | 4 +- ...dant-left-shift-input-masking-variant-c.ll | 4 +- ...dant-left-shift-input-masking-variant-d.ll | 4 +- ...dant-left-shift-input-masking-variant-e.ll | 4 +- .../InstCombine/select-of-bittest.ll | 6 +- .../InstCombine/shift-add-inseltpoison.ll | 12 +-- llvm/test/Transforms/InstCombine/shift-add.ll | 12 +-- .../ConstProp/InsertElement-inseltpoison.ll | 2 +- .../InstSimplify/ConstProp/InsertElement.ll | 2 +- .../Transforms/InstSimplify/ConstProp/cast.ll | 4 +- .../InstSimplify/ConstProp/poison.ll | 4 +- .../InstSimplify/ConstProp/shift.ll | 24 ++--- .../vector-undef-elts-inseltpoison.ll | 2 +- .../ConstProp/vector-undef-elts.ll | 2 +- .../ConstProp/vscale-inseltpoison.ll | 16 ++-- .../InstSimplify/ConstProp/vscale.ll | 16 ++-- llvm/test/Transforms/InstSimplify/div.ll | 39 +-------- llvm/test/Transforms/InstSimplify/rem.ll | 31 +------ llvm/test/Transforms/InstSimplify/undef.ll | 87 +++++++++---------- llvm/test/Transforms/SROA/phi-gep.ll | 2 +- llvm/test/Transforms/SROA/select-gep.ll | 2 +- .../X86/insert-binop-inseltpoison.ll | 4 +- 
.../X86/insert-binop-with-constant.ll | 42 ++++----- .../VectorCombine/X86/insert-binop.ll | 6 +- llvm/unittests/IR/ConstantsTest.cpp | 25 +++--- 39 files changed, 199 insertions(+), 258 deletions(-) diff --git a/clang/test/Frontend/fixed_point_unary.c b/clang/test/Frontend/fixed_point_unary.c index 6ce760daba11..849e38a94bc4 100644 --- a/clang/test/Frontend/fixed_point_unary.c +++ b/clang/test/Frontend/fixed_point_unary.c @@ -90,7 +90,7 @@ void inc_usa() { // SIGNED-LABEL: @inc_uf( // SIGNED-NEXT: entry: // SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 -// SIGNED-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], poison +// SIGNED-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], undef // SIGNED-NEXT: store i16 [[TMP1]], i16* @uf, align 2 // SIGNED-NEXT: ret void // @@ -271,7 +271,7 @@ void dec_usa() { // SIGNED-LABEL: @dec_uf( // SIGNED-NEXT: entry: // SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 -// SIGNED-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], poison +// SIGNED-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], undef // SIGNED-NEXT: store i16 [[TMP1]], i16* @uf, align 2 // SIGNED-NEXT: ret void // diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 03cb108cc485..95dd55237e5f 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) { // Undefined behavior invoked - the destination type can't represent // the input constant. 
- return PoisonValue::get(DestTy); + return UndefValue::get(DestTy); } return ConstantInt::get(FPC->getContext(), IntVal); } @@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, unsigned NumElts = ValTy->getNumElements(); if (CIdx->uge(NumElts)) - return PoisonValue::get(Val->getType()); + return UndefValue::get(Val->getType()); SmallVector<Constant*, 16> Result; Result.reserve(NumElts); @@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, } case Instruction::SDiv: case Instruction::UDiv: - // X / undef -> poison - // X / 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X / undef -> undef + if (isa<UndefValue>(C2)) + return C2; + // undef / 0 -> undef // undef / 1 -> undef - if (match(C2, m_One())) + if (match(C2, m_Zero()) || match(C2, m_One())) return C1; // undef / X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::URem: case Instruction::SRem: - // X % undef -> poison - // X % 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X % undef -> undef + if (match(C2, m_Undef())) + return C2; + // undef % 0 -> undef + if (match(C2, m_Zero())) + return C1; // undef % X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::Or: // X | undef -> -1 @@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, return C1; return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0 case Instruction::LShr: - // X >>l undef -> poison + // X >>l undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>l 0 -> undef if (match(C2, m_Zero())) return C1; // undef >>l X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::AShr: - // X >>a undef -> poison + // X >>a undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>a
0 -> undef if (match(C2, m_Zero())) return C1; - // TODO: undef >>a X -> poison if the shift is exact + // TODO: undef >>a X -> undef if the shift is exact // undef >>a X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::Shl: // X << undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef << 0 -> undef if (match(C2, m_Zero())) return C1; @@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, if (CI2->isOne()) return C1; // X / 1 == X if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X / 0 == poison + return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X % 0 == poison + return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 @@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SDiv: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: assert(!CI2->isZero() && "Div by zero handled above"); @@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SRem: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); case Instruction::And: return
ConstantInt::get(CI1->getContext(), C1V & C2V); @@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::Shl: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.shl(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::LShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::AShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef } } @@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, // Fast path for splatted constants. if (Constant *C2Splat = C2->getSplatValue()) { if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); if (Constant *C1Splat = C1->getSplatValue()) { return ConstantVector::getSplat( VTy->getElementCount(), @@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx); Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx); - // If any element of a divisor vector is zero, the whole op is poison. + // If any element of a divisor vector is zero, the whole op is undef. 
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); } @@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, return PoisonValue::get(GEPTy); if (isa(C)) - // If inbounds, we can choose an out-of-bounds pointer as a base pointer. - return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy); + return UndefValue::get(GEPTy); Constant *Idx0 = cast(Idxs[0]); if (Idxs.size() == 1 && (Idx0->isNullValue() || isa(Idx0))) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll index e0037f0d8e45..bfe83c7a1285 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -42,7 +42,7 @@ define i32 @select_sdiv_rhs_const_i32(i1 %cond) { define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) { ; IR-LABEL: @select_sdiv_lhs_const_v2i32( -; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> +; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> ; IR-NEXT: ret <2 x i32> [[OP]] ; ; GCN-LABEL: select_sdiv_lhs_const_v2i32: diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll index 908aeac0cea2..5a351efccfcc 100644 --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -337,7 +337,7 @@ define <2 x i1> @test16vec_nonuniform(<2 x i84> %X) { define <2 x i1> @test16vec_undef(<2 x i84> %X) { ; CHECK-LABEL: @test16vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git 
a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll index 8d29372c3a72..ba0d32ee3768 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll @@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll index 40bc4aaab21c..445f6406b3d2 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll @@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = lshr <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll index 45aa22aa808f..9de0b337de28 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll @@ -171,7 +171,7 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], +; 
CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index b48466e678d8..5e6bed4e280f 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2876,7 +2876,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform(<2 x i32> %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec_undef(<2 x i32> %x) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -2920,7 +2920,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_undef_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll index e49c381fcd16..89c16a0949e8 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl 
<8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index 20f38deeb0d5..8aef637c6a74 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> 
%nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 562280391c5e..61f25e6ca0b1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index aa644e6264e4..077bb8296f3e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -93,7 +93,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; 
CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -124,7 +124,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll index f2aa2894e27a..961ea5e48416 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> 
[[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index 882117fe3480..41a71aa98f40 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index e92875d79207..787135229148 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; 
CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index b8066cef2b40..c0959d9e1ac6 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = lshr <8 x i32> , %nbits @@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; 
%t0 = lshr <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 20b322c0b647..5e0f0be2b1ad 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -72,7 +72,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits @@ -95,7 +95,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 46f5b0c2f213..2e335f0083c1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: 
[[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits @@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index c85bcba82e97..d9bef00b2f78 100644 --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -82,7 +82,7 @@ define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) { define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { ; CHECK-LABEL: @and_lshr_and_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[TMP4]] @@ -91,7 +91,6 @@ define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { %tmp1 = icmp eq <3 x i32> %tmp, %tmp2 = lshr <3 x i32> %arg, %tmp3 = and <3 x i32> %tmp2, - ; The second element of %tmp4 is poison because it is (undef ? poison : undef). 
%tmp4 = select <3 x i1> %tmp1, <3 x i32> %tmp3, <3 x i32> ret <3 x i32> %tmp4 } @@ -223,7 +222,7 @@ define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) { define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var0_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32> @@ -233,7 +232,6 @@ define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { %tmp2 = icmp eq <3 x i32> %tmp, %tmp3 = lshr <3 x i32> %arg, %tmp4 = and <3 x i32> %tmp3, - ; The second element of %tmp5 is poison because it is (undef ? poison : undef). %tmp5 = select <3 x i1> %tmp2, <3 x i32> %tmp4, <3 x i32> ret <3 x i32> %tmp5 } diff --git a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll index 3232cdc49c0f..e968f13c40b0 100644 --- a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll @@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) { define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = zext <4 x i16> %A to <4 x i32> @@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> 
%A, @@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll index eea8b7a074d7..e227274f4930 100644 --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ 
b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) { define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = zext <4 x i16> %A to <4 x i32> @@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = ashr <4 x 
i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll index 54b862c8514a..197e7cc0ac75 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll @@ -25,7 +25,7 @@ define <4 x i64> @insertelement() { define <4 x i64> @insertelement_undef() { ; CHECK-LABEL: @insertelement_undef( -; CHECK-NEXT: ret <4 x i64> poison +; CHECK-NEXT: ret <4 x i64> undef ; %vec1 = insertelement <4 x i64> poison, i64 -1, i32 0 %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll index 127c1692b5b8..a9a27a5df01f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll @@ -25,7 +25,7 @@ define <4 x i64> @insertelement() { define <4 x i64> @insertelement_undef() { ; CHECK-LABEL: @insertelement_undef( -; CHECK-NEXT: ret <4 x i64> poison +; CHECK-NEXT: ret <4 x i64> undef ; %vec1 = insertelement <4 x i64> undef, i64 -1, i32 0 %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll index 1136151f7157..adf5e4b68a1b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -5,7 +5,7 @@ define i8 @overflow_fptosi() { ; CHECK-LABEL: @overflow_fptosi( -; CHECK-NEXT: ret i8 poison +; CHECK-NEXT: ret i8 undef ; %i = fptosi double 1.56e+02 to i8 ret i8 %i @@ -13,7 +13,7 @@ define i8 @overflow_fptosi() { define i8 @overflow_fptoui() { ; CHECK-LABEL: @overflow_fptoui( -; CHECK-NEXT: ret i8 poison +; CHECK-NEXT: ret i8 undef ; %i = fptoui double 2.56e+02 to i8 ret i8 %i diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll index f3fe29ff57ba..ea34bb4699e6 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll @@ -104,14 +104,14 @@ define void @vec_aggr_ops() { define void @other_ops(i8 %x) { ; CHECK-LABEL: @other_ops( -; CHECK-NEXT: call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison, i8* poison) +; CHECK-NEXT: call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison) ; CHECK-NEXT: ret void ; %i1 = icmp eq i8 poison, 1 %i2 = fcmp oeq float poison, 1.0 %i3 = select i1 poison, i8 1, i8 2 %i4 = select i1 true, i8 poison, i8 %x - call void (...) @use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1), i8* getelementptr inbounds (i8, i8* undef, i64 1)) + call void (...) 
@use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1)) ret void } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll index a7a60e562117..3e64513533ff 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll @@ -3,15 +3,15 @@ ; CHECK-LABEL: shift_undef_64 define void @shift_undef_64(i64* %p) { %r1 = lshr i64 -1, 4294967296 ; 2^32 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r1, i64* %p %r2 = ashr i64 -1, 4294967297 ; 2^32 + 1 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r2, i64* %p %r3 = shl i64 -1, 4294967298 ; 2^32 + 2 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r3, i64* %p ret void @@ -20,15 +20,15 @@ define void @shift_undef_64(i64* %p) { ; CHECK-LABEL: shift_undef_65 define void @shift_undef_65(i65* %p) { %r1 = lshr i65 2, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r1, i65* %p %r2 = ashr i65 4, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r2, i65* %p %r3 = shl i65 1, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r3, i65* %p ret void @@ -37,15 +37,15 @@ define void @shift_undef_65(i65* %p) { ; CHECK-LABEL: shift_undef_256 define void @shift_undef_256(i256* %p) { %r1 = lshr i256 2, 18446744073709551617 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r1, i256* %p %r2 = ashr i256 4, 18446744073709551618 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r2, i256* %p %r3 = shl i256 1, 18446744073709551619 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r3, i256* %p ret void @@ -54,15 +54,15 @@ define void @shift_undef_256(i256* %p) { ; CHECK-LABEL: shift_undef_511 define void @shift_undef_511(i511* %p) { %r1 = lshr i511 -1, 1208925819614629174706276 ; 2^80 + 
100 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r1, i511* %p %r2 = ashr i511 -2, 1208925819614629174706200 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r2, i511* %p %r3 = shl i511 -3, 1208925819614629174706180 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r3, i511* %p ret void diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll index 6ce03dd2e0f0..2762291d7954 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll @@ -5,7 +5,7 @@ define <3 x i8> @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> ; %c = shl <3 x i8> undef, ret <3 x i8> %c diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll index 99cc2527d12e..5d0f484bc3fd 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll @@ -5,7 +5,7 @@ define <3 x i8> @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> ; %c = shl <3 x i8> undef, ret <3 x i8> %c diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll index 9689887be69b..ee19e617748b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll @@ -75,7 +75,7 @@ define @fmul() { define @udiv() { ; CHECK-LABEL: @udiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv undef, undef ret %r @@ -83,7 +83,7 @@ define @udiv() { define @udiv_splat_zero() { ; CHECK-LABEL: @udiv_splat_zero( -; CHECK-NEXT: ret poison +; 
CHECK-NEXT: ret undef ; %r = udiv zeroinitializer, zeroinitializer ret %r @@ -91,7 +91,7 @@ define @udiv_splat_zero() { define @sdiv() { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = sdiv undef, undef ret %r @@ -107,7 +107,7 @@ define @fdiv() { define @urem() { ; CHECK-LABEL: @urem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = urem undef, undef ret %r @@ -115,7 +115,7 @@ define @urem() { define @srem() { ; CHECK-LABEL: @srem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = srem undef, undef ret %r @@ -135,7 +135,7 @@ define @frem() { define @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = shl undef, undef ret %r @@ -143,7 +143,7 @@ define @shl() { define @lshr() { ; CHECK-LABEL: @lshr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = lshr undef, undef ret %r @@ -151,7 +151,7 @@ define @lshr() { define @ashr() { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = ashr undef, undef ret %r diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll index 048e8840ffd8..66e4c93e1968 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll @@ -75,7 +75,7 @@ define @fmul() { define @udiv() { ; CHECK-LABEL: @udiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv undef, undef ret %r @@ -83,7 +83,7 @@ define @udiv() { define @udiv_splat_zero() { ; CHECK-LABEL: @udiv_splat_zero( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv zeroinitializer, zeroinitializer ret %r @@ -91,7 +91,7 @@ define @udiv_splat_zero() { define @sdiv() { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = sdiv undef, undef ret %r @@ -107,7 +107,7 @@ define @fdiv() { define @urem() { ; CHECK-LABEL: @urem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = urem undef, undef ret %r 
@@ -115,7 +115,7 @@ define @urem() { define @srem() { ; CHECK-LABEL: @srem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = srem undef, undef ret %r @@ -135,7 +135,7 @@ define @frem() { define @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = shl undef, undef ret %r @@ -143,7 +143,7 @@ define @shl() { define @lshr() { ; CHECK-LABEL: @lshr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = lshr undef, undef ret %r @@ -151,7 +151,7 @@ define @lshr() { define @ashr() { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = ashr undef, undef ret %r diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll index 7c8efc27d3aa..5a3e6e8f7daa 100644 --- a/llvm/test/Transforms/InstSimplify/div.ll +++ b/llvm/test/Transforms/InstSimplify/div.ll @@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { ret <2 x i32> %B } -; Division-by-zero is poison. UB in any vector lane means the whole op is poison. +; Division-by-zero is undef. UB in any vector lane means the whole op is undef. 
define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %div = sdiv <2 x i8> , ret <2 x i8> %div @@ -37,7 +37,7 @@ define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) { define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @udiv_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %div = udiv <2 x i8> , ret <2 x i8> %div @@ -193,37 +193,4 @@ define i32 @div1() { ret i32 %urem } -define i8 @sdiv_minusone_divisor() { -; CHECK-LABEL: @sdiv_minusone_divisor( -; CHECK-NEXT: ret i8 poison -; - %v = sdiv i8 -128, -1 - ret i8 %v -} - -define i32 @poison(i32 %x) { -; CHECK-LABEL: @poison( -; CHECK-NEXT: ret i32 poison -; - %v = udiv i32 %x, poison - ret i32 %v -} - -; TODO: this should be poison -define i32 @poison2(i32 %x) { -; CHECK-LABEL: @poison2( -; CHECK-NEXT: ret i32 0 -; - %v = udiv i32 poison, %x - ret i32 %v -} - -define <2 x i32> @poison3(<2 x i32> %x) { -; CHECK-LABEL: @poison3( -; CHECK-NEXT: ret <2 x i32> poison -; - %v = udiv <2 x i32> %x, - ret <2 x i32> %v -} - !0 = !{i32 0, i32 3} diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll index 6aaeb5c70d00..6ccb6474ce44 100644 --- a/llvm/test/Transforms/InstSimplify/rem.ll +++ b/llvm/test/Transforms/InstSimplify/rem.ll @@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { ret <2 x i32> %B } -; Division-by-zero is poison. UB in any vector lane means the whole op is poison. +; Division-by-zero is undef. UB in any vector lane means the whole op is undef. 
define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @srem_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %rem = srem <2 x i8> , ret <2 x i8> %rem @@ -37,7 +37,7 @@ define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) { define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @urem_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %rem = urem <2 x i8> , ret <2 x i8> %rem @@ -325,28 +325,3 @@ define <2 x i32> @srem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i32> %y) { ret <2 x i32> %r } -define i8 @srem_minusone_divisor() { -; CHECK-LABEL: @srem_minusone_divisor( -; CHECK-NEXT: ret i8 poison -; - %v = srem i8 -128, -1 - ret i8 %v -} - -define i32 @poison(i32 %x) { -; CHECK-LABEL: @poison( -; CHECK-NEXT: ret i32 poison -; - %v = urem i32 %x, poison - ret i32 %v -} - -; TODO: this should be poison - -define i32 @poison2(i32 %x) { -; CHECK-LABEL: @poison2( -; CHECK-NEXT: ret i32 0 -; - %v = urem i32 poison, %x - ret i32 %v -} diff --git a/llvm/test/Transforms/InstSimplify/undef.ll b/llvm/test/Transforms/InstSimplify/undef.ll index d09dc43da091..fe1f412d3d37 100644 --- a/llvm/test/Transforms/InstSimplify/undef.ll +++ b/llvm/test/Transforms/InstSimplify/undef.ll @@ -1,9 +1,8 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instsimplify -S < %s | FileCheck %s define i64 @test0() { ; CHECK-LABEL: @test0( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 undef, undef ret i64 %r @@ -11,7 +10,7 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: @test1( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 3, undef ret i64 %r @@ -19,7 +18,7 @@ define i64 @test1() { define i64 @test2() { ; CHECK-LABEL: @test2( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 undef, 3 ret i64 %r @@ -27,7 +26,7 @@ define i64 @test2() { define i64 @test3() { 
; CHECK-LABEL: @test3( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = mul i64 undef, 6 ret i64 %r @@ -35,7 +34,7 @@ define i64 @test3() { define i64 @test4() { ; CHECK-LABEL: @test4( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = mul i64 6, undef ret i64 %r @@ -43,7 +42,7 @@ define i64 @test4() { define i64 @test5() { ; CHECK-LABEL: @test5( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = and i64 undef, undef ret i64 %r @@ -51,7 +50,7 @@ define i64 @test5() { define i64 @test6() { ; CHECK-LABEL: @test6( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = or i64 undef, undef ret i64 %r @@ -59,7 +58,7 @@ define i64 @test6() { define i64 @test7() { ; CHECK-LABEL: @test7( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = udiv i64 undef, 1 ret i64 %r @@ -67,7 +66,7 @@ define i64 @test7() { define i64 @test8() { ; CHECK-LABEL: @test8( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = sdiv i64 undef, 1 ret i64 %r @@ -75,7 +74,7 @@ define i64 @test8() { define i64 @test9() { ; CHECK-LABEL: @test9( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = urem i64 undef, 1 ret i64 %r @@ -83,7 +82,7 @@ define i64 @test9() { define i64 @test10() { ; CHECK-LABEL: @test10( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = srem i64 undef, 1 ret i64 %r @@ -91,7 +90,7 @@ define i64 @test10() { define i64 @test11() { ; CHECK-LABEL: @test11( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = shl i64 undef, undef ret i64 %r @@ -99,7 +98,7 @@ define i64 @test11() { define i64 @test11b(i64 %a) { ; CHECK-LABEL: @test11b( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 poison ; %r = shl i64 %a, undef ret i64 %r @@ -107,7 +106,7 @@ define i64 @test11b(i64 %a) { define i64 @test12() { ; CHECK-LABEL: @test12( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = ashr i64 undef, undef ret i64 %r @@ -115,7 +114,7 @@ define i64 @test12() { define i64 @test12b(i64 %a) { ; CHECK-LABEL: @test12b( -; CHECK-NEXT: ret i64 poison +; CHECK: 
ret i64 poison ; %r = ashr i64 %a, undef ret i64 %r @@ -123,7 +122,7 @@ define i64 @test12b(i64 %a) { define i64 @test13() { ; CHECK-LABEL: @test13( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = lshr i64 undef, undef ret i64 %r @@ -131,7 +130,7 @@ define i64 @test13() { define i64 @test13b(i64 %a) { ; CHECK-LABEL: @test13b( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 poison ; %r = lshr i64 %a, undef ret i64 %r @@ -139,7 +138,7 @@ define i64 @test13b(i64 %a) { define i1 @test14() { ; CHECK-LABEL: @test14( -; CHECK-NEXT: ret i1 undef +; CHECK: ret i1 undef ; %r = icmp slt i64 undef, undef ret i1 %r @@ -147,7 +146,7 @@ define i1 @test14() { define i1 @test15() { ; CHECK-LABEL: @test15( -; CHECK-NEXT: ret i1 undef +; CHECK: ret i1 undef ; %r = icmp ult i64 undef, undef ret i1 %r @@ -155,7 +154,7 @@ define i1 @test15() { define i64 @test16(i64 %a) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = select i1 undef, i64 %a, i64 undef ret i64 %r @@ -163,7 +162,7 @@ define i64 @test16(i64 %a) { define i64 @test17(i64 %a) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = select i1 undef, i64 undef, i64 %a ret i64 %r @@ -171,7 +170,7 @@ define i64 @test17(i64 %a) { define i64 @test18(i64 %a) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[R:%.*]] = call i64 undef(i64 [[A:%.*]]) +; CHECK: [[R:%.*]] = call i64 undef(i64 %a) ; CHECK-NEXT: ret i64 poison ; %r = call i64 (i64) undef(i64 %a) @@ -180,7 +179,7 @@ define i64 @test18(i64 %a) { define <4 x i8> @test19(<4 x i8> %a) { ; CHECK-LABEL: @test19( -; CHECK-NEXT: ret <4 x i8> poison +; CHECK: ret <4 x i8> poison ; %b = shl <4 x i8> %a, ret <4 x i8> %b @@ -188,7 +187,7 @@ define <4 x i8> @test19(<4 x i8> %a) { define i32 @test20(i32 %a) { ; CHECK-LABEL: @test20( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = udiv i32 %a, 0 ret i32 %b @@ -204,7 +203,7 @@ define <2 x i32> @test20vec(<2 x i32> %a) { define i32 @test21(i32 %a) { ; 
CHECK-LABEL: @test21( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = sdiv i32 %a, 0 ret i32 %b @@ -220,7 +219,7 @@ define <2 x i32> @test21vec(<2 x i32> %a) { define i32 @test22(i32 %a) { ; CHECK-LABEL: @test22( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = ashr exact i32 undef, %a ret i32 %b @@ -228,7 +227,7 @@ define i32 @test22(i32 %a) { define i32 @test23(i32 %a) { ; CHECK-LABEL: @test23( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = lshr exact i32 undef, %a ret i32 %b @@ -236,7 +235,7 @@ define i32 @test23(i32 %a) { define i32 @test24() { ; CHECK-LABEL: @test24( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 undef, 0 ret i32 %b @@ -244,7 +243,7 @@ define i32 @test24() { define i32 @test25() { ; CHECK-LABEL: @test25( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = lshr i32 0, undef ret i32 %b @@ -252,7 +251,7 @@ define i32 @test25() { define i32 @test26() { ; CHECK-LABEL: @test26( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = ashr i32 0, undef ret i32 %b @@ -260,7 +259,7 @@ define i32 @test26() { define i32 @test27() { ; CHECK-LABEL: @test27( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = shl i32 0, undef ret i32 %b @@ -268,7 +267,7 @@ define i32 @test27() { define i32 @test28(i32 %a) { ; CHECK-LABEL: @test28( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nsw i32 undef, %a ret i32 %b @@ -276,7 +275,7 @@ define i32 @test28(i32 %a) { define i32 @test29(i32 %a) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nuw i32 undef, %a ret i32 %b @@ -284,7 +283,7 @@ define i32 @test29(i32 %a) { define i32 @test30(i32 %a) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nsw nuw i32 undef, %a ret i32 %b @@ -292,7 +291,7 @@ define i32 @test30(i32 %a) { define i32 @test31(i32 %a) { ; CHECK-LABEL: @test31( -; CHECK-NEXT: ret i32 0 +; CHECK: ret i32 0 ; %b = shl i32 undef, 
%a ret i32 %b @@ -300,7 +299,7 @@ define i32 @test31(i32 %a) { define i32 @test32(i32 %a) { ; CHECK-LABEL: @test32( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl i32 undef, 0 ret i32 %b @@ -308,7 +307,7 @@ define i32 @test32(i32 %a) { define i32 @test33(i32 %a) { ; CHECK-LABEL: @test33( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = ashr i32 undef, 0 ret i32 %b @@ -316,7 +315,7 @@ define i32 @test33(i32 %a) { define i32 @test34(i32 %a) { ; CHECK-LABEL: @test34( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = lshr i32 undef, 0 ret i32 %b @@ -324,7 +323,7 @@ define i32 @test34(i32 %a) { define i32 @test35(<4 x i32> %V) { ; CHECK-LABEL: @test35( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = extractelement <4 x i32> %V, i32 4 ret i32 %b @@ -332,7 +331,7 @@ define i32 @test35(<4 x i32> %V) { define i32 @test36(i32 %V) { ; CHECK-LABEL: @test36( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = extractelement <4 x i32> undef, i32 %V ret i32 %b @@ -340,7 +339,7 @@ define i32 @test36(i32 %V) { define i32 @test37() { ; CHECK-LABEL: @test37( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 undef, undef ret i32 %b @@ -348,7 +347,7 @@ define i32 @test37() { define i32 @test38(i32 %a) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = udiv i32 %a, undef ret i32 %b @@ -356,7 +355,7 @@ define i32 @test38(i32 %a) { define i32 @test39() { ; CHECK-LABEL: @test39( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 0, undef ret i32 %b diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll index 915ae546beda..6bf2a7718658 100644 --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -348,7 +348,7 @@ define void @test_sroa_gep_phi_select_same_block() { ; CHECK-NEXT: [[PHI:%.*]] = phi %pair* [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ] ; CHECK-NEXT: 
[[SELECT]] = select i1 undef, %pair* [[PHI]], %pair* undef ; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[PHI]], i64 1 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* poison +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* undef ; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[WHILE_BODY]] ; CHECK: exit: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll index f69cfeb410bd..93cb3420d0af 100644 --- a/llvm/test/Transforms/SROA/select-gep.ll +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -83,7 +83,7 @@ define i32 @test_sroa_select_gep_undef(i1 %cond) { ; CHECK-LABEL: @test_sroa_select_gep_undef( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* poison +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* undef ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT_SROA_SEL]], align 4 ; CHECK-NEXT: ret i32 [[LOAD]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll index 8a6b1e98c968..b9d82e9f81df 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll @@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_udiv( ; CHECK-NEXT: [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i32 1 @@ -143,7 +143,7 @@ define <2 x i64> 
@ins1_ins1_udiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_urem( ; CHECK-NEXT: [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index 0637b5005683..a400e8f42907 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -131,7 +131,7 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { define <2 x i64> @shl_constant_op0(i64 %x) { ; CHECK-LABEL: @shl_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -142,7 +142,7 @@ define <2 x i64> @shl_constant_op0(i64 %x) { define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -189,7 +189,7 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { define <2 x i64> @shl_constant_op1(i64 %x) { ; CHECK-LABEL: @shl_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x 
i64> , i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -224,7 +224,7 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) { define <2 x i64> @ashr_constant_op0(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -235,7 +235,7 @@ define <2 x i64> @ashr_constant_op0(i64 %x) { define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -246,7 +246,7 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @ashr_constant_op1(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -268,7 +268,7 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op0(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 
[[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -279,7 +279,7 @@ define <2 x i64> @lshr_constant_op0(i64 %x) { define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -290,7 +290,7 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op1(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -312,7 +312,7 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op0(i64 %x) { ; CHECK-LABEL: @urem_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -323,7 +323,7 @@ define <2 x i64> @urem_constant_op0(i64 %x) { define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = 
insertelement <2 x i64> undef, i64 %x, i32 0 @@ -334,7 +334,7 @@ define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -356,7 +356,7 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op0(i64 %x) { ; CHECK-LABEL: @srem_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -367,7 +367,7 @@ define <2 x i64> @srem_constant_op0(i64 %x) { define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -378,7 +378,7 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: @srem_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -400,7 +400,7 @@ define <2 x i64> 
@srem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op0(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -411,7 +411,7 @@ define <2 x i64> @udiv_constant_op0(i64 %x) { define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -422,7 +422,7 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -444,7 +444,7 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op0(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -455,7 +455,7 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) { define <2 x i64> 
@sdiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -466,7 +466,7 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll index 4fd33cc7ef28..abebf4d809af 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll @@ -53,7 +53,7 @@ define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) { ; CHECK-NEXT: [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]] ; CHECK-NEXT: [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]] ; CHECK-NEXT: [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]] -; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> poison, i64 [[S2_SCALAR]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[S2]] ; %i0 = insertelement <2 x i64> undef, i64 %w, i64 1 @@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_udiv( ; CHECK-NEXT: [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 
1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i32 1 @@ -143,7 +143,7 @@ define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_urem( ; CHECK-NEXT: [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i64 1 diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp index 9eabc7c55638..96d3672647e8 100644 --- a/llvm/unittests/IR/ConstantsTest.cpp +++ b/llvm/unittests/IR/ConstantsTest.cpp @@ -27,7 +27,7 @@ TEST(ConstantsTest, Integer_i1) { Constant* Zero = ConstantInt::get(Int1, 0); Constant* NegOne = ConstantInt::get(Int1, static_cast(-1), true); EXPECT_EQ(NegOne, ConstantInt::getSigned(Int1, -1)); - Constant* Poison = PoisonValue::get(Int1); + Constant* Undef = UndefValue::get(Int1); // Input: @b = constant i1 add(i1 1 , i1 1) // Output: @b = constant i1 false @@ -53,21 +53,21 @@ TEST(ConstantsTest, Integer_i1) { // @g = constant i1 false EXPECT_EQ(Zero, ConstantExpr::getSub(One, One)); - // @h = constant i1 shl(i1 1 , i1 1) ; poison - // @h = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getShl(One, One)); + // @h = constant i1 shl(i1 1 , i1 1) ; undefined + // @h = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getShl(One, One)); // @i = constant i1 shl(i1 1 , i1 0) // @i = constant i1 true EXPECT_EQ(One, ConstantExpr::getShl(One, Zero)); - // @j = constant i1 lshr(i1 1, i1 1) ; poison - // @j = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getLShr(One, One)); + // @j = constant i1 lshr(i1 1, i1 1) ; undefined + // @j = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getLShr(One, One)); - // @m = constant i1 ashr(i1 
1, i1 1) ; poison - // @m = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getAShr(One, One)); + // @m = constant i1 ashr(i1 1, i1 1) ; undefined + // @m = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getAShr(One, One)); // @n = constant i1 mul(i1 -1, i1 1) // @n = constant i1 true @@ -218,6 +218,7 @@ TEST(ConstantsTest, AsInstructionsTest) { Constant *Elt = ConstantInt::get(Int16Ty, 2015); Constant *Poison16 = PoisonValue::get(Int16Ty); Constant *Undef64 = UndefValue::get(Int64Ty); + Constant *UndefV16 = UndefValue::get(P6->getType()); Constant *PoisonV16 = PoisonValue::get(P6->getType()); #define P0STR "ptrtoint (i32** @dummy to i32)" @@ -294,8 +295,8 @@ TEST(ConstantsTest, AsInstructionsTest) { EXPECT_EQ(Elt, ConstantExpr::getExtractElement( ConstantExpr::getInsertElement(P6, Elt, One), One)); - EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Two)); - EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Big)); + EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Two)); + EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Big)); EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Undef64)); } From 91f34dabb92d8446142b3c5777fa83e6bcbdfa7e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 2 Feb 2021 18:41:49 -0800 Subject: [PATCH 030/318] workflows: Re-enable lldb test on Mac OS X --- .github/workflows/lldb-tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml index 93fddc2de8c6..68aec6036995 100644 --- a/.github/workflows/lldb-tests.yml +++ b/.github/workflows/lldb-tests.yml @@ -20,14 +20,16 @@ jobs: build_lldb: name: lldb build runs-on: ${{ matrix.os }} + # Workaround for build failure on Mac OS X: llvm.org/PR46190, https://github.com/actions/virtual-environments/issues/2274 + env: + CPLUS_INCLUDE_PATH: /usr/local/opt/llvm/include/c++/v1:/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include
strategy: fail-fast: false matrix: os: - ubuntu-latest - windows-latest - # macOS build disabled due to: llvm.org/PR46190 - #- macOS-latest + - macOS-latest steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') From 872608926129a61489d484e15cb9186882578c73 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 3 Feb 2021 03:09:24 +0000 Subject: [PATCH 031/318] workflows: Fix actions repository name for llvm tests --- .github/workflows/llvm-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 1cffc3ef4d97..1fcd67a10078 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -56,7 +56,7 @@ jobs: - name: Get LLVM version id: version - uses: tstellar/actions/get-llvm-version@get-version + uses: llvm/actions/get-llvm-version@main - name: Setup Variables id: vars From 2a57ea296a4787828b52799564d7ddf02ec1c4f3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Feb 2021 13:05:19 +0000 Subject: [PATCH 032/318] workflows: Add job to check for ABI changes in libclang.so and libclang-cpp.so --- .github/workflows/libclang-abi-tests.yml | 132 +++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 .github/workflows/libclang-abi-tests.yml diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml new file mode 100644 index 000000000000..5681c7c8166e --- /dev/null +++ b/.github/workflows/libclang-abi-tests.yml @@ -0,0 +1,132 @@ +name: libclang ABI Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - '.github/workflows/libclang-abi-tests.yml' + pull_request: + paths: + - 'clang/**' + - '.github/workflows/libclang-abi-tests.yml' + +jobs: + abi-dump-setup: + runs-on: ubuntu-latest + outputs: + BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} + 
BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} + LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} + steps: + - name: Checkout source + uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - name: Get LLVM version + id: version + uses: llvm/actions/get-llvm-version@main + + - name: Setup Variables + id: vars + run: | + if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then + echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + echo ::set-output name=ABI_HEADERS::clang-c + echo ::set-output name=ABI_LIBS::libclang.so + else + echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=ABI_HEADERS::. + echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so + fi + + abi-dump: + needs: abi-dump-setup + runs-on: ubuntu-latest + strategy: + matrix: + name: + - build-baseline + - build-latest + include: + - name: build-baseline + llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} + ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 + repo: llvm/llvm-project + - name: build-latest + llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} + ref: ${{ github.sha }} + repo: ${{ github.repository }} + steps: + - name: Install Ninja + uses: llvm/actions/install-ninja@main + - name: Install abi-compliance-checker + run: | + sudo apt-get install abi-dumper autoconf pkg-config + - name: Install universal-ctags + run: | + git clone https://github.com/universal-ctags/ctags.git + cd ctags + ./autogen.sh + ./configure + sudo make install + - name: Download source code + uses: llvm/actions/get-llvm-project-src@main + with: + ref: ${{ matrix.ref }} 
+ repo: ${{ matrix.repo }} + - name: Configure + run: | + mkdir install + cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + - name: Build + run: ninja -C build/ ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} install-clang-headers + - name: Dump ABI + run: | + parallel abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o {}-${{ matrix.ref }}.abi ./build/lib/{} ::: ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} + for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do + # Remove symbol versioning from dumps, so we can compare across major versions. + sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi + tar -czf $lib-${{ matrix.ref }}.abi.tar.gz $lib-${{ matrix.ref }}.abi + done + - name: Upload ABI file + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.name }} + path: "*${{ matrix.ref }}.abi.tar.gz" + + abi-compare: + runs-on: ubuntu-latest + needs: + - abi-dump-setup + - abi-dump + steps: + - name: Download baseline + uses: actions/download-artifact@v1 + with: + name: build-baseline + - name: Download latest + uses: actions/download-artifact@v1 + with: + name: build-latest + + - name: Install abi-compliance-checker + run: sudo apt-get install abi-compliance-checker + - name: Compare ABI + run: | + for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do + abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi.tar.gz -new build-latest/$lib*.abi.tar.gz + done + - name: Upload ABI Comparison + if: always() + uses: actions/upload-artifact@v2 + with: + name: compat-report-${{ github.sha }} + path: compat_reports/ + From c1899cd5102dbdacd006fdb33db075319ccc933f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: 
Thu, 28 Jan 2021 11:21:21 +0000 Subject: [PATCH 033/318] [X86][AVX] Add PR48908 shuffle test case (cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723) --- .../X86/vector-shuffle-combining-avx.ll | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 979c365acfd7..3da83b25d363 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -436,6 +436,157 @@ entry: unreachable } +define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) { +; X86-AVX1-LABEL: PR48908: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X86-AVX1-NEXT: vmovapd %ymm4, (%edx) +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX1-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX1-NEXT: vzeroupper +; X86-AVX1-NEXT: retl +; +; 
X86-AVX2-LABEL: PR48908: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX2-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: retl +; +; X86-AVX512-LABEL: PR48908: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X86-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X86-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X86-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X86-AVX512-NEXT: vmovapd %ymm3, 
(%edx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X86-AVX512-NEXT: vmovapd %ymm4, (%ecx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u> +; X86-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0] +; X86-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X86-AVX512-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl +; +; X64-AVX1-LABEL: PR48908: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi) +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi) +; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX1-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX1-NEXT: vzeroupper +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: PR48908: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; 
X64-AVX2-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX2-NEXT: vmovapd %ymm3, (%rsi) +; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX2-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: PR48908: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X64-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X64-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X64-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,3,8,1] +; X64-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X64-AVX512-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1] +; X64-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X64-AVX512-NEXT: vmovapd %ymm4, (%rsi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,11,u,u> +; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3] +; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X64-AVX512-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> + %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> + %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> + store <4 x double> %r0, <4 x double>* %out0, 
align 32 + %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> + store <4 x double> %r1, <4 x double>* %out1, align 32 + %t2 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> + %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> + store <4 x double> %r2, <4 x double>* %out2, align 32 + ret void +} + define <4 x i64> @concat_self_v4i64(<2 x i64> %x) { ; AVX1-LABEL: concat_self_v4i64: ; AVX1: # %bb.0: From 52a70a07e93c322ad137bce1a1ff2f1c9fdf6050 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 28 Jan 2021 12:11:31 +0000 Subject: [PATCH 034/318] [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - don't merge VPERMILPD ops with different low/high masks. Unlike VPERMILPS, VPERMILPD can have non-repeating masks in each 128-bit subvector, we weren't accounting for this when folding vperm2f128(vpermilpd(x,c),vpermilpd(y,c)) -> vpermilpd(vperm2f128(x,y),c). I'm intending to add support for this but wanted to get a minimal fix in first for merging into 12.xx. Fixes PR48908 (cherry picked from commit 6663330bc8c84a75ea092272297b557bfc310380) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++- .../X86/vector-shuffle-combining-avx.ll | 40 ++++++++++--------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0dd20235aa3c..6b816c710f98 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res)); return DAG.getBitcast(VT, Res); } + case X86ISD::VPERMILPI: + // TODO: Handle v4f64 permutes with different low/high lane masks. 
+ if (SrcVT0 == MVT::v4f64) { + uint64_t Mask = Src0.getConstantOperandVal(1); + if ((Mask & 0x3) != ((Mask >> 2) & 0x3)) + break; + } + LLVM_FALLTHROUGH; case X86ISD::VSHLI: case X86ISD::VSRLI: case X86ISD::VSRAI: case X86ISD::PSHUFD: - case X86ISD::VPERMILPI: if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) { SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0)); SDValue RHS = diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 3da83b25d363..1a1153d0e886 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -442,16 +442,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] -; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 -; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] -; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] -; X86-AVX1-NEXT: vmovapd %ymm4, (%edx) -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] -; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] -; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] +; 
X86-AVX1-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3] ; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx) ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] @@ -513,16 +515,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; ; X64-AVX1-LABEL: PR48908: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] -; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 -; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] -; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] -; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi) -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] -; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] -; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] +; X64-AVX1-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3] ; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi) ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = 
ymm2[2,3],ymm1[2,3] From 0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Fri, 29 Jan 2021 08:44:56 +0100 Subject: [PATCH 035/318] [OpenMP] Fix python3 compatibility in openmp's lit.cfg Differential Revision: https://reviews.llvm.org/D95669 (cherry picked from commit c3c02d0d5a313272f6d35926bdf678fc6b884c02) --- openmp/runtime/test/lit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index 0d4a6107ff2b..c4e5fe1ea9e0 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -76,7 +76,7 @@ if config.operating_system == 'Darwin': cmd = subprocess.Popen(['xcrun', '--show-sdk-path'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = cmd.communicate() - out = out.strip() + out = out.strip().decode() res = cmd.wait() if res == 0 and out: config.test_flags += " -isysroot " + out From e3658cefc5bc3538d05fc8ef058d83bcd24b785a Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Sat, 30 Jan 2021 12:34:06 +0900 Subject: [PATCH 036/318] [VE] Change integer constants 32-bit friendly Correct integer constants like `1UL << 63` to `UINT64_C(1) << 63` in order to make them work on 32-bit machines. Tested on both i386 and x86_64 machines.
Reviewed By: mgorny Differential Revision: https://reviews.llvm.org/D95724 (cherry picked from commit 4648098f97fa2a7c08c04632c70cf29293528812) --- llvm/lib/Target/VE/VE.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index a404f7ced70a..8c1fa840f19c 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) { return true; } // (m)1 patterns - return (Val & (1UL << 63)) && isShiftedMask_64(Val); + return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val); } inline static bool isMImm32Val(uint32_t Val) { @@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) { return true; } // (m)1 patterns - return (Val & (1 << 31)) && isShiftedMask_32(Val); + return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val); } /// val2MImm - Convert an integer immediate value to target MImm immediate. inline static uint64_t val2MImm(uint64_t Val) { if (Val == 0) return 0; // (0)1 - if (Val & (1UL << 63)) + if (Val & (UINT64_C(1) << 63)) return countLeadingOnes(Val); // (m)1 return countLeadingZeros(Val) | 0x40; // (m)0 } @@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) { if (Val == 0) return 0; // (0)1 if ((Val & 0x40) == 0) - return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1 - return ((uint64_t)(-1L) >> (Val & 0x3f)); // (m)0 + return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1 + return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0 } inline unsigned M0(unsigned Val) { return Val + 64; } From b351efcae08a59c0cafa123a92b24c5f2300202b Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 27 Jan 2021 23:08:39 -0600 Subject: [PATCH 037/318] [PowerPC] Do not emit XXSPLTI32DX for sub 64-bit constants If the APInt returned by BuildVectorSDNode::isConstantSplat() is narrower than 64 bits, the result produced by XXSPLTI32DX is incorrect. 
The result returned by the function appears to be incorrect and we'll investigate/fix it in a follow-up commit. However, since this causes miscompiles, we must temporarily disable emitting this instruction for such values. (cherry picked from commit 54e570d94af995ff58287a8288389641910a8239) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 ++- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 22 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9215c17cb94b..663ee15db11e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8613,7 +8613,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); - } else { // We may lose precision, so we have to use XXSPLTI32DX. + } else if (APSplatBits.getBitWidth() == 64) { + // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll index 420a96dc1495..081cae729acf 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -100,3 +100,25 @@ define dso_local <8 x i16> @test_xxsplti32dx_9() { entry: ret <8 x i16> } + +define dso_local <16 x i8> @test_xxsplti32dx_10() { +; CHECK-LABEL: test_xxsplti32dx_10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NEXT: xxsplti32dx vs34, 0, 1207959552 +; CHECK-NEXT: blr +entry: + ret <16 x i8> +} + +; FIXME: It appears that there is something wrong with the computation +; of the 64-bit constant to splat so we cannot emit xxsplti32dx for +; this test case for now. 
+define dso_local <16 x i8> @constSplatBug() { +; CHECK-LABEL: constSplatBug: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv vs34, .LCPI10_0@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + ret <16 x i8> +} From dfb763363bc560769605e37e96c1d13cb236223d Mon Sep 17 00:00:00 2001 From: Albion Fung Date: Thu, 28 Jan 2021 15:17:18 -0500 Subject: [PATCH 038/318] [PowerPC][Power10] Fix XXSPLTI32DX not correctly exploiting specific cases Some cases may be transformed into 32 bit splats before hitting the boolean statement, which may cause incorrect behaviour and provide XXSPLTI32DX with the incorrect values of splat. The condition was reversed so that the shortcut prevents this problem. Differential Revision: https://reviews.llvm.org/D95634 (cherry picked from commit 2e470e03b49f1d79ebc315ca9d62a690a633c0cd) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++++---- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 16 ++-------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 663ee15db11e..929a72ac687e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8604,16 +8604,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same - // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX. + // double. This is to exploit the XXSPLTIDP instruction. + // If we lose precision, we use XXSPLTI32DX. if (BVNIsConstantSplat && (SplatBitSize == 64) && Subtarget.hasPrefixInstrs()) { - if (convertToNonDenormSingle(APSplatBits) && - (Op->getValueType(0) == MVT::v2f64)) { + // Check the type first to short-circuit so we don't modify APSplatBits if + // this block isn't executed.
+ if ((Op->getValueType(0) == MVT::v2f64) && + convertToNonDenormSingle(APSplatBits)) { SDValue SplatNode = DAG.getNode( PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); - } else if (APSplatBits.getBitWidth() == 64) { + } else { // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll index 081cae729acf..ce4c2da24b0d 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -101,23 +101,11 @@ entry: ret <8 x i16> } -define dso_local <16 x i8> @test_xxsplti32dx_10() { -; CHECK-LABEL: test_xxsplti32dx_10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlxor vs34, vs34, vs34 -; CHECK-NEXT: xxsplti32dx vs34, 0, 1207959552 -; CHECK-NEXT: blr -entry: - ret <16 x i8> -} - -; FIXME: It appears that there is something wrong with the computation -; of the 64-bit constant to splat so we cannot emit xxsplti32dx for -; this test case for now. define dso_local <16 x i8> @constSplatBug() { ; CHECK-LABEL: constSplatBug: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI10_0@PCREL(0), 1 +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NEXT: xxsplti32dx vs34, 0, 1191182336 ; CHECK-NEXT: blr entry: ret <16 x i8> From 237b39a02f38b4903f39fef362d0f5e98e1de194 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Fri, 29 Jan 2021 21:59:49 +0800 Subject: [PATCH 039/318] [RISCV] Update the version number to v0.10 for vector. v0.10 is tagged in V specification. Update the version to v0.10. 
Differential Revision: https://reviews.llvm.org/D95680 (cherry picked from commit 282aca10aeb03bdaef0a8d4f3faa4c2ff236e527) --- clang/lib/Basic/Targets/RISCV.cpp | 6 +++--- clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 2 +- clang/test/Driver/riscv-arch.c | 6 +++--- .../test/Preprocessor/riscv-target-features.c | 18 +++++++++--------- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6 +++--- .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 6 +++--- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 2 +- llvm/test/CodeGen/RISCV/attributes.ll | 8 ++++---- llvm/test/MC/RISCV/attribute-arch.s | 8 ++++---- 9 files changed, 31 insertions(+), 31 deletions(-) diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 0bf02e605740..786201ea340d 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, } if (HasV) { - Builder.defineMacro("__riscv_v", "1000000"); + Builder.defineMacro("__riscv_v", "10000"); Builder.defineMacro("__riscv_vector"); } @@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__riscv_zfh", "1000"); if (HasZvamo) - Builder.defineMacro("__riscv_zvamo", "1000000"); + Builder.defineMacro("__riscv_zvamo", "10000"); if (HasZvlsseg) - Builder.defineMacro("__riscv_zvlsseg", "1000000"); + Builder.defineMacro("__riscv_zvlsseg", "10000"); } /// Return true if has this feature, need to sync with handleTargetFeatures. 
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index ffae47e5672e..c7f2a3ea5e02 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) { Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc") return RISCVExtensionVersion{"0", "93"}; if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg") - return RISCVExtensionVersion{"1", "0"}; + return RISCVExtensionVersion{"0", "10"}; if (Ext == "zfh") return RISCVExtensionVersion{"0", "1"}; return None; diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 3762a4aef1b3..cf148ca885d0 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -384,7 +384,7 @@ // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1' // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v" @@ -412,7 +412,7 @@ // RV32-EXPERIMENTAL-ZVAMO-BADVERS: error: invalid arch name 'rv32izvamo0p1' // RV32-EXPERIMENTAL-ZVAMO-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVAMO-GOODVERS %s // RV32-EXPERIMENTAL-ZVAMO-GOODVERS: "-target-feature" "+experimental-zvamo" @@ -431,6 +431,6 @@ // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid 
arch name 'rv32izvlsseg0p1' // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s // RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" "+experimental-zvlsseg" diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 006395505246..88826bbd60b8 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -110,23 +110,23 @@ // CHECK-DOUBLE-NOT: __riscv_float_abi_single // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv32iv1p0 -x c -E -dM %s \ +// RUN: -march=rv32iv0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv1p0 -x c -E -dM %s \ +// RUN: -march=rv64iv0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x 
c -E -dM %s \ +// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// CHECK-V-EXT: __riscv_v 1000000 +// CHECK-V-EXT: __riscv_v 10000 // CHECK-V-EXT: __riscv_vector 1 -// CHECK-V-EXT: __riscv_zvamo 1000000 -// CHECK-V-EXT: __riscv_zvlsseg 1000000 +// CHECK-V-EXT: __riscv_zvamo 10000 +// CHECK-V-EXT: __riscv_zvlsseg 10000 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izba0p93 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZBA-EXT %s diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index e7e590153605..dcf7525d7458 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureStdExtB)) formalArchStr = (Twine(formalArchStr) + "_b0p93").str(); if (getFeatureBits(RISCV::FeatureStdExtV)) - formalArchStr = (Twine(formalArchStr) + "_v1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_v0p10").str(); if (getFeatureBits(RISCV::FeatureExtZfh)) formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str(); if (getFeatureBits(RISCV::FeatureExtZba)) @@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureExtZbt)) formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str(); if (getFeatureBits(RISCV::FeatureExtZvamo)) - formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str(); + formalArchStr = (Twine(formalArchStr) + 
"_zvamo0p10").str(); if (getFeatureBits(RISCV::FeatureStdExtZvlsseg)) - formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str(); getTargetStreamer().emitTextAttribute(Tag, formalArchStr); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 72434a15bedb..13c4b84aa300 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureStdExtB)) Arch += "_b0p93"; if (STI.hasFeature(RISCV::FeatureStdExtV)) - Arch += "_v1p0"; + Arch += "_v0p10"; if (STI.hasFeature(RISCV::FeatureExtZfh)) Arch += "_zfh0p1"; if (STI.hasFeature(RISCV::FeatureExtZba)) @@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureExtZbt)) Arch += "_zbt0p93"; if (STI.hasFeature(RISCV::FeatureExtZvamo)) - Arch += "_zvamo1p0"; + Arch += "_zvamo0p10"; if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg)) - Arch += "_zvlsseg1p0"; + Arch += "_zvlsseg0p10"; emitTextAttribute(RISCVAttrs::ARCH, Arch); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 4f9e9cfbdb98..e02c9f8bcbe2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.9. +/// extension, version 0.10. /// This version is still experimental as the 'V' extension hasn't been /// ratified yet. 
/// diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index a0943d5d4293..c26a6d5b4a69 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -47,7 +47,7 @@ ; RV32D: .attribute 5, "rv32i2p0_f2p0_d2p0" ; RV32C: .attribute 5, "rv32i2p0_c2p0" ; RV32B: .attribute 5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93" -; RV32V: .attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +; RV32V: .attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" ; RV32ZFH: .attribute 5, "rv32i2p0_f2p0_zfh0p1" ; RV32ZBA: .attribute 5, "rv32i2p0_zba0p93" ; RV32ZBB: .attribute 5, "rv32i2p0_zbb0p93" @@ -60,7 +60,7 @@ ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93" ; RV32ZBS: .attribute 5, "rv32i2p0_zbs0p93" ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" -; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0" +; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -80,8 +80,8 @@ ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93" ; RV64ZBS: .attribute 5, "rv64i2p0_zbs0p93" ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" -; RV64V: .attribute 5, "rv64i2p0_v1p0_zvamo1p0_zvlsseg1p0" -; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0" +; RV64V: .attribute 5, "rv64i2p0_v0p10_zvamo0p10_zvlsseg0p10" +; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10" define i32 @addi(i32 %a) { diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 66d7ad576382..51d0c6ace9e1 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -40,7 +40,7 @@ # CHECK: attribute 5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93" .attribute arch, "rv32iv" -# CHECK: attribute 5, "rv32i2p0_v1p0" 
+# CHECK: attribute 5, "rv32i2p0_v0p10" .attribute arch, "rv32izba" # CHECK: attribute 5, "rv32i2p0_zba0p93" @@ -79,7 +79,7 @@ # CHECK: attribute 5, "rv32i2p0_f2p0_zfh0p1" .attribute arch, "rv32ivzvamo_zvlsseg" -# CHECK: attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +# CHECK: attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" -.attribute arch, "rv32iv_zvamo1p0_zvlsseg" -# CHECK: attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +.attribute arch, "rv32iv_zvamo0p10_zvlsseg" +# CHECK: attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" From c738c8aa9bf387cc960feca81bc5263e8c634e15 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Sat, 30 Jan 2021 07:54:41 +0800 Subject: [PATCH 040/318] [RISCV] Update the version number to v0.10 for vector. (cherry picked from commit 9847023660467a4469b5667bcf7a4c73a4780037) --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 06e4d053d5d7..9fdfc2727d86 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file contains the required infrastructure to support code generation -/// for the standard 'V' (Vector) extension, version 0.9. This version is still +/// for the standard 'V' (Vector) extension, version 0.10. This version is still /// experimental as the 'V' extension hasn't been ratified yet. 
/// /// This file is included from RISCVInstrInfoV.td diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index aea3d0e17ccc..79a1e6ddc8a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -8,7 +8,7 @@ /// /// This file contains the required infrastructure and SDNode patterns to /// support code generation for the standard 'V' (Vector) extension, version -/// 0.9. This version is still experimental as the 'V' extension hasn't been +/// 0.10. This version is still experimental as the 'V' extension hasn't been /// ratified yet. /// /// This file is included from and depends upon RISCVInstrInfoVPseudos.td From c5904f5c9d32e563e2898e1242d5818e488fe2ee Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Sat, 16 Jan 2021 16:08:40 +0000 Subject: [PATCH 041/318] [LV] Fix crash when computing max VF too early D90687 introduced a crash: llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned int): Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() && "No decisions should have been taken at this point"' failed. when compiling the following C code: typedef struct { char a; } b; b *c; int d, e; int f() { int g = 0; for (; d; d++) { e = 0; for (; e < c[d].a; e++) g++; } return g; } with: clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o - This occurred since prior to D90687 computeFeasibleMaxVF would only be called in computeMaxVF when a scalar epilogue was allowed, but now it's always called. This causes the assert above since computeFeasibleMaxVF collects all viable VFs larger than the default MaxVF, and for each VF calculates the register usage which results in analysis being done the assert above guards against. This can occur in computeFeasibleMaxVF if TTI.shouldMaximizeVectorBandwidth and this target hook is implemented in the hexagon backend to always return true. 
Reported by @iajbar. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D94869 (cherry picked from commit 8cda227432f1c9ceb63b88802ed8136da97274f1) --- .../Transforms/Vectorize/LoopVectorize.cpp | 7 ++--- .../LoopVectorize/Hexagon/maximum-vf-crash.ll | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ea0d7673edf6..47635dbdda02 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return None; } - ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); - switch (ScalarEpilogueStatus) { case CM_ScalarEpilogueAllowed: - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); case CM_ScalarEpilogueNotAllowedUsePredicate: LLVM_FALLTHROUGH; case CM_ScalarEpilogueNotNeededUsePredicate: @@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a " "scalar epilogue instead.\n"); ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); } return None; } @@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } + ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); assert(!MaxVF.isScalable() && "Scalable vectors do not yet support tail folding"); assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) && diff --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll new file mode 100644 index 000000000000..5f8c5d329edf --- 
/dev/null +++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll @@ -0,0 +1,29 @@ +; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | FileCheck %s + +; Check that we don't crash. + +; CHECK-LABEL: @f +; CHECK: vector.body + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +; Function Attrs: optsize +define i32 @f() #0 { +entry: + br label %loop + +loop: + %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ] + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %0 = load i8, i8* undef, align 1 + %g.1.lcssa = add i32 %g.016, undef + %iv.next = add nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret i32 %g.1.lcssa +} + +attributes #0 = { optsize "target-features"="+hvx-length128b" } From b15f3fc5c71dc8a9db7e931e2922a065293e4a64 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Wed, 27 Jan 2021 16:47:21 +0000 Subject: [PATCH 042/318] [X86] Fix disassembly of x86-64 GDTLS code sequence For x86-64 the REX.w prefix takes precedence over any other size override (i.e. 0x66). Therefore, for x86-64 when REX.w is present set 'hasOpSize' to false to ensure that any size override is ignored. Fixes PR48901. 
Differential Revision: https://reviews.llvm.org/D95682 (cherry picked from commit 94fedd266125a5425aa33e11332bf414f0b6dc35) --- .../X86/Disassembler/X86Disassembler.cpp | 1 + llvm/test/MC/Disassembler/X86/x86-64.txt | 10 +++++++--- .../llvm-objdump/X86/disassemble-gdtls.s | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 05e482a6b66e..4e6d8e8e1a54 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) { insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; + insn->hasOpSize = false; } else { insn->registerSize = (insn->hasOpSize ? 2 : 4); insn->addressSize = (insn->hasAdSize ? 4 : 8); diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt index d91ef2500d99..5e56d4c796e6 100644 --- a/llvm/test/MC/Disassembler/X86/x86-64.txt +++ b/llvm/test/MC/Disassembler/X86/x86-64.txt @@ -329,8 +329,10 @@ # CHECK: callw 32767 0x66 0xe8 0xff 0x7f -# CHECK: callw 32767 -0x66 0x66 0x48 0xe8 0xff 0x7f +# TODO: Should display data16 prefixes. +# CHECK-NOT: data16 +# CHECK: callq 32767 +0x66 0x66 0x48 0xe8 0xff 0x7f 0x00 0x00 # CHECK: jmp -32769 0xe9 0xff 0x7f 0xff 0xff @@ -338,8 +340,10 @@ # CHECK: jmp 32767 0x66 0xe9 0xff 0x7f +# TODO: Should display data16 prefixes. 
+# CHECK-NOT: data16 # CHECK: jmp 32767 -0x66 0x66 0x48 0xe9 0xff 0x7f +0x66 0x66 0x48 0xe9 0xff 0x7f 0x00 0x00 # CHECK: jo -32769 0x0f 0x80 0xff 0x7f 0xff 0xff diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s new file mode 100644 index 000000000000..e913f5f6a345 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -filetype=obj -triple=x86_64 | llvm-objdump -d - | FileCheck %s + +# CHECK: : +# TODO: Should display data16 prefixes. +# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00 leaq (%rip), %rdi # 8 +# CHECK-NEXT: 8: 66 66 48 e8 00 00 00 00 callq 0x10 +# CHECK-EMPTY: + +PR48901: + data16 + leaq bar@TLSGD(%rip),%rdi + data16 + data16 + rex64 + callq __tls_get_addr@PLT + +.section .tdata,"awT",@progbits +bar: +.long 42 From e2d822c3bdf6388c6ef21f35745105aba064d16d Mon Sep 17 00:00:00 2001 From: Haowei Wu Date: Thu, 28 Jan 2021 14:13:20 -0800 Subject: [PATCH 043/318] [elfabi] Fix tests which failed on different timezones This patch fixes elfabi tests on machines using a GMT+X timezone settings. Differential Revision: https://reviews.llvm.org/D95641 (cherry picked from commit 771b35965457ebd5faaed8a1c3d2bcefffe721a3) --- llvm/test/tools/llvm-elfabi/preserve-dates-stub.test | 4 ++-- llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test index c399029e0337..9742a61aa281 100644 --- a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test +++ b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test @@ -1,9 +1,9 @@ ## Test writing unchanged content to ELF Stub file with --write-if-changed flag. 
# RUN: llvm-elfabi %s --output-target=elf64-little %t -# RUN: touch -m -t 197001010000 %t +# RUN: env TZ=GMT touch -m -t 197001010000 %t # RUN: llvm-elfabi %s --output-target=elf64-little %t --write-if-changed -# RUN: ls -l %t | FileCheck %s +# RUN: env TZ=GMT ls -l %t | FileCheck %s --- !tapi-tbe TbeVersion: 1.0 diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test index 89cad7733eee..3ec190067c73 100644 --- a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test +++ b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test @@ -1,8 +1,8 @@ ## Test writing unchanged content to TBE file with --write-if-changed flag. # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t -# RUN: touch -m -t 197001010000 %t +# RUN: env TZ=GMT touch -m -t 197001010000 %t # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t --write-if-changed -# RUN: ls -l %t | FileCheck %s +# RUN: env TZ=GMT ls -l %t | FileCheck %s # CHECK: {{[[:space:]]1970}} From 12b6579b79dc21e9e54e74520ece0d571a640d4b Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Wed, 27 Jan 2021 18:49:41 -0500 Subject: [PATCH 044/318] [OpenMP][Libomptarget] Fix conditional in CMake for remote plugin The remote offloading plugin's CMakeLists was trying to build if its flag was enabled even if it didn't find gRPC/protobuf. The conditional was wrong, it's fixed by this. 
Differential Revision: https://reviews.llvm.org/D95574 (cherry picked from commit 8a77056256d9970387595a5c729d894e3fe07131) --- openmp/libomptarget/plugins/remote/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/openmp/libomptarget/plugins/remote/CMakeLists.txt b/openmp/libomptarget/plugins/remote/CMakeLists.txt index 1baa1125f44c..989c74642c66 100644 --- a/openmp/libomptarget/plugins/remote/CMakeLists.txt +++ b/openmp/libomptarget/plugins/remote/CMakeLists.txt @@ -42,12 +42,13 @@ if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND GRPC_CPP_PLUGIN) set(GRPC_INCLUDE_DIR ${directory} ) + + set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) + set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/) + + add_subdirectory(src) + add_subdirectory(server) else() libomptarget_say("Not building remote offloading plugin: required libraries were not found.") endif() -set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) -set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/) - -add_subdirectory(src) -add_subdirectory(server) From 4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 08:13:28 -0500 Subject: [PATCH 045/318] [OpenMP][NVPTX] Added the missing -O1 when building NVPTX bitcode libraries In the past `-O1` was used when building NVPTX bitcode libraries. After we switched to OpenMP, `-O1` was missing by mistake, leading to a huge performance regression. 
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D95545 (cherry picked from commit 5a64794bbad4010778406dfee7748e6080258dbf) --- .../libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 23efbba29d66..eeda137ef120 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -126,14 +126,14 @@ set(cuda_src_files ) # Set flags for LLVM Bitcode compilation. -set(bc_flags -S -x c++ - -target nvptx64 - -Xclang -emit-llvm-bc - -Xclang -aux-triple -Xclang ${aux_triple} - -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device - -D__CUDACC__ - -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) +set(bc_flags -S -x c++ -O1 -std=c++14 + -target nvptx64 + -Xclang -emit-llvm-bc + -Xclang -aux-triple -Xclang ${aux_triple} + -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device + -D__CUDACC__ + -I${devicertl_base_directory} + -I${devicertl_nvptx_directory}/src) if(${LIBOMPTARGET_NVPTX_DEBUG}) list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1) From 5d926bb3c46848c704833e0f02884395609388a3 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 08:12:39 -0500 Subject: [PATCH 046/318] [OpenMP][deviceRTLs] Added `[[clang::loader_uninitialized]]` explicitly `[[clang::loader_uninitialized]]` is in macro `SHARED` but it doesn't work for array like `parallelLevel`, so the variable will be zero initialized. There is also a similar issue for `omptarget_nvptx_device_State` which is in global address space. Its c'tor is also generated, which was not in the past when building the `deviceRTLs` with CUDA. In this patch, we added the attribute to the two variables explicitly. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95550 (cherry picked from commit 19248d30e4ed5250fa84abbbd52fc7b835918a45) --- openmp/libomptarget/deviceRTLs/common/src/omp_data.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu index b91afd7476fe..4736d07108e0 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu @@ -25,7 +25,8 @@ DEVICE omptarget_device_environmentTy omptarget_device_environment; // global data holding OpenMP state information //////////////////////////////////////////////////////////////////////////////// -DEVICE +// OpenMP will try to call its ctor if we don't add the attribute explicitly +[[clang::loader_uninitialized]] DEVICE omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; @@ -33,7 +34,9 @@ DEVICE omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager; DEVICE uint32_t SHARED(usedMemIdx); DEVICE uint32_t SHARED(usedSlotIdx); -DEVICE uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; +// SHARED doesn't work with array so we add the attribute explicitly. +[[clang::loader_uninitialized]] DEVICE uint8_t + parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; #pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc) DEVICE uint16_t SHARED(threadLimit); DEVICE uint16_t SHARED(threadsInTeam); From 255f7398845a7cfb47aef53e40b68057ec56839e Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 29 Jan 2021 13:12:47 -0500 Subject: [PATCH 047/318] [OpenMP][NFC] Added release note for new `deviceRTLs` and hidden helper task Added release note for new `deviceRTLs` and hidden helper task for LLVM 12. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95584 (cherry picked from commit 7bc31018f71cac22b7060c49cefb6f3d0d2e2069) --- openmp/docs/ReleaseNotes.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst index 7f40d3c81510..cb3464ad84f0 100644 --- a/openmp/docs/ReleaseNotes.rst +++ b/openmp/docs/ReleaseNotes.rst @@ -7,7 +7,7 @@ OpenMP 12.0.0 Release Notes These are in-progress notes for the upcoming LLVM 12.0.0 release. Release notes for previous releases can be found on `the Download Page `_. - + Introduction ============ @@ -44,3 +44,27 @@ Non-comprehensive list of changes in this release ``LIBOMPTARGET_INFO`` allows the user to request certain information from the ``libomptarget`` runtime using a 32-bit field. A full description of each environment variable is described :ref:`here `. + +- ``target nowait`` was supported via hidden helper task, which is a task not + bound to any parallel region. A hidden helper team with a number of threads is + created when the first hidden helper task is encountered. The number of threads + can be configured via the environment variable + ``LIBOMP_NUM_HIDDEN_HELPER_THREADS``. By default it is 8. If + ``LIBOMP_NUM_HIDDEN_HELPER_THREADS=0``, hidden helper task is disabled and + falls back to a regular OpenMP task. It can also be disabled by setting the + environment variable ``LIBOMP_USE_HIDDEN_HELPER_TASK=OFF``. + +- ``deviceRTLs`` for NVPTX platform is CUDA free now. It is generally OpenMP code. + Target dependent parts are implemented with Clang/LLVM/NVVM intrinsics. CUDA + SDK is also dropped as a dependence to build the device runtime, which means + device runtime can also be built on a CUDA free system. However, it is + disabled by default. Set the CMake variable + ``LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`` to enable the build of NVPTX device + runtime on a CUDA free system. 
``gcc-multilib`` and ``g++-multilib`` are + required. If CUDA is found, the device runtime will be built by default. + + - Static NVPTX device runtime library (``libomptarget-nvptx.a``) was dropped. + A bitcode library is required to build an OpenMP program. If the library is + not found in the default path or any of the paths defined by ``LIBRARY_PATH``, + an error will be raised. User can also specify the path to the bitcode device + library via ``--libomptarget-nvptx-bc-path=``. From 922e4149d16754b54ce225faa3e769d32937d7ad Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 1 Feb 2021 10:31:09 -0500 Subject: [PATCH 048/318] [OpenMP] Fix seg fault in libomptarget when using Info with multiple threads Summary: One option for the LIBOMPTARGET_INFO environment variable is to print the current status of the device's data mappings. These are a shared resource among threads so this needs to be protected when using multiple streams. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95786 (cherry picked from commit fda48539988d2a1bdb6395799151e9090312a20b) --- openmp/libomptarget/src/interface.cpp | 4 ++-- openmp/libomptarget/src/private.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index cf6d36960c75..01f3715d6bcc 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -58,7 +58,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) { case tgt_mandatory: if (!success) { if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) - for (const auto &Device : PM->Devices) + for (auto &Device : PM->Devices) dumpTargetPointerMappings(loc, Device); else FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target " @@ -76,7 +76,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) { 1, "failure of target construct while offloading is mandatory"); } else { if 
(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) - for (const auto &Device : PM->Devices) + for (auto &Device : PM->Devices) dumpTargetPointerMappings(loc, Device); } break; diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index fb6f681d3020..3b0e57dfe15e 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -99,7 +99,7 @@ int __kmpc_get_target_offload(void) __attribute__((weak)); //////////////////////////////////////////////////////////////////////////////// /// dump a table of all the host-target pointer pairs on failure static inline void dumpTargetPointerMappings(const ident_t *Loc, - const DeviceTy &Device) { + DeviceTy &Device) { if (Device.HostDataToTargetMap.empty()) return; @@ -109,6 +109,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc, Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn()); INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr", "Target Ptr", "Size (B)", "RefCount", "Declaration"); + Device.DataMapMtx.lock(); for (const auto &HostTargetMap : Device.HostDataToTargetMap) { SourceInfo Info(HostTargetMap.HstPtrName); INFO(OMP_INFOTYPE_ALL, Device.DeviceID, @@ -118,6 +119,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc, HostTargetMap.getRefCount(), Info.getName(), Info.getFilename(), Info.getLine(), Info.getColumn()); } + Device.DataMapMtx.unlock(); } //////////////////////////////////////////////////////////////////////////////// From 678c259d277135ef32861887a8ac8618deba5f24 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 3 Feb 2021 14:57:19 -0800 Subject: [PATCH 049/318] PR44325 (and duplicates): don't issue -Wzero-as-null-pointer-constant when rewriting 'a < b' as '(a <=> b) < 0'. It's pretty common for comparison category types to use a pointer or pointer-to-member type as their '0' parameter. 
(cherry picked from commit 1f06f41993b6363e6b2c4f22a13488a3e687f31b) --- clang/lib/Sema/Sema.cpp | 7 +++++++ .../SemaCXX/cxx2a-three-way-comparison.cpp | 20 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 55cb3aee6194..cb5a84a31235 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -537,6 +537,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) { if (E->IgnoreParenImpCasts()->getType()->isNullPtrType()) return; + // Don't diagnose the conversion from a 0 literal to a null pointer argument + // in a synthesized call to operator<=>. + if (!CodeSynthesisContexts.empty() && + CodeSynthesisContexts.back().Kind == + CodeSynthesisContext::RewritingOperatorAsSpaceship) + return; + // If it is a macro from system header, and if the macro name is not "NULL", // do not warn. SourceLocation MaybeMacroLoc = E->getBeginLoc(); diff --git a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp index 353360e052bb..b94225274fff 100644 --- a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp +++ b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify %s -Wzero-as-null-pointer-constant // Keep this test before any declarations of operator<=>. namespace PR44786 { @@ -40,3 +40,21 @@ namespace PR47893 { int &f(...); int &r = f(A(), A()); } + +namespace PR44325 { + struct cmp_cat {}; + bool operator<(cmp_cat, void*); + bool operator>(cmp_cat, int cmp_cat::*); + + struct X {}; + cmp_cat operator<=>(X, X); + + bool b1 = X() < X(); // no warning + bool b2 = X() > X(); // no warning + + // FIXME: It's not clear whether warning here is useful, but we can't really + // tell that this is a comparison category in general. 
This is probably OK, + // as comparisons against zero are only really intended for use in the + // implicit rewrite rules, not for explicit use by programs. + bool c = cmp_cat() < 0; // expected-warning {{zero as null pointer constant}} +} From 2a917b70e770e2d25d96f91beebf2a3e52bb9e66 Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Wed, 3 Feb 2021 23:04:12 +0000 Subject: [PATCH 050/318] Extend release notes for AST Matchers changes --- clang/docs/ReleaseNotes.rst | 38 +++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a34cd512ca59..9efd4c01f053 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -250,15 +250,41 @@ release of Clang. Users of the build system should adjust accordingly. AST Matchers ------------ -- The behavior of TK_IgnoreUnlessSpelledInSource with the traverse() matcher - has been changed to no longer match on template instantiations or on +- The ``mapAnyOf()`` matcher was added. This allows convenient matching of + different AST nodes which have a compatible matcher API. For example, + ``mapAnyOf(ifStmt, forStmt).with(hasCondition(integerLiteral()))`` + matches any ``IfStmt`` or ``ForStmt`` with an integer literal as the + condition. + +- The ``binaryOperation()`` matcher allows matching expressions which + appear like binary operators in the code, even if they are really + ``CXXOperatorCallExpr`` for example. It is based on the ``mapAnyOf()`` + matcher functionality. The matcher API for the latter node has been + extended with ``hasLHS()`` etc to facilitate the abstraction. + +- Matcher API for ``CXXRewrittenBinaryOperator`` has been added. In addition + to explicit matching with the ``cxxRewrittenBinaryOperator()`` matcher, the + ``binaryOperation()`` matches on nodes of this type.
+ +- The behavior of ``TK_IgnoreUnlessSpelledInSource`` with the ``traverse()`` + matcher has been changed to no longer match on template instantiations or on implicit nodes which are not spelled in the source. -- The TK_IgnoreImplicitCastsAndParentheses traversal kind was removed. It - is recommended to use TK_IgnoreUnlessSpelledInSource instead. +- The ``TK_IgnoreImplicitCastsAndParentheses`` traversal kind was removed. It + is recommended to use ``TK_IgnoreUnlessSpelledInSource`` instead. -- The behavior of the forEach() matcher was changed to not internally ignore - implicit and parenthesis nodes. +- The behavior of the ``forEach()`` matcher was changed to not internally + ignore implicit and parenthesis nodes. This makes it consistent with + the ``has()`` matcher. Uses of ``forEach()`` relying on the old behavior + can now use the ``traverse()`` matcher or ``ignoringParenCasts()``. + +- Several AST Matchers have been changed to match based on the active + traversal mode. For example, ``argumentCountIs()`` matches the number of + arguments written in the source, ignoring default arguments represented + by ``CXXDefaultArgExpr`` nodes. + +- Improvements in AST Matchers allow more matching of template declarations, + independent of their template instantiations. clang-format ------------ From f5602e0bf31ab590da19fa357980a753dbfd666e Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 07:24:19 -0500 Subject: [PATCH 051/318] [OpenMP] Disabled profiling in `libomp` by default to unblock link errors Link error occurred when time profiling in libomp is enabled by default because `libomp` is assumed to be a C library but the dependence on `libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all OpenMP tests in Phabricator. This patch set a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to enable/disable the feature. By default it is disabled. Note that once time profiling is enabled for `libomp`, it becomes a C++ library.
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95585 (cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2) --- openmp/CMakeLists.txt | 6 ++++++ openmp/docs/design/Runtimes.rst | 5 ++++- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 12 +++++++++++- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 67600bebdafb..4787d4b5a321 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -86,6 +86,12 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) +option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) + +# Build host runtime library, after LIBOMPTARGET variables are set since they are needed +# to enable time profiling support in the OpenMP runtime. +add_subdirectory(runtime) + if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. if (APPLE OR WIN32) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 016b88ba324b..ad36e43eccdc 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. +be saved to the filename specified by the environment variable. For multi-threaded +applications, profiling in ``libomp`` is also needed. 
Set the CMake option +``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will +turn ``libomp`` into a C++ library. .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 9fdd04f41646..8828ff8ef455 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") - set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -66,10 +65,11 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) - # Time profiling support - set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() +# Time profiling support +set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) + # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 2e927df84f5c..822f9ca2b825 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,6 +50,14 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() +# Building with time profiling support requires LLVM directory includes.
+if(LIBOMP_PROFILING_SUPPORT) + include_directories( + ${LLVM_MAIN_INCLUDE_DIR} + ${LLVM_INCLUDE_DIR} + ) +endif() + # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) @@ -135,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. -if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -144,6 +152,8 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) + # libomp must be a C++ library such that it can link libLLVMSupport + set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 3d682c690fc7..f6aee7197ee8 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT -#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT +#cmakedefine01 LIBOMP_PROFILING_SUPPORT +#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 4a0634d59cff..a6e32bd008e1 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if 
OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From 7d096f9bb350429628c6befce8f94dba4bbc6db9 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Wed, 27 Jan 2021 16:04:11 -0800 Subject: [PATCH 052/318] [CSSPGO] Support of CS profiles in extended binary format. This change brings up support of context-sensitive profiles in the format of extended binary. Existing sample profile reader/writer/merger code is being tweaked to reflect the fact of bracketed input contexts, like (`[...]`). The paired brackets are also needed in extbinary profiles because we don't yet have an otherwise good way to tell calling contexts apart from regular function names since the context delimiter `@` can somehow serve as a part of the C++ mangled names. 
Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95547 (cherry picked from commit 7e99bddfeaab2713a8bb6ca538da25b66e6efc59) --- llvm/include/llvm/ProfileData/SampleProf.h | 19 ++-- .../llvm/ProfileData/SampleProfReader.h | 4 + llvm/lib/ProfileData/SampleProfReader.cpp | 86 ++++++++++--------- llvm/lib/ProfileData/SampleProfWriter.cpp | 4 +- .../Transforms/IPO/SampleContextTracker.cpp | 2 +- .../SampleProfile/profile-context-tracker.ll | 4 + .../llvm-profdata/Inputs/cs-sample.proftext | 36 ++++++++ .../llvm-profdata/cs-sample-profile.test | 4 + llvm/tools/llvm-profdata/llvm-profdata.cpp | 2 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 2 +- 10 files changed, 113 insertions(+), 50 deletions(-) create mode 100644 llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext create mode 100644 llvm/test/tools/llvm-profdata/cs-sample-profile.test diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index c45ace9e68c1..346bc4c81d86 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -439,9 +439,11 @@ class SampleContext { void clearState(ContextStateMask S) { State &= (uint32_t)~S; } bool hasContext() const { return State != UnknownContext; } bool isBaseContext() const { return CallingContext.empty(); } - StringRef getName() const { return Name; } + StringRef getNameWithoutContext() const { return Name; } StringRef getCallingContext() const { return CallingContext; } - StringRef getNameWithContext() const { return FullContext; } + StringRef getNameWithContext(bool WithBracket = false) const { + return WithBracket ? 
InputContext : FullContext; + } private: // Give a context string, decode and populate internal states like @@ -449,6 +451,7 @@ class SampleContext { // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` void setContext(StringRef ContextStr, ContextStateMask CState) { assert(!ContextStr.empty()); + InputContext = ContextStr; // Note that `[]` wrapped input indicates a full context string, otherwise // it's treated as context-less function name only. bool HasContext = ContextStr.startswith("["); @@ -480,6 +483,9 @@ class SampleContext { } } + // Input context string including bracketed calling context and leaf function + // name + StringRef InputContext; // Full context string including calling context and leaf function name StringRef FullContext; // Function name for the associated sample profile @@ -676,7 +682,8 @@ class FunctionSamples { Name = Other.getName(); if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - + if (Context.getNameWithContext(true).empty()) + Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. FunctionHash = Other.getFunctionHash(); @@ -743,8 +750,10 @@ class FunctionSamples { StringRef getName() const { return Name; } /// Return function name with context. - StringRef getNameWithContext() const { - return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; + StringRef getNameWithContext(bool WithBracket = false) const { + return FunctionSamples::ProfileIsCS + ? Context.getNameWithContext(WithBracket) + : Name; } /// Return the original function name. diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 3f52a2f6163b..999e75eddffa 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -488,8 +488,12 @@ class SampleProfileReader { /// \brief Whether samples are collected based on pseudo probes. 
bool ProfileIsProbeBased = false; + /// Whether function profiles are context-sensitive. bool ProfileIsCS = false; + /// Number of context-sensitive profiles. + uint32_t CSProfileCount = 0; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c42931174bc0..c9f41687c356 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() { sampleprof_error Result = sampleprof_error::success; InlineCallStack InlineStack; - int CSProfileCount = 0; - int RegularProfileCount = 0; uint32_t ProbeProfileCount = 0; // SeenMetadata tracks whether we have processed metadata for the current @@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() { SampleContext FContext(FName); if (FContext.hasContext()) ++CSProfileCount; - else - ++RegularProfileCount; Profiles[FContext] = FunctionSamples(); FunctionSamples &FProfile = Profiles[FContext]; - FProfile.setName(FContext.getName()); + FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); MergeResult(Result, FProfile.addTotalSamples(NumSamples)); MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); @@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() { } } - assert((RegularProfileCount == 0 || CSProfileCount == 0) && + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && "Cannot have both context-sensitive and regular profile"); ProfileIsCS = (CSProfileCount > 0); assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && "Cannot have both probe-based profiles and regular profiles"); ProfileIsProbeBased = (ProbeProfileCount > 0); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; + FunctionSamples::ProfileIsCS = ProfileIsCS; if (Result == sampleprof_error::success) computeSummary(); @@ 
-546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { if (std::error_code EC = FName.getError()) return EC; - Profiles[*FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[*FName]; - FProfile.setName(*FName); - + SampleContext FContext(*FName); + Profiles[FContext] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FContext]; + FProfile.setName(FContext.getNameWithoutContext()); + FProfile.setContext(FContext); FProfile.addHeadSamples(*NumHeadSamples); + if (FContext.hasContext()) + CSProfileCount++; + if (std::error_code EC = readProfile(FProfile)) return EC; return sampleprof_error::success; @@ -654,40 +655,44 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { return EC; } assert(Data == End && "More data is read than expected"); - return sampleprof_error::success; - } - - if (Remapper) { - for (auto Name : FuncsToUse) { - Remapper->insert(Name); + } else { + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } } - } - if (useMD5()) { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else { - for (auto NameOffset : FuncOffsetTable) { - auto FuncName = NameOffset.first; - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == 
FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + SampleContext FContext(NameOffset.first); + auto FuncName = FContext.getNameWithoutContext(); + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } } + Data = End; } - Data = End; + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && + "Cannot have both context-sensitive and regular profile"); + ProfileIsCS = (CSProfileCount > 0); + FunctionSamples::ProfileIsCS = ProfileIsCS; return sampleprof_error::success; } @@ -887,7 +892,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (std::error_code EC = Checksum.getError()) return EC; - Profiles[*FName].setFunctionHash(*Checksum); + SampleContext FContext(*FName); + Profiles[FContext].setFunctionHash(*Checksum); } return sampleprof_error::success; } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 71dba6281f76..d3bc05e06fdf 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write( std::error_code SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); - StringRef Name = S.getName(); + StringRef Name = S.getNameWithContext(true); FuncOffsetTable[Name] = Offset - SecLBRProfileStart; encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); @@ -635,7 +635,7 @@ std::error_code 
SampleProfileWriterBinary::writeSummary() { std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { auto &OS = *OutputStream; - if (std::error_code EC = writeNameIdx(S.getName())) + if (std::error_code EC = writeNameIdx(S.getNameWithContext(true))) return EC; encodeULEB128(S.getTotalSamples(), OS); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 37fc27e91100..660d79de667c 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -179,7 +179,7 @@ SampleContextTracker::SampleContextTracker( SampleContext Context(FuncSample.first(), RawContext); LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); if (!Context.isBaseContext()) - FuncToCtxtProfileSet[Context.getName()].insert(FSamples); + FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll index ed32c2a0027b..adda7022047d 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -1,18 +1,22 @@ ; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly ; based on inline decision, so post inline counts are accurate. 
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t + ; Note that we need new pass manager to enable top-down processing for sample profile loader ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT @factor = dso_local global i32 3, align 4, !dbg !0 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext new file mode 100644 index 000000000000..eead4d4d62f0 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext @@ -0,0 +1,36 @@ +[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 + 0: 6 + 1: 6 + 3: 287884 + 4: 287864 _Z3fibi:315608 + 15: 23 +[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 + 0: 15 + 1: 15 + 3: 74946 + 4: 74941 _Z3fibi:82359 + 10: 23324 + 11: 23327 _Z3fibi:25228 + 15: 11 +[main]:154:0 + 2: 12 + 3: 18 _Z5funcAi:11 + 3.1: 18 _Z5funcBi:19 +[external:12 @ main]:154:12 + 2: 12 + 3: 10 _Z5funcAi:7 + 3.1: 10 _Z5funcBi:11 +[main:3.1 @ 
_Z5funcBi]:120:19 + 0: 19 + 1: 19 _Z8funcLeafi:20 + 3: 12 +[externalA:17 @ _Z5funcBi]:120:3 + 0: 3 + 1: 3 +[external:10 @ _Z5funcBi]:120:10 + 0: 10 + 1: 10 +[main:3 @ _Z5funcAi]:99:11 + 0: 10 + 1: 10 _Z8funcLeafi:11 + 3: 24 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-profile.test new file mode 100644 index 000000000000..04c573ddece3 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-profile.test @@ -0,0 +1,4 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext +RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext +RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext +RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 8dc43924c067..7e53c30c7579 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, Remapper ? remapSamples(I->second, *Remapper, Result) : FunctionSamples(); FunctionSamples &Samples = Remapper ? 
Remapped : I->second; - StringRef FName = Samples.getName(); + StringRef FName = Samples.getNameWithContext(true); MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight)); if (Result != sampleprof_error::success) { std::error_code EC = make_error_code(Result); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 265beccb84a8..7624fd3f2808 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -164,7 +164,7 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) { if (Ret.second) { SampleContext FContext(Ret.first->first(), RawContext); FunctionSamples &FProfile = Ret.first->second; - FProfile.setName(FContext.getName()); + FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); } return Ret.first->second; From f2cabaac9525ba4b86301136e21ec9aad6aaf326 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Sun, 31 Jan 2021 22:31:51 -0800 Subject: [PATCH 053/318] [CSSPGO] Tweaking inlining with pseudo probes. Fixing up a couple places where `getCallSiteIdentifier` is needed to support pseudo-probe-based callsites. Also fixing an issue in the extbinary profile reader where the metadata section is not fully scanned based on the number of profiles loaded only for the current module. 
Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95791 (cherry picked from commit 224fee8219bb3aed34f13ce40935e1b3ede90a0f) --- llvm/lib/ProfileData/SampleProfReader.cpp | 9 +- .../Transforms/IPO/SampleContextTracker.cpp | 11 +- .../Inputs/pseudo-probe-inline.prof | 18 ++ .../SampleProfile/pseudo-probe-inline.ll | 175 ++++++++++++++++++ 4 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c9f41687c356..370ffc8e2885 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -883,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (!ProfileIsProbeBased) return sampleprof_error::success; - for (unsigned I = 0; I < Profiles.size(); ++I) { + while (Data < End) { auto FName(readStringFromTable()); if (std::error_code EC = FName.getError()) return EC; @@ -893,8 +893,13 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { return EC; SampleContext FContext(*FName); - Profiles[FContext].setFunctionHash(*Checksum); + // No need to load metadata for profiles that are not loaded in the current + // module. 
+ if (Profiles.count(FContext)) + Profiles[FContext].setFunctionHash(*Checksum); } + + assert(Data == End && "More data is read than expected"); return sampleprof_error::success; } diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 660d79de667c..fad72985dedd 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -308,8 +308,7 @@ void SampleContextTracker::promoteMergeContextSamplesTree( return; // Get the context that needs to be promoted - LineLocation CallSite(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); ContextTrieNode *NodeToPromo = CallerNode->getChildContext(CallSite, CalleeName); if (!NodeToPromo) @@ -370,9 +369,7 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL, return nullptr; return CallContext->getChildContext( - LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), - CalleeName); + FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); } ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { @@ -386,8 +383,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); S.push_back( - std::make_pair(LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), Name)); + std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof new file mode 100644 index 000000000000..fd3ff773e85d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof @@ -0,0 +1,18 @@ +[foo]:23:23 + 1: 23 + 2: 23 zen:23 + !CFGChecksum: 
281479271677951 +[foo:2 @ zen]:765858:23 + 1: 23 + 2: 382920 + 3: 382915 + !CFGChecksum: 138828622701 +[bar]:23:23 + 1: 23 + 2: 23 zen:23 + !CFGChecksum: 281479271677951 +[bar:2 @ zen]:765858:23 + 1: 23 + 2: 382920 + 3: 382915 + !CFGChecksum: 138828622701 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll new file mode 100644 index 000000000000..a5033a0dc190 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -0,0 +1,175 @@ +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml + +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2 +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@factor = dso_local global i32 3, align 4 + +define dso_local i32 @foo(i32 %x) #0 !dbg !12 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0) + %add = add nsw i32 %x, 100000, !dbg !19 +;; Check zen is fully inlined so there's no call to zen anymore. +;; Check code from the inlining of zen is properly annotated here. 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK-NOT: call i32 @zen + %call = call i32 @zen(i32 %add), !dbg !20 + ret i32 %call, !dbg !21 +} + +; CHECK: define dso_local i32 @zen +define dso_local i32 @zen(i32 %x) #0 !dbg !22 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0) + %cmp = icmp sgt i32 %x, 0, !dbg !26 + br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28 + +while.cond: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) + %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ] + %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29 + br i1 %cmp1, label %while.body, label %if.end, !dbg !31 + +while.body: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) + %0 = load volatile i32, i32* @factor, align 4, !dbg !32 + %sub = sub nsw i32 %x.addr.0, %0, !dbg !39 + br label %while.cond, !dbg !31 + +while.cond2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) + %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ] + %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !44 + +while.body4: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) + %1 = load volatile i32, i32* @factor, align 4, !dbg !45 + %add = add nsw i32 %x.addr.1, %1, !dbg !48 + br label %while.cond2, !dbg !44 + +if.end: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) + 
%x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ] + ret i32 %x.addr.2, !dbg !51 +} + +; CHECK: !llvm.pseudo_probe_desc = !{![[#DESC0:]], ![[#DESC1:]]} +; CHECK: ![[#DESC0]] = !{i64 [[#GUID1]], i64 [[#HASH1:]], !"foo"} +; CHECK: ![[#DESC1]] = !{i64 [[#GUID2]], i64 [[#HASH2:]], !"zen"} +; CHECK: ![[PD1]] = !{!"branch_weights", i32 25, i32 1} +; CHECK: ![[PD2]] = !{!"branch_weights", i32 382916, i32 25} + +; Checking to see if YAML file is generated and contains remarks +;YAML: --- !Passed +;YAML-NEXT: Pass: sample-profile-inline +;YAML-NEXT: Name: Inlined +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 11 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - Callee: zen +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 38, Column: 0 } +;YAML-NEXT: - String: ' inlined into ' +;YAML-NEXT: - Caller: foo +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 9, Column: 0 } +;YAML-NEXT: - String: ' to match profiling context' +;YAML-NEXT: - String: ' with ' +;YAML-NEXT: - String: '(cost=' +;YAML-NEXT: - Cost: '15' +;YAML-NEXT: - String: ', threshold=' +;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - String: ')' +;YAML-NEXT: - String: ' at callsite ' +;YAML-NEXT: - String: foo +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Line: '1' +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Column: '11' +;YAML-NEXT: - String: ';' +;YAML-NEXT: ... +;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 22 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '23' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... 
+;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 39, Column: 9 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '23' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... +;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 41, Column: 14 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '382920' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... + +attributes #0 = {"use-sample-profile"} + +!llvm.module.flags = !{!8, !9} + +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3) +!3 = !DIFile(filename: "test.cpp", directory: "test") +!4 = !{} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{i32 7, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!12 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 9, type: !13, scopeLine: 9, unit: !2) +!13 = !DISubroutineType(types: !14) +!14 = !{!7, !7} +!18 = !DILocation(line: 0, scope: !12) +!19 = !DILocation(line: 10, column: 22, scope: !12) +!20 = !DILocation(line: 10, column: 11, scope: !12) +!21 = !DILocation(line: 12, column: 3, scope: !12) +!22 = distinct !DISubprogram(name: "zen", scope: !3, file: !3, line: 37, type: !13, scopeLine: 38, unit: !2) +!25 = !DILocation(line: 0, scope: !22) +!26 = !DILocation(line: 39, column: 9, scope: !27) +!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 39, column: 7) +!28 = !DILocation(line: 39, column: 7, scope: !22) +!29 = !DILocation(line: 41, column: 14, scope: !30) +!30 = distinct !DILexicalBlock(scope: !27, file: !3, line: 39, column: 14) +!31 = 
!DILocation(line: 41, column: 5, scope: !30) +!32 = !DILocation(line: 42, column: 16, scope: !33) +!33 = distinct !DILexicalBlock(scope: !30, file: !3, line: 41, column: 19) +!38 = !DILocation(line: 42, column: 12, scope: !33) +!39 = !DILocation(line: 42, column: 9, scope: !33) +!42 = !DILocation(line: 48, column: 14, scope: !43) +!43 = distinct !DILexicalBlock(scope: !27, file: !3, line: 46, column: 8) +!44 = !DILocation(line: 48, column: 5, scope: !43) +!45 = !DILocation(line: 49, column: 16, scope: !46) +!46 = distinct !DILexicalBlock(scope: !43, file: !3, line: 48, column: 19) +!47 = !DILocation(line: 49, column: 12, scope: !46) +!48 = !DILocation(line: 49, column: 9, scope: !46) +!51 = !DILocation(line: 53, column: 3, scope: !22) From b9fa16f2234edddf6e0f449a0e7b646ee9046cf3 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Fri, 22 Jan 2021 15:52:46 -0800 Subject: [PATCH 054/318] [CSSPGO] Passing the clang driver switch -fpseudo-probe-for-profiling to the linker. As titled. Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95271 (cherry picked from commit d3e2e3740d0730cb6788c771bb01a8f3e935bf2e) --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 +++++ clang/test/Driver/pseudo-probe-lto.c | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/pseudo-probe-lto.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 42c5319041d0..1f6c13d5cc96 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], "fprofile-update=">, defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling", CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse, PosFlag, NegFlag, - BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>; + BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample 
profiling">>; def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 6a95aa5ec628..bcaea71dca94 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back("-plugin-opt=new-pass-manager"); } + // Pass an option to enable pseudo probe emission. + if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling, + options::OPT_fno_pseudo_probe_for_profiling, false)) + CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling"); + // Setup statistics file output. SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D); if (!StatsFile.empty()) diff --git a/clang/test/Driver/pseudo-probe-lto.c b/clang/test/Driver/pseudo-probe-lto.c new file mode 100644 index 000000000000..e319b8c0098b --- /dev/null +++ b/clang/test/Driver/pseudo-probe-lto.c @@ -0,0 +1,10 @@ +// RUN: touch %t.o +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto=thin -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 2>&1 | FileCheck %s --check-prefix=NOPROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux 
-flto -fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE + +// PROBE: -plugin-opt=pseudo-probe-for-profiling +// NOPROBE-NOT: -plugin-opt=pseudo-probe-for-profiling From 27ff658e97528540e4425c0cb6400f3e5355f53a Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Sun, 3 Jan 2021 16:43:06 -0800 Subject: [PATCH 055/318] [CSSPGO] Call site prioritized inlining for sample PGO This change implemented call site prioritized BFS profile guided inlining for sample profile loader. The new inlining strategy maximize the benefit of context-sensitive profile as mentioned in the follow up discussion of CSSPGO RFC. The change will not affect today's AutoFDO as it's opt-in. CSSPGO now defaults to the new FDO inliner, but can fall back to today's replay inliner using a switch (`-sample-profile-prioritized-inline=0`). Motivation With baseline AutoFDO, the inliner in sample profile loader only replays previous inlining, and the use of profile is only for pruning previous inlining that turned out to be cold. Due to the nature of replay, the FDO inliner is simple with hotness being the only decision factor. It has the following limitations that we're improving now for CSSPGO. - It doesn't take inline candidate size into account. Since it's doing replay, the size growth is bounded by previous CGSCC inlining. With context-sensitive profile, FDO inliner is no longer limited by previous inlining, so we need to take size into account to avoid significant size bloat. - The way it looks at hotness is not accurate. It uses total samples in an inlinee as proxy for hotness, while what really matters for an inline decision is the call site count. This is an unfortunate fall back because call site count and callee entry count are not reliable due to dwarf based correlation, especially for inlinees. Now paired with pseudo-probe, we have accurate call site count and callee's entry count, so we can use that to gauge hotness more accurately. 
- It treats all call sites from a block as hot as long as there's one call site considered hot. This is normally true, but since total samples is used as hotness proxy, this transitiveness within block magnifies the inaccurate hotness heuristic. With pseudo-probe and the change above, this is no longer an issue for CSSPGO. New FDO Inliner Putting all the requirements for CSSPGO together, we need a top-down call site prioritized BFS inliner. Here're reasons why each component is needed. - Top-down: We need a top-down inliner to better leverage context-sensitive profile, so inlining is driven by accurate context profile, and post-inline is also accurate. This is already implemented in https://reviews.llvm.org/D70655. - Size Cap: For top-down inliner, taking function size into account for inline decision alone isn't sufficient to control size growth. We also need to explicitly cap size growth because with top-down inlining, we can grow inliner size significantly with large number of smaller inlinees even if each individually passes the cost/size check. - Prioritize call sites: With size cap, inlining order also becomes important, because if we stop inlining due to size budget limit, we'd want to use budget towards the most beneficial call sites. - BFS inline: Same as call site prioritization, if we stop inlining due to size budget limit, we want a balanced inline tree, rather than going deep on one call path. Note that the new inliner avoids repeatedly evaluating the same set of call sites, so it should help with compile time too. For this reason, we could transition today's FDO inliner to use a queue with equal priority to avoid wasted reevaluation of same call site (TODO). Speculative indirect call promotion and inlining is also supported now with CSSPGO just like baseline AutoFDO. Tunings and knobs I created tuning knobs for size growth/cap control, and for hot threshold separate from CGSCC inliner. The default values are selected based on initial tuning with CSSPGO.
Results Evaluated with an internal LLVM fork couple months ago, plus another change to adjust hot-threshold cutoff for context profile (will send up after this one), the new inliner show ~1% geomean perf win on spec2006 with CSSPGO, while reducing code size too. The measurement was done using train-train setup, MonoLTO w/ new pass manager and pseudo-probe. Note that this is just a starting point - we hope that the new inliner will open up more opportunity with CSSPGO, but it will certainly take more time and effort to make it fully calibrated and ready for bigger workloads (we're working on it). Differential Revision: https://reviews.llvm.org/D94001 (cherry picked from commit 6bae5973c476e16dbbc82030d65c7859a6628e89) --- .../Transforms/IPO/SampleContextTracker.h | 6 +- .../Transforms/IPO/SampleContextTracker.cpp | 73 ++- llvm/lib/Transforms/IPO/SampleProfile.cpp | 443 ++++++++++++++++-- .../Inputs/indirect-call-csspgo.prof | 10 + .../SampleProfile/csspgo-inline-debug.ll | 166 +++++++ .../SampleProfile/csspgo-inline-icall.ll | 63 +++ .../Transforms/SampleProfile/csspgo-inline.ll | 180 +++++++ .../profile-context-tracker-debug.ll | 25 +- .../SampleProfile/profile-context-tracker.ll | 15 +- .../SampleProfile/pseudo-probe-inline.ll | 4 +- 10 files changed, 904 insertions(+), 81 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline.ll diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h index 5b2600144fa3..526e141838c4 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -23,6 +23,7 @@ #include "llvm/ProfileData/SampleProf.h" #include #include +#include 
using namespace llvm; using namespace sampleprof; @@ -42,7 +43,7 @@ class ContextTrieNode { CallSiteLoc(CallLoc){}; ContextTrieNode *getChildContext(const LineLocation &CallSite, StringRef CalleeName); - ContextTrieNode *getChildContext(const LineLocation &CallSite); + ContextTrieNode *getHottestChildContext(const LineLocation &CallSite); ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate = true); @@ -94,6 +95,9 @@ class SampleContextTracker { // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName); + // Get samples for indirect call targets for call site at given location. + std::vector + getIndirectCalleeContextSamplesFor(const DILocation *DIL); // Query context profile for a given location. The full context // is identified by input DILocation. FunctionSamples *getContextSamplesFor(const DILocation *DIL); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index fad72985dedd..41d7f363e1a4 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -30,7 +30,7 @@ namespace llvm { ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, StringRef CalleeName) { if (CalleeName.empty()) - return getChildContext(CallSite); + return getHottestChildContext(CallSite); uint32_t Hash = nodeHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); @@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, } ContextTrieNode * -ContextTrieNode::getChildContext(const LineLocation &CallSite) { +ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { // CSFDO-TODO: This could be slow, change AllChildContext so we can // do point look up for child node by call site alone. 
- // CSFDO-TODO: Return the child with max count for indirect call + // Retrieve the child node with max count for indirect call ContextTrieNode *ChildNodeRet = nullptr; + uint64_t MaxCalleeSamples = 0; for (auto &It : AllChildContext) { ContextTrieNode &ChildNode = It.second; - if (ChildNode.CallSiteLoc == CallSite) { - if (ChildNodeRet) - return nullptr; - else - ChildNodeRet = &ChildNode; + if (ChildNode.CallSiteLoc != CallSite) + continue; + FunctionSamples *Samples = ChildNode.getFunctionSamples(); + if (!Samples) + continue; + if (Samples->getTotalSamples() > MaxCalleeSamples) { + ChildNodeRet = &ChildNode; + MaxCalleeSamples = Samples->getTotalSamples(); } } @@ -191,12 +195,12 @@ FunctionSamples * SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); - // CSFDO-TODO: We use CalleeName to differentiate indirect call - // We need to get sample for indirect callee too. DILocation *DIL = Inst.getDebugLoc(); if (!DIL) return nullptr; + // For indirect call, CalleeName will be empty, in which case the context + // profile for callee with largest total samples will be returned. 
ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); @@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, return nullptr; } +std::vector +SampleContextTracker::getIndirectCalleeContextSamplesFor( + const DILocation *DIL) { + std::vector R; + if (!DIL) + return R; + + ContextTrieNode *CallerNode = getContextFor(DIL); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode &ChildNode = It.second; + if (ChildNode.getCallSiteLoc() != CallSite) + continue; + if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples()) + R.push_back(CalleeSamples); + } + + return R; +} + FunctionSamples * SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { assert(DIL && "Expect non-null location"); @@ -295,11 +319,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree( const Instruction &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" << Inst << "\n"); - // CSFDO-TODO: We also need to promote context profile from indirect - // calls. We won't have callee names from those from call instr. - if (CalleeName.empty()) - return; - // Get the caller context for the call instruction, we don't use callee // name from call because there can be context from indirect calls too. DILocation *DIL = Inst.getDebugLoc(); @@ -309,6 +328,22 @@ void SampleContextTracker::promoteMergeContextSamplesTree( // Get the context that needs to be promoted LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + // For indirect call, CalleeName will be empty, in which case we need to + // promote all non-inlined child context profiles. 
+ if (CalleeName.empty()) { + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode *NodeToPromo = &It.second; + if (CallSite != NodeToPromo->getCallSiteLoc()) + continue; + FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples(); + if (FromSamples && FromSamples->getContext().hasState(InlinedContext)) + continue; + promoteMergeContextSamplesTree(*NodeToPromo); + } + return; + } + + // Get the context for the given callee that needs to be promoted ContextTrieNode *NodeToPromo = CallerNode->getChildContext(CallSite, CalleeName); if (!NodeToPromo) @@ -328,6 +363,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( LLVM_DEBUG(dbgs() << " Found context tree root to promote: " << FromSamples->getContext() << "\n"); + assert(!FromSamples->getContext().hasState(InlinedContext) && + "Shouldn't promote inlined context profile"); StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); return promoteMergeContextSamplesTree(NodeToPromo, RootContext, ContextStrToRemove); @@ -360,14 +397,12 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL, StringRef CalleeName) { assert(DIL && "Expect non-null location"); - // CSSPGO-TODO: need to support indirect callee - if (CalleeName.empty()) - return nullptr; - ContextTrieNode *CallContext = getContextFor(DIL); if (!CallContext) return nullptr; + // When CalleeName is empty, the child context profile with max + // total samples will be returned. 
return CallContext->getChildContext( FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 264ac4065e8c..665c4078f3ee 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -108,6 +109,14 @@ STATISTIC(NumMismatchedProfile, "Number of functions with CFG mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumCSInlinedHitMinLimit, + "Number of functions with FDO inline stopped due to min size limit"); +STATISTIC(NumCSInlinedHitMaxLimit, + "Number of functions with FDO inline stopped due to max size limit"); +STATISTIC( + NumCSInlinedHitGrowthLimit, + "Number of functions with FDO inline stopped due to growth size limit"); + // Command line option to specify the file to read samples from. This is // mainly used for debugging. 
static cl::opt SampleProfileFile( @@ -171,6 +180,38 @@ static cl::opt ProfileSizeInline( cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +static cl::opt ProfileInlineGrowthLimit( + "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), + cl::desc("The size growth ratio limit for proirity-based sample profile " + "loader inlining.")); + +static cl::opt ProfileInlineLimitMin( + "sample-profile-inline-limit-min", cl::Hidden, cl::init(100), + cl::desc("The lower bound of size growth limit for " + "proirity-based sample profile loader inlining.")); + +static cl::opt ProfileInlineLimitMax( + "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), + cl::desc("The upper bound of size growth limit for " + "proirity-based sample profile loader inlining.")); + +static cl::opt ProfileICPThreshold( + "sample-profile-icp-threshold", cl::Hidden, cl::init(5), + cl::desc( + "Relative hotness threshold for indirect " + "call promotion in proirity-based sample profile loader inlining.")); + +static cl::opt SampleHotCallSiteThreshold( + "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), + cl::desc("Hot callsite threshold for proirity-based sample profile loader " + "inlining.")); + +static cl::opt CallsitePrioritizedInline( + "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Use call site prioritized inlining for sample profile loader." 
+ "Currently only CSSPGO is supported.")); + static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); @@ -313,6 +354,31 @@ class GUIDToFuncNameMapper { DenseMap &CurrentGUIDToFuncNameMap; }; +// Inline candidate used by iterative callsite prioritized inliner +struct InlineCandidate { + CallBase *CallInstr; + const FunctionSamples *CalleeSamples; + uint64_t CallsiteCount; +}; + +// Inline candidate comparer using call site weight +struct CandidateComparer { + bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) { + if (LHS.CallsiteCount != RHS.CallsiteCount) + return LHS.CallsiteCount < RHS.CallsiteCount; + + // Tie breaker using GUID so we have stable/deterministic inlining order + assert(LHS.CalleeSamples && RHS.CalleeSamples && + "Expect non-null FunctionSamples"); + return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) < + RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName()); + } +}; + +using CandidateQueue = + PriorityQueue, + CandidateComparer>; + /// Sample profile pass. /// /// This pass reads profile data from the file specified by @@ -350,9 +416,23 @@ class SampleProfileLoader { findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(CallBase &CB); + CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName, + uint64_t &Sum, uint64_t Count, CallBase *I, + const char *&Reason); + bool inlineCallInstruction(CallBase &CB, + const FunctionSamples *CalleeSamples); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); + // Helper functions call-site prioritized BFS inliner + // Will change the main FDO inliner to be work list based directly in + // upstream, then merge this change with that and remove the duplication. 
+ InlineCost shouldInlineCandidate(InlineCandidate &Candidate); + bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); + bool tryInlineCandidate(InlineCandidate &Candidate, + SmallVector &InlinedCallSites); + bool + inlineHotFunctionsWithPriority(Function &F, + DenseSet &InlinedGUIDs); // Inline cold/small functions in addition to hot ones bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( @@ -918,6 +998,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples( return R; } + auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { + assert(L && R && "Expect non-null FunctionSamples"); + if (L->getEntrySamples() != R->getEntrySamples()) + return L->getEntrySamples() > R->getEntrySamples(); + return FunctionSamples::getGUID(L->getName()) < + FunctionSamples::getGUID(R->getName()); + }; + + if (ProfileIsCS) { + auto CalleeSamples = + ContextTracker->getIndirectCalleeContextSamplesFor(DIL); + if (CalleeSamples.empty()) + return R; + + // For CSSPGO, we only use target context profile's entry count + // as that already includes both inlined callee and non-inlined ones.. 
+ Sum = 0; + for (const auto *const FS : CalleeSamples) { + Sum += FS->getEntrySamples(); + R.push_back(FS); + } + llvm::sort(R, FSCompare); + return R; + } + const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return R; @@ -935,12 +1040,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } - llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) { - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); - return FunctionSamples::getGUID(L->getName()) < - FunctionSamples::getGUID(R->getName()); - }); + llvm::sort(R, FSCompare); } return R; } @@ -977,7 +1077,32 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { +CallBase * +SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName, + uint64_t &Sum, uint64_t Count, + CallBase *I, const char *&Reason) { + Reason = "Callee function not available"; + // R->getValue() != &F is to prevent promoting a recursive call. + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly. 
+ auto R = SymbolMap.find(CalleeName); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && + R->getValue()->hasFnAttribute("use-sample-profile") && + R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + auto *DI = + &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE); + Sum -= Count; + return DI; + } + return nullptr; +} + +bool SampleProfileLoader::inlineCallInstruction( + CallBase &CB, const FunctionSamples *CalleeSamples) { if (ExternalInlineAdvisor) { auto Advice = ExternalInlineAdvisor->getAdvice(CB); if (!Advice->isInliningRecommended()) { @@ -1012,6 +1137,9 @@ bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { // The call to InlineFunction erases I, so we can't pass it here. emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, true, CSINLINE_DEBUG); + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(CalleeSamples); + ++NumCSInlined; return true; } return false; @@ -1129,34 +1257,17 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = "Callee function not available"; - // R->getValue() != &F is to prevent promoting a recursive call. - // If it is a recursive call, we do not inline it as it could bloat - // the code exponentially. There is way to better handle this, e.g. - // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will - // simply ignore it instead of handling it explicitly. 
+ const char *Reason = nullptr; auto CalleeFunctionName = FS->getFuncName(); - auto R = SymbolMap.find(CalleeFunctionName); - if (R != SymbolMap.end() && R->getValue() && - !R->getValue()->isDeclaration() && - R->getValue()->getSubprogram() && - R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && - isLegalToPromote(*I, R->getValue(), &Reason)) { - uint64_t C = FS->getEntrySamples(); - auto &DI = - pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); - Sum -= C; + if (CallBase *DI = + tryPromoteIndirectCall(F, CalleeFunctionName, Sum, + FS->getEntrySamples(), I, Reason)) { PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && - inlineCallInstruction(cast(DI))) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(FS); + inlineCallInstruction(cast(*DI), FS)) { localNotInlinedCallSites.erase(I); LocalChanged = true; - ++NumCSInlined; } } else { LLVM_DEBUG(dbgs() @@ -1166,13 +1277,11 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I)) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined( - localNotInlinedCallSites[I]); + if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I) + ? 
localNotInlinedCallSites[I] + : nullptr)) { localNotInlinedCallSites.erase(I); LocalChanged = true; - ++NumCSInlined; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( @@ -1186,6 +1295,11 @@ bool SampleProfileLoader::inlineHotFunctions( } } + // For CS profile, profile for not inlined context will be merged when + // base profile is being trieved + if (ProfileIsCS) + return Changed; + // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : localNotInlinedCallSites) { CallBase *I = Pair.getFirst(); @@ -1232,6 +1346,254 @@ bool SampleProfileLoader::inlineHotFunctions( return Changed; } +bool SampleProfileLoader::tryInlineCandidate( + InlineCandidate &Candidate, SmallVector &InlinedCallSites) { + + CallBase &CB = *Candidate.CallInstr; + Function *CalledFunction = CB.getCalledFunction(); + assert(CalledFunction && "Expect a callee with definition"); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); + + InlineCost Cost = shouldInlineCandidate(Candidate); + if (Cost.isNever()) { + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) + << "incompatible inlining"); + return false; + } + + if (!Cost) + return false; + + InlineFunctionInfo IFI(nullptr, GetAC); + if (InlineFunction(CB, IFI).isSuccess()) { + // The call to InlineFunction erases I, so we can't pass it here. + emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, + true, CSINLINE_DEBUG); + + // Now populate the list of newly exposed call sites. 
+ InlinedCallSites.clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites.push_back(I); + + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); + ++NumCSInlined; + return true; + } + return false; +} + +bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, + CallBase *CB) { + assert(CB && "Expect non-null call instruction"); + + if (isa(CB)) + return false; + + // Find the callee's profile. For indirect call, find hottest target profile. + const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB); + if (!CalleeSamples) + return false; + + uint64_t CallsiteCount = 0; + ErrorOr Weight = getBlockWeight(CB->getParent()); + if (Weight) + CallsiteCount = Weight.get(); + if (CalleeSamples) + CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples()); + + *NewCandidate = {CB, CalleeSamples, CallsiteCount}; + return true; +} + +InlineCost +SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { + assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); + + std::unique_ptr Advice = nullptr; + if (ExternalInlineAdvisor) { + Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return InlineCost::getNever("not previously inlined"); + } + Advice->recordInlining(); + return InlineCost::getAlways("previously inlined"); + } + + // Adjust threshold based on call site hotness, only do this for callsite + // prioritized inliner because otherwise cost-benefit check is done earlier. 
+ int SampleThreshold = SampleColdCallSiteThreshold; + if (CallsitePrioritizedInline) { + if (Candidate.CallsiteCount > PSI->getHotCountThreshold()) + SampleThreshold = SampleHotCallSiteThreshold; + else if (!ProfileSizeInline) + return InlineCost::getNever("cold callsite"); + } + + Function *Callee = Candidate.CallInstr->getCalledFunction(); + assert(Callee && "Expect a definition for inline candidate of direct call"); + + InlineParams Params = getInlineParams(); + Params.ComputeFullInlineCost = true; + // Checks if there is anything in the reachable portion of the callee at + // this callsite that makes this inlining potentially illegal. Need to + // set ComputeFullInlineCost, otherwise getInlineCost may return early + // when cost exceeds threshold without checking all IRs in the callee. + // The acutal cost does not matter because we only checks isNever() to + // see if it is legal to inline the callsite. + InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, + GetTTI(*Callee), GetAC, GetTLI); + + // For old FDO inliner, we inline the call site as long as cost is not + // "Never". The cost-benefit check is done earlier. + if (!CallsitePrioritizedInline) { + if (Cost.isNever()) + return Cost; + return InlineCost::getAlways("hot callsite previously inlined"); + } + + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + + // Otherwise only use the cost from call analyzer, but overwite threshold with + // Sample PGO threshold. + return InlineCost::get(Cost.getCost(), SampleThreshold); +} + +bool SampleProfileLoader::inlineHotFunctionsWithPriority( + Function &F, DenseSet &InlinedGUIDs) { + DenseSet PromotedInsns; + assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); + + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. 
+ assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + + // Populating worklist with initial call sites from root inliner, along + // with call site weights. + CandidateQueue CQueue; + InlineCandidate NewCandidate; + for (auto &BB : F) { + for (auto &I : BB.getInstList()) { + auto *CB = dyn_cast(&I); + if (!CB) + continue; + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.push(NewCandidate); + } + } + + // Cap the size growth from profile guided inlining. This is needed even + // though cost of each inline candidate already accounts for callee size, + // because with top-down inlining, we can grow inliner size significantly + // with large number of smaller inlinees each pass the cost check. + assert(ProfileInlineLimitMax >= ProfileInlineLimitMin && + "Max inline size limit should not be smaller than min inline size " + "limit."); + unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit; + SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); + SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); + if (ExternalInlineAdvisor) + SizeLimit = std::numeric_limits::max(); + + // Perform iterative BFS call site prioritized inlining + bool Changed = false; + while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { + InlineCandidate Candidate = CQueue.top(); + CQueue.pop(); + CallBase *I = Candidate.CallInstr; + Function *CalledFunction = I->getCalledFunction(); + + if (CalledFunction == &F) + continue; + if (I->isIndirectCall()) { + if (PromotedInsns.count(I)) + continue; + uint64_t Sum; + auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); + uint64_t SumOrigin = Sum; + for (const auto *FS : CalleeSamples) { + // TODO: Consider disable pre-lTO ICP for MonoLTO as well + if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + 
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), + PSI->getOrCompHotCountThreshold()); + continue; + } + uint64_t EntryCountDistributed = FS->getEntrySamples(); + // In addition to regular inline cost check, we also need to make sure + // ICP isn't introducing excessive speculative checks even if individual + // target looks beneficial to promote and inline. That means we should + // only do ICP when there's a small number dominant targets. + if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) + break; + // TODO: Fix CallAnalyzer to handle all indirect calls. + // For indirect call, we don't run CallAnalyzer to get InlineCost + // before actual inlining. This is because we could see two different + // types from the same definition, which makes CallAnalyzer choke as + // it's expecting matching parameter type on both caller and callee + // side. See example from PR18962 for the triggering cases (the bug was + // fixed, but we generate different types). + if (!PSI->isHotCount(EntryCountDistributed)) + break; + const char *Reason = nullptr; + auto CalleeFunctionName = FS->getFuncName(); + if (CallBase *DI = tryPromoteIndirectCall( + F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) { + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {DI, FS, EntryCountDistributed}; + PromotedInsns.insert(I); + SmallVector InlinedCallSites; + // If profile mismatches, we should not attempt to inline DI. 
+ if ((isa(DI) || isa(DI)) && + tryInlineCandidate(Candidate, InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } else { + LLVM_DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); + } + } + } else if (CalledFunction && CalledFunction->getSubprogram() && + !CalledFunction->isDeclaration()) { + SmallVector InlinedCallSites; + if (tryInlineCandidate(Candidate, InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + findCalleeFunctionSamples(*I)->findInlinedFunctions( + InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); + } + } + + if (!CQueue.empty()) { + if (SizeLimit == (unsigned)ProfileInlineLimitMax) + ++NumCSInlinedHitMaxLimit; + else if (SizeLimit == (unsigned)ProfileInlineLimitMin) + ++NumCSInlinedHitMinLimit; + else + ++NumCSInlinedHitGrowthLimit; + } + + return Changed; +} + /// Find equivalence classes for the given block. /// /// This finds all the blocks that are guaranteed to execute the same @@ -1833,7 +2195,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } DenseSet InlinedGUIDs; - Changed |= inlineHotFunctions(F, InlinedGUIDs); + if (ProfileIsCS && CallsitePrioritizedInline) + Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); + else + Changed |= inlineHotFunctions(F, InlinedGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); @@ -1978,6 +2343,12 @@ bool SampleProfileLoader::doInitialization(Module &M, ProfileIsCS = true; FunctionSamples::ProfileIsCS = true; + // Enable priority-base inliner and size inline by default for CSSPGO. 
+ if (!ProfileSizeInline.getNumOccurrences()) + ProfileSizeInline = true; + if (!CallsitePrioritizedInline.getNumOccurrences()) + CallsitePrioritizedInline = true; + // Tracker for profiles under different context ContextTracker = std::make_unique(Reader->getProfiles()); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof new file mode 100644 index 000000000000..095c7a1fc480 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof @@ -0,0 +1,10 @@ +[test]:63067:0 + 1: 3345 _Z3barv:1398 _Z3foov:2059 + 2: 100 _Z3bazv:102 + 3: 100 _Z3zoov:102 +[test:1 @ _Z3barv]:200:100 + 1: 100 +[test:1 @ _Z3foov]:4220:1200 + 14: 4220 +[test:2 @ _Z3bazv]:200:100 + 5: 100 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll new file mode 100644 index 000000000000..e5f2f7571eaf --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll @@ -0,0 +1,166 @@ +; REQUIRES: asserts +; Test that the new FDO inliner using a priority queue will not visit the same call site again and again. +; Use debug prints as repeated call site evaluation is not visible from final inline decision.
+ +; Note that we need new pass manager to enable top-down processing for sample profile loader +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=OLD-INLINE +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=1 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=NEW-INLINE + +; Old inliner will evaluate the same call site three times +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi + +; New inliner only evaluates the same call site once +; NEW-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; NEW-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; NEW-INLINE-NOT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; NEW-INLINE-NOT: Callee context found: main:3.1 @ _Z5funcBi + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +entry: + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 +
%add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + +while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = %while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 + ret i32 %call, !dbg !53 +} + +declare i32 
@_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = 
!DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, 
scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, 
column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54) diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll new file mode 100644 index 000000000000..3ec64326da2d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s + +define void @test(void ()*) #0 !dbg !3 { +;; Add direct calls to force top-down order for sample profile loader + call void @_Z3foov(), !dbg !7 + call void @_Z3barv(), !dbg !7 + call void @_Z3bazv(), !dbg !7 + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + call void %3(), !dbg !4 + %4 = alloca void ()* + store void ()* %0, void ()** %4 + %5 = load void ()*, void ()** %4 + call void %5(), !dbg !5 + ret void +} + +define void @_Z3foov() #0
!dbg !8 { + ret void +} + +define void @_Z3barv() #0 !dbg !9 { + ret void +} + +define void @_Z3bazv() #0 !dbg !10 { + ret void +} + +define void @_Z3zoov() #0 !dbg !11 { + ret void +} + +attributes #0 = {"use-sample-profile"} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "test.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0) +!4 = !DILocation(line: 4, scope: !3) +!5 = !DILocation(line: 5, scope: !3) +!6 = !DILocation(line: 6, scope: !3) +!7 = !DILocation(line: 7, scope: !3) +!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 29, unit: !0) +!9 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 32, unit: !0) +!10 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 24, unit: !0) +!11 = distinct !DISubprogram(name: "zoo", linkageName: "_Z3zoov", scope: !1, file: !1, line: 24, unit: !0) + + +; ICP-ALL: remark: test.cc:5:0: _Z3bazv inlined into test +; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3foov inlined into test +; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3barv inlined into test +; ICP-ALL-NOT: remark + +; ICP-HOT: remark: test.cc:4:0: _Z3foov inlined into test +; ICP-HOT-NOT: remark diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll new file mode 100644 index 000000000000..14e916d8c2e8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -0,0 +1,180 @@ +; Test for CSSPGO's new early inliner using priority queue + +; Note that we need new pass manager to enable top-down processing for sample profile loader +; Test we inlined the following in top-down order with old inliner +; main:3 @ _Z5funcAi +; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; RUN: 
opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob. 
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2 + + +; INLINE-BASE: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 +; INLINE-BASE: remark: merged.cpp:27:11: _Z8funcLeafi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10 +; INLINE-BASE: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 + +; INLINE-NEW: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 +; INLINE-NEW-NOT: remark + +; INLINE-NEW-LIMIT1-NOT: remark + +; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 +; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 +; INLINE-NEW-LIMIT2-NOT: remark + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +entry: 
+ br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 + %add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + +while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = 
%while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 + ret i32 %call, !dbg !53 +} + +declare i32 @_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, 
splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 
29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = 
distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54) diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll index 1a7a53457a5d..7789e18b394a 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll @@ -3,11 +3,11 @@ ; based on inline decision, so post inline counts are accurate. 
; Note that we need new pass manager to enable top-down processing for sample profile loader -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-prioritized-inline=0 -sample-profile-inline-size=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT -; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile +; Test we inlined the following in top-down order and promot rest not inlined context profile into base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi @@ -20,13 +20,9 @@ ; INLINE-ALL-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi ; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi -; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi( -; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z8funcLeafi ; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi 
-; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi -; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i1 = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi @@ -48,24 +44,23 @@ ; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-ALL-NEXT: Merging context profile into base profile: _Z8funcLeafi -; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile -; main:3 @ _Z5funcAi +; Test we inlined the following in top-down order and promot rest not inlined context profile into base profile ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT: Getting base profile for function: main ; INLINE-HOT-NEXT: Merging context profile into base profile: main ; INLINE-HOT-NEXT: Found context tree root to promote: external:12 @ main ; INLINE-HOT-NEXT: Context promoted and merged to: main -; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !58 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi ; INLINE-HOT-NEXT: Callee context found: main:3.1 @ _Z5funcBi -; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !63 +; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi ; INLINE-HOT-NEXT: Callee context found: main:3 @ _Z5funcAi ; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi ; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi ; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi ; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for 
instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 ; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 @@ -79,11 +74,11 @@ ; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi ; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi -; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !50 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi ; INLINE-HOT-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 -; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 +; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi +; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi ; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z8funcLeafi diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll index adda7022047d..8d4e23829941 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -4,19 +4,18 @@ ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t ; Note that we 
need new pass manager to enable top-down processing for sample profile loader -; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile +; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL - -; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile -; main:3 @ _Z5funcAi +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; +; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile 
-sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT @factor = dso_local global i32 3, align 4, !dbg !0 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index a5033a0dc190..d47359fa0b5f 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s ; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2 -; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s ; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" From c2f3f45b5c5bd6f9b86a766fc40130b34acb8293 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Tue, 19 Jan 2021 23:29:14 -0800 Subject: [PATCH 056/318] [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from 
CSSPGO inlining and reduce similar code in the inlining loop, plus minor cleanup for AFDO path. This is resubmit of D95024, with build break and overtighten assertion fixed. Test Plan: (cherry picked from commit 1645f465be85223e9f5b6303a3e5e0e491fd819f) --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 205 +++++++----------- .../SampleProfile/pseudo-probe-inline.ll | 2 +- llvm/test/Transforms/SampleProfile/remarks.ll | 4 +- 3 files changed, 80 insertions(+), 131 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 665c4078f3ee..2cfefd3a18ea 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -416,20 +416,18 @@ class SampleProfileLoader { findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, CallBase *I, - const char *&Reason); - bool inlineCallInstruction(CallBase &CB, - const FunctionSamples *CalleeSamples); + // Attempt to promote indirect call and also inline the promoted call + bool tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); - // Helper functions call-site prioritized BFS inliner - // Will change the main FDO inliner to be work list based directly in - // upstream, then merge this change with that and remove the duplication. 
InlineCost shouldInlineCandidate(InlineCandidate &Candidate); bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); - bool tryInlineCandidate(InlineCandidate &Candidate, - SmallVector &InlinedCallSites); + bool + tryInlineCandidate(InlineCandidate &Candidate, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctionsWithPriority(Function &F, DenseSet &InlinedGUIDs); @@ -1077,70 +1075,46 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -CallBase * -SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, - CallBase *I, const char *&Reason) { - Reason = "Callee function not available"; +/// Attempt to promote indirect call and also inline the promoted call. +/// +/// \param F Caller function. +/// \param Candidate ICP and inline candidate. +/// \param Sum Sum of target counts for indirect call. +/// \param PromotedInsns Map to keep track of indirect call already processed. +/// \param Candidate ICP and inline candidate. +/// \param InlinedCallSite Output vector for new call sites exposed after +/// inlining. +bool SampleProfileLoader::tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSite) { + const char *Reason = "Callee function not available"; // R->getValue() != &F is to prevent promoting a recursive call. // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is // recursive. As llvm does not inline recursive calls, we will // simply ignore it instead of handling it explicitly. 
- auto R = SymbolMap.find(CalleeName); + auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName()); if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + R->getValue() != &F && + isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) { auto *DI = - &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE); - Sum -= Count; - return DI; - } - return nullptr; -} - -bool SampleProfileLoader::inlineCallInstruction( - CallBase &CB, const FunctionSamples *CalleeSamples) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; + &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(), + Candidate.CallsiteCount, Sum, false, ORE); + if (DI) { + Sum -= Candidate.CallsiteCount; + PromotedInsns.insert(Candidate.CallInstr); + Candidate.CallInstr = DI; + if (isa(DI) || isa(DI)) + return tryInlineCandidate(Candidate, InlinedCallSite); } - // Dummy record, we don't use it for replay. - Advice->recordInlining(); - } - - Function *CalledFunction = CB.getCalledFunction(); - assert(CalledFunction); - DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *BB = CB.getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. 
- InlineCost Cost = - getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); - if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) - << "incompatible inlining"); - return false; - } - InlineFunctionInfo IFI(nullptr, GetAC); - if (InlineFunction(CB, IFI).isSuccess()) { - // The call to InlineFunction erases I, so we can't pass it here. - emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, - true, CSINLINE_DEBUG); - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(CalleeSamples); - ++NumCSInlined; - return true; + } else { + LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " + << Candidate.CalleeSamples->getFuncName() << " because " + << Reason << "\n"); } return false; } @@ -1206,10 +1180,11 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap localNotInlinedCallSites; + DenseMap LocalNotInlinedCallSites; bool Changed = false; - while (true) { - bool LocalChanged = false; + bool LocalChanged = true; + while (LocalChanged) { + LocalChanged = false; SmallVector CIS; for (auto &BB : F) { bool Hot = false; @@ -1223,7 +1198,7 @@ bool SampleProfileLoader::inlineHotFunctions( "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); if (FS->getEntrySamples() > 0 || ProfileIsCS) - localNotInlinedCallSites.try_emplace(CB, FS); + LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI)) Hot = true; else if (shouldInlineColdCallee(*CB)) @@ -1241,6 +1216,11 @@ bool SampleProfileLoader::inlineHotFunctions( } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); + InlineCandidate Candidate = {I, + LocalNotInlinedCallSites.count(I) + ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */}; // Do not inline recursive calls. 
if (CalledFunction == &F) continue; @@ -1257,30 +1237,16 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = - tryPromoteIndirectCall(F, CalleeFunctionName, Sum, - FS->getEntrySamples(), I, Reason)) { - PromotedInsns.insert(I); - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - inlineCallInstruction(cast(*DI), FS)) { - localNotInlinedCallSites.erase(I); - LocalChanged = true; - } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Candidate = {I, FS, FS->getEntrySamples()}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + LocalNotInlinedCallSites.erase(I); + LocalChanged = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I) - ? 
localNotInlinedCallSites[I] - : nullptr)) { - localNotInlinedCallSites.erase(I); + if (tryInlineCandidate(Candidate)) { + LocalNotInlinedCallSites.erase(I); LocalChanged = true; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1288,11 +1254,7 @@ bool SampleProfileLoader::inlineHotFunctions( InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); } } - if (LocalChanged) { - Changed = true; - } else { - break; - } + Changed |= LocalChanged; } // For CS profile, profile for not inlined context will be merged when @@ -1301,7 +1263,7 @@ bool SampleProfileLoader::inlineHotFunctions( return Changed; // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : localNotInlinedCallSites) { + for (const auto &Pair : LocalNotInlinedCallSites) { CallBase *I = Pair.getFirst(); Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) @@ -1347,7 +1309,7 @@ bool SampleProfileLoader::inlineHotFunctions( } bool SampleProfileLoader::tryInlineCandidate( - InlineCandidate &Candidate, SmallVector &InlinedCallSites) { + InlineCandidate &Candidate, SmallVector *InlinedCallSites) { CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1372,9 +1334,11 @@ bool SampleProfileLoader::tryInlineCandidate( true, CSINLINE_DEBUG); // Now populate the list of newly exposed call sites. 
- InlinedCallSites.clear(); - for (auto &I : IFI.InlinedCallSites) - InlinedCallSites.push_back(I); + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); + } if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); @@ -1409,8 +1373,6 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, InlineCost SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { - assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); - std::unique_ptr Advice = nullptr; if (ExternalInlineAdvisor) { Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); @@ -1446,18 +1408,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, GetTTI(*Callee), GetAC, GetTLI); + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + // For old FDO inliner, we inline the call site as long as cost is not // "Never". The cost-benefit check is done earlier. if (!CallsitePrioritizedInline) { - if (Cost.isNever()) - return Cost; - return InlineCost::getAlways("hot callsite previously inlined"); + return InlineCost::get(Cost.getCost(), INT_MAX); } - // Honor always inline and never inline from call analyzer - if (Cost.isNever() || Cost.isAlways()) - return Cost; - // Otherwise only use the cost from call analyzer, but overwite threshold with // Sample PGO threshold. return InlineCost::get(Cost.getCost(), SampleThreshold); @@ -1542,34 +1502,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( // fixed, but we generate different types). 
if (!PSI->isHotCount(EntryCountDistributed)) break; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = tryPromoteIndirectCall( - F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) { - // Attach function profile for promoted indirect callee, and update - // call site count for the promoted inline candidate too. - Candidate = {DI, FS, EntryCountDistributed}; - PromotedInsns.insert(I); - SmallVector InlinedCallSites; - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - tryInlineCandidate(Candidate, InlinedCallSites)) { - for (auto *CB : InlinedCallSites) { - if (getInlineCandidate(&NewCandidate, CB)) - CQueue.emplace(NewCandidate); - } - Changed = true; + SmallVector InlinedCallSites; + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {I, FS, EntryCountDistributed}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, + &InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Changed = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { SmallVector InlinedCallSites; - if (tryInlineCandidate(Candidate, InlinedCallSites)) { + if (tryInlineCandidate(Candidate, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index d47359fa0b5f..5359fd4da067 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -89,7 
+89,7 @@ if.end: ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 3add1e74abaa..46f016433b20 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -21,7 +21,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. @@ -47,7 +47,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main From a9157c5628dc89b13936bbc8eef261cb02d63d40 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Fri, 11 Dec 2020 12:18:31 -0800 Subject: [PATCH 057/318] [CSSPGO] Introducing distribution factor for pseudo probe. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sample re-annotation is required in LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. 
This is mainly caused by preLTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion etc., where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing a concept of distribution factor for pseudo probes so that samples can be distributed for duplicated probes scaled by a factor. We hope that optimizations duplicating code properly maintain the branch frequency information (BFI) based on which probe distribution factors are calculated. Distribution factors are updated at the end of preLTO pipeline to reflect an estimated portion of the real execution count. This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes. A saturated distribution factor stands for 1.0. A pseudo probe will carry a factor with the value ranged from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit Dwarf discriminator. A 7-bit distribution factor is used instead. Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by PreLTO passes, when later on inlined in LTO time, should have the callees' probes prorated based on the Prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors.
Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D93264 (cherry picked from commit 3d89b3cbec230633e8228787819b15116c1a1730) --- clang/test/CodeGen/pseudo-probe-emit.c | 8 +- llvm/include/llvm/IR/IntrinsicInst.h | 8 +- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/include/llvm/IR/PseudoProbe.h | 27 ++- .../llvm/Passes/StandardInstrumentations.h | 2 + llvm/include/llvm/ProfileData/SampleProf.h | 10 ++ .../llvm/Transforms/IPO/SampleProfileProbe.h | 41 +++++ llvm/lib/IR/PseudoProbe.cpp | 41 +++++ llvm/lib/Passes/PassBuilder.cpp | 6 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Passes/StandardInstrumentations.cpp | 1 + llvm/lib/Transforms/IPO/SampleProfile.cpp | 108 +++++++++--- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 162 +++++++++++++++++- .../Inputs/pseudo-probe-update.prof | 8 + .../SampleProfile/pseudo-probe-emit-inline.ll | 20 +-- .../SampleProfile/pseudo-probe-emit.ll | 22 ++- .../SampleProfile/pseudo-probe-inline.ll | 38 ++-- .../SampleProfile/pseudo-probe-profile.ll | 42 ++++- .../SampleProfile/pseudo-probe-update.ll | 45 +++++ .../SampleProfile/pseudo-probe-verify.ll | 77 +++++++++ 20 files changed, 595 insertions(+), 74 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c index 059673b6992e..fccc8f04844d 100644 --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -6,12 +6,12 @@ void bar(); void go(); void foo(int x) { - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, 
i32 0, i64 -1) bar(); else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) } diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 9d68f3fdde6c..df3a1d568756 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -981,12 +981,16 @@ class PseudoProbeInst : public IntrinsicInst { return cast(const_cast(getArgOperand(0))); } + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } + ConstantInt *getAttributes() const { return cast(const_cast(getArgOperand(2))); } - ConstantInt *getIndex() const { - return cast(const_cast(getArgOperand(1))); + ConstantInt *getFactor() const { + return cast(const_cast(getArgOperand(3))); } }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index b2bfc6e6f9e6..21307ed1bd91 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1298,7 +1298,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int // Like the sideeffect intrinsic defined above, this intrinsic is treated by the // optimizer as having opaque side effects so that it won't be get rid of or moved // out of the block it probes. 
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; // Intrinsics to support half precision floating point format diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h index e0370c264102..5165e80caa2d 100644 --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -16,28 +16,39 @@ #include "llvm/ADT/Optional.h" #include #include +#include namespace llvm { class Instruction; +class BasicBlock; constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +// The saturated distrution factor representing 100% for block probes. +constexpr static uint64_t PseudoProbeFullDistributionFactor = + std::numeric_limits::max(); + struct PseudoProbeDwarfDiscriminator { +public: // The following APIs encodes/decodes per-probe information to/from a // 32-bit integer which is organized as: // [2:0] - 0x7, this is reserved for regular discriminator, // see DWARF discriminator encoding rule // [18:3] - probe id - // [25:19] - reserved + // [25:19] - probe distribution factor // [28:26] - probe type, see PseudoProbeType // [31:29] - reserved for probe attributes - static uint32_t packProbeData(uint32_t Index, uint32_t Type) { + static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags, + uint32_t Factor) { assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16"); assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7"); - return (Index << 3) | (Type << 26) | 0x7; + assert(Flags <= 0x7); + assert(Factor <= 100 && + "Probe distribution factor too big to encode, exceeding 100"); + return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7; } static uint32_t extractProbeIndex(uint32_t Value) { @@ -51,16 +62,26 @@ struct 
PseudoProbeDwarfDiscriminator { static uint32_t extractProbeAttributes(uint32_t Value) { return (Value >> 29) & 0x7; } + + static uint32_t extractProbeFactor(uint32_t Value) { + return (Value >> 19) & 0x7F; + } + + // The saturated distrution factor representing 100% for callsites. + constexpr static uint8_t FullDistributionFactor = 100; }; struct PseudoProbe { uint32_t Id; uint32_t Type; uint32_t Attr; + float Factor; }; Optional extractProbe(const Instruction &Inst); +void setProbeDistributionFactor(Instruction &Inst, float Factor); + } // end namespace llvm #endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 795a980878e2..61c86b0468f2 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -22,6 +22,7 @@ #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" #include #include @@ -273,6 +274,7 @@ class StandardInstrumentations { OptBisectInstrumentation OptBisect; PreservedCFGCheckerInstrumentation PreservedCFGChecker; IRChangedPrinter PrintChangedIR; + PseudoProbeVerifier PseudoProbeVerification; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 346bc4c81d86..25d5b2376c11 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -347,6 +347,16 @@ class SampleRecord { return SortedTargets; } + /// Prorate call targets by a distribution factor. 
+ static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, + float DistributionFactor) { + CallTargetMap AdjustedTargets; + for (const auto &I : Targets) { + AdjustedTargets[I.first()] = I.second * DistributionFactor; + } + return AdjustedTargets; + } + /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 78117fd4a9c2..cab893b50d19 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -16,6 +16,10 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" @@ -29,6 +33,8 @@ class Module; using namespace sampleprof; using BlockIdMap = std::unordered_map; using InstructionIdMap = std::unordered_map; +using ProbeFactorMap = std::unordered_map; +using FuncProbeFactorMap = StringMap; enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; @@ -43,6 +49,33 @@ class PseudoProbeDescriptor { uint64_t getFunctionHash() const { return FunctionHash; } }; +// A pseudo probe verifier that can be run after each IR passes to detect the +// violation of updating probe factors. In principle, the sum of distribution +// factor for a probe should be identical before and after a pass. For a +// function pass, the factor sum for a probe would be typically 100%. +class PseudoProbeVerifier { +public: + void registerCallbacks(PassInstrumentationCallbacks &PIC); + + // Implementation of pass instrumentation callbacks for new pass manager. 
+ void runAfterPass(StringRef PassID, Any IR); + +private: + // Allow a little bias due the rounding to integral factors. + constexpr static float DistributionFactorVariance = 0.02; + // Distribution factors from last pass. + FuncProbeFactorMap FunctionProbeFactors; + + void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors); + void runAfterPass(const Module *M); + void runAfterPass(const LazyCallGraph::SCC *C); + void runAfterPass(const Function *F); + void runAfterPass(const Loop *L); + bool shouldVerifyFunction(const Function *F); + void verifyProbeFactors(const Function *F, + const ProbeFactorMap &ProbeFactors); +}; + // This class serves sample counts correlation for SampleProfileLoader by // analyzing pseudo probes and their function descriptors injected by // SampleProfileProber. @@ -102,5 +135,13 @@ class SampleProfileProbePass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +class PseudoProbeUpdatePass : public PassInfoMixin { + void runOnFunction(Function &F, FunctionAnalysisManager &FAM); + +public: + PseudoProbeUpdatePass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 804214f06e7a..80d2963938d4 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -35,6 +35,9 @@ Optional extractProbeFromDiscriminator(const Instruction &Inst) { PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); Probe.Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator); + Probe.Factor = + PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) / + (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor; return Probe; } } @@ -47,6 +50,8 @@ Optional extractProbe(const Instruction &Inst) { Probe.Id = II->getIndex()->getZExtValue(); Probe.Type = (uint32_t)PseudoProbeType::Block; 
Probe.Attr = II->getAttributes()->getZExtValue(); + Probe.Factor = II->getFactor()->getZExtValue() / + (float)PseudoProbeFullDistributionFactor; return Probe; } @@ -55,4 +60,40 @@ Optional extractProbe(const Instruction &Inst) { return None; } + +void setProbeDistributionFactor(Instruction &Inst, float Factor) { + assert(Factor >= 0 && Factor <= 1 && + "Distribution factor must be in [0, 1.0]"); + if (auto *II = dyn_cast(&Inst)) { + IRBuilder<> Builder(&Inst); + uint64_t IntFactor = PseudoProbeFullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + auto OrigFactor = II->getFactor()->getZExtValue(); + if (IntFactor != OrigFactor) + II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor)); + } else if (isa(&Inst) && !isa(&Inst)) { + if (const DebugLoc &DLoc = Inst.getDebugLoc()) { + const DILocation *DIL = DLoc; + auto Discriminator = DIL->getDiscriminator(); + if (DILocation::isPseudoProbeDiscriminator(Discriminator)) { + auto Index = + PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); + auto Type = + PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); + auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes( + Discriminator); + // Round small factors to 0 to avoid over-counting. + uint32_t IntFactor = + PseudoProbeDwarfDiscriminator::FullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, Attr, IntFactor); + DIL = DIL->cloneWithDiscriminator(V); + Inst.setDebugLoc(DIL); + } + } + } +} } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d4c4c6e01ef5..6c1a7c75d30a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Now add the optimization pipeline. 
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { if (PTO.Coroutines) MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 860bfade733d..877cb9ed13b3 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) +MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) #undef MODULE_PASS #ifndef CGSCC_ANALYSIS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index a8bfe02d4432..6795aed7b04e 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks( OptBisect.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); PrintChangedIR.registerCallbacks(PIC); + PseudoProbeVerification.registerCallbacks(PIC); if (VerifyEach) Verify.registerCallbacks(PIC); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 2cfefd3a18ea..b2a9127773c3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -108,6 +108,8 @@ STATISTIC(NumCSNotInlined, STATISTIC(NumMismatchedProfile, "Number of functions with CFG 
mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumDuplicatedInlinesite, + "Number of inlined callsites with a partial distribution factor"); STATISTIC(NumCSInlinedHitMinLimit, "Number of functions with FDO inline stopped due to min size limit"); @@ -358,7 +360,14 @@ class GUIDToFuncNameMapper { struct InlineCandidate { CallBase *CallInstr; const FunctionSamples *CalleeSamples; + // Prorated callsite count, which will be used to guide inlining. For example, + // if a callsite is duplicated in LTO prelink, then in LTO postlink the two + // copies will get their own distribution factors and their prorated counts + // will be used to decide if they should be inlined independently. uint64_t CallsiteCount; + // Call site distribution factor to prorate the profile samples for a + // duplicated callsite. Default value is 1.0. + float CallsiteDistribution; }; // Inline candidate comparer using call site weight @@ -418,8 +427,8 @@ class SampleProfileLoader { const FunctionSamples *findFunctionSamples(const Instruction &I) const; // Attempt to promote indirect call and also inline the promoted call bool tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, - DenseSet &PromotedInsns, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, + uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); @@ -886,7 +895,7 @@ ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { const ErrorOr &R = FS->findSamplesAt(Probe->Id, 0); if (R) { - uint64_t Samples = R.get(); + uint64_t Samples = R.get() * Probe->Factor; bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); if (FirstMark) { ORE->emit([&]() { @@ -894,13 +903,17 @@ ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { Remark << "Applied " << ore::NV("NumSamples", Samples); Remark << " 
samples from profile (ProbeId="; Remark << ore::NV("ProbeId", Probe->Id); + Remark << ", Factor="; + Remark << ore::NV("Factor", Probe->Factor); + Remark << ", OriginalSamples="; + Remark << ore::NV("OriginalSamples", R.get()); Remark << ")"; return Remark; }); } - LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst - << " - weight: " << R.get() << ")\n"); + << " - weight: " << R.get() << " - factor: " + << format("%0.2f", Probe->Factor) << ")\n"); return Samples; } return R; @@ -1085,7 +1098,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { /// \param InlinedCallSite Output vector for new call sites exposed after /// inlining. bool SampleProfileLoader::tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSite) { const char *Reason = "Callee function not available"; @@ -1106,10 +1119,28 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate( Candidate.CallsiteCount, Sum, false, ORE); if (DI) { Sum -= Candidate.CallsiteCount; + // Prorate the indirect callsite distribution. + // Do not update the promoted direct callsite distribution at this + // point since the original distribution combined with the callee + // profile will be used to prorate callsites from the callee if + // inlined. Once not inlined, the direct callsite distribution should + // be prorated so that the it will reflect the real callsite counts. + setProbeDistributionFactor(*Candidate.CallInstr, + Candidate.CallsiteDistribution * Sum / + SumOrigin); PromotedInsns.insert(Candidate.CallInstr); Candidate.CallInstr = DI; - if (isa(DI) || isa(DI)) - return tryInlineCandidate(Candidate, InlinedCallSite); + if (isa(DI) || isa(DI)) { + bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); + if (!Inlined) { + // Prorate the direct callsite distribution so that it reflects real + // callsite counts. 
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution * + Candidate.CallsiteCount / + SumOrigin); + } + return Inlined; + } } } else { LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " @@ -1216,11 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions( } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); - InlineCandidate Candidate = {I, - LocalNotInlinedCallSites.count(I) - ? LocalNotInlinedCallSites[I] - : nullptr, - 0 /* dummy count */}; + InlineCandidate Candidate = { + I, + LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */, 1.0 /* dummy distribution factor */}; // Do not inline recursive calls. if (CalledFunction == &F) continue; @@ -1229,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions( continue; uint64_t Sum; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { + uint64_t SumOrigin = Sum; if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -1237,8 +1269,9 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - Candidate = {I, FS, FS->getEntrySamples()}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; } @@ -1343,6 +1376,23 @@ bool SampleProfileLoader::tryInlineCandidate( if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); ++NumCSInlined; + + // Prorate inlined probes for a duplicated inlining callsite which probably + // has a distribution less than 100%. Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. 
Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factor are multiplied to reflect the + // aggregation of duplication. + if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; + } + return true; } return false; @@ -1360,14 +1410,19 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, if (!CalleeSamples) return false; + float Factor = 1.0; + if (Optional Probe = extractProbe(*CB)) + Factor = Probe->Factor; + uint64_t CallsiteCount = 0; ErrorOr Weight = getBlockWeight(CB->getParent()); if (Weight) CallsiteCount = Weight.get(); if (CalleeSamples) - CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples()); + CallsiteCount = std::max( + CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - *NewCandidate = {CB, CalleeSamples, CallsiteCount}; + *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1479,6 +1534,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( uint64_t Sum; auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); uint64_t SumOrigin = Sum; + Sum *= Candidate.CallsiteDistribution; for (const auto *FS : CalleeSamples) { // TODO: Consider disable pre-lTO ICP for MonoLTO as well if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1486,7 +1542,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( PSI->getOrCompHotCountThreshold()); continue; } - uint64_t EntryCountDistributed = FS->getEntrySamples(); + uint64_t EntryCountDistributed = + FS->getEntrySamples() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and 
inline. That means we should @@ -1505,9 +1562,10 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( SmallVector InlinedCallSites; // Attach function profile for promoted indirect callee, and update // call site count for the promoted inline candidate too. - Candidate = {I, FS, EntryCountDistributed}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, - &InlinedCallSites)) { + Candidate = {I, FS, EntryCountDistributed, + Candidate.CallsiteDistribution}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); @@ -1965,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) { auto T = FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; + // Prorate the callsite counts to reflect what is already done to the + // callsite, such as ICP or callsite cloning. + if (FunctionSamples::ProfileIsProbeBased) { + if (Optional Probe = extractProbe(I)) { + if (Probe->Factor < 1) + T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); + } + } SmallVector SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 7cecd20b78d8..a885c3ee4ded 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,8 +26,10 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Instrumentation.h" #include
"llvm/Transforms/Utils/ModuleUtils.h" +#include #include using namespace llvm; @@ -35,6 +38,115 @@ using namespace llvm; STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); +static cl::opt + VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, + cl::desc("Do pseudo probe verification")); + +static cl::list VerifyPseudoProbeFuncList( + "verify-pseudo-probe-funcs", cl::Hidden, + cl::desc("The option to specify the name of the functions to verify.")); + +static cl::opt + UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, + cl::desc("Update pseudo probe distribution factor")); + +bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { + // Skip function declaration. + if (F->isDeclaration()) + return false; + // Skip function that will not be emitted into object file. The prevailing + // defintion will be verified instead. + if (F->hasAvailableExternallyLinkage()) + return false; + // Do a name matching. + static std::unordered_set VerifyFuncNames( + VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end()); + return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str()); +} + +void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (VerifyPseudoProbe) { + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + this->runAfterPass(P, IR); + }); + } +} + +// Callback to run after each transformation for the new pass manager. 
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) { + std::string Banner = + "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n"; + dbgs() << Banner; + if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else + llvm_unreachable("Unknown IR unit"); +} + +void PseudoProbeVerifier::runAfterPass(const Module *M) { + for (const Function &F : *M) + runAfterPass(&F); +} + +void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) { + for (const LazyCallGraph::Node &N : *C) + runAfterPass(&N.getFunction()); +} + +void PseudoProbeVerifier::runAfterPass(const Function *F) { + if (!shouldVerifyFunction(F)) + return; + ProbeFactorMap ProbeFactors; + for (const auto &BB : *F) + collectProbeFactors(&BB, ProbeFactors); + verifyProbeFactors(F, ProbeFactors); +} + +void PseudoProbeVerifier::runAfterPass(const Loop *L) { + const Function *F = L->getHeader()->getParent(); + runAfterPass(F); +} + +void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, + ProbeFactorMap &ProbeFactors) { + for (const auto &I : *Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += Probe->Factor; + } +} + +void PseudoProbeVerifier::verifyProbeFactors( + const Function *F, const ProbeFactorMap &ProbeFactors) { + bool BannerPrinted = false; + auto &PrevProbeFactors = FunctionProbeFactors[F->getName()]; + for (const auto &I : ProbeFactors) { + float CurProbeFactor = I.second; + if (PrevProbeFactors.count(I.first)) { + float PrevProbeFactor = PrevProbeFactors[I.first]; + if (std::abs(CurProbeFactor - PrevProbeFactor) > + DistributionFactorVariance) { + if (!BannerPrinted) { + dbgs() << "Function " << F->getName() << ":\n"; + BannerPrinted = true; + } + dbgs() << "Probe " << I.first << "\tprevious factor " + << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " + << 
format("%0.2f", CurProbeFactor) << "\n"; + } + } + + // Update + PrevProbeFactors[I.first] = I.second; + } +} + PseudoProbeManager::PseudoProbeManager(const Module &M) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { Function *ProbeFn = llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe); Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index), - Builder.getInt32(0)}; + Builder.getInt32(0), + Builder.getInt64(PseudoProbeFullDistributionFactor)}; auto *Probe = Builder.CreateCall(ProbeFn, Args); AssignDebugLoc(Probe); } @@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { // Levarge the 32-bit discriminator field of debug data to store the ID and // type of a callsite probe. This gets rid of the dependency on plumbing a // customized metadata through the codegen pipeline. - uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type); + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor); if (auto DIL = Call->getDebugLoc()) { DIL = DIL->cloneWithDiscriminator(V); Call->setDebugLoc(DIL); @@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M, return PreservedAnalyses::none(); } + +void PseudoProbeUpdatePass::runOnFunction(Function &F, + FunctionAnalysisManager &FAM) { + BlockFrequencyInfo &BFI = FAM.getResult(F); + auto BBProfileCount = [&BFI](BasicBlock *BB) { + return BFI.getBlockProfileCount(BB) + ? BFI.getBlockProfileCount(BB).getValue() + : 0; + }; + + // Collect the sum of execution weight for each probe. 
+ ProbeFactorMap ProbeFactors; + for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += BBProfileCount(&Block); + } + } + + // Fix up over-counted probes. + for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) { + float Sum = ProbeFactors[Probe->Id]; + if (Sum != 0) + setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum); + } + } + } +} + +PreservedAnalyses PseudoProbeUpdatePass::run(Module &M, + ModuleAnalysisManager &AM) { + if (UpdatePseudoProbe) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + runOnFunction(F, FAM); + } + } + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof new file mode 100644 index 000000000000..62f9bd5992e7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -0,0 +1,8 @@ +foo:3200:13 + 1: 13 + 2: 7 + 3: 6 + 4: 13 + 5: 7 + 6: 6 + !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll index 7e3c7e8deda2..4f730ba09a3a 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -11,14 +11,14 @@ ; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ define dso_local void @foo2() !dbg !7 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 ret void, !dbg !10 } define dso_local void @foo() #0 !dbg !11 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: 
call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]] ; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 call void @foo2(), !dbg !12 @@ -26,9 +26,9 @@ define dso_local void @foo() #0 !dbg !11 { } define dso_local i32 @entry() !dbg !14 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]] ; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 @@ -41,13 +41,13 @@ define dso_local i32 @entry() !dbg !14 { ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo" ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]]) ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]]) -;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe -;; with an index of 2. -; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751) +;; A discriminator of 186646551 which is 0xb200017 in hexdecimal, stands for a direct call probe +;; with an index of 2 and a scale of 100%. 
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry" ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3, scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]]) ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]]) -; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751) +; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]]) ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1]], inlinedAt: ![[#INL2]]) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index 2074b708380f..da5d46a32287 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -11,32 +11,36 @@ ;; Check the generation of pseudoprobe intrinsic call. 
+@a = dso_local global i32 0, align 4 + define void @foo(i32 %x) !dbg !3 { bb0: %cmp = icmp eq i32 %x, 0 -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 br i1 %cmp, label %bb1, label %bb2 bb1: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 6, i32* @a, align 4 br label %bb3 bb2: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 8, i32* @a, align 4 br label %bb3 bb3: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]] ret void, !dbg !12 } @@ -44,7 +48,7 @@ declare void @bar(i32 %x) define internal void @foo2(void (i32)* %f) !dbg !4 { entry: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 ; Check pseudo_probe metadata attached to the indirect call instruction. 
@@ -64,13 +68,13 @@ entry: ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]]) ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]]) ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]]) -;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe +;; A discriminator of 119537687 which is 0x7200017 in hexdecimal, stands for a direct call probe ;; with an index of 2. -; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887) +; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687) ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe +;; A discriminator of 186646559 which is 0xb20001f in hexdecimal, stands for a direct call probe ;; with an index of 3. -; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759) +; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559) ; Check the generation of .pseudo_probe_desc section ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index 5359fd4da067..055d41792290 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -12,18 +12,18 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local i32 @foo(i32 %x) #0 !dbg !12 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1) %add = add nsw i32 %x, 100000, !dbg !19 ;; Check zen is fully inlined so there's no call to zen anymore.
;; Check code from the inlining of zen is properly annotated here. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) ; CHECK-NOT: call i32 @zen %call = call i32 @zen(i32 %add), !dbg !20 ret i32 %call, !dbg !21 @@ -32,36 +32,36 @@ entry: ; CHECK: define dso_local i32 @zen define dso_local i32 @zen(i32 %x) #0 !dbg !22 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1) %cmp = icmp sgt i32 %x, 0, !dbg !26 br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28 while.cond: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ] %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29 br i1 %cmp1, label %while.body, label %if.end, !dbg !31 while.body: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) +; CHECK: call void 
@llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) %0 = load volatile i32, i32* @factor, align 4, !dbg !32 %sub = sub nsw i32 %x.addr.0, %0, !dbg !39 br label %while.cond, !dbg !31 while.cond2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ] %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42 br i1 %cmp3, label %while.body4, label %if.end, !dbg !44 while.body4: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) %1 = load volatile i32, i32* @factor, align 4, !dbg !45 %add = add nsw i32 %x.addr.1, %1, !dbg !48 br label %while.cond2, !dbg !44 if.end: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ] ret i32 %x.addr.2, !dbg !51 } @@ -109,6 +109,10 @@ if.end: ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... ;YAML: --- !Analysis @@ -121,6 +125,10 @@ if.end: ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... 
;YAML: --- !Analysis @@ -133,6 +141,10 @@ if.end: ;YAML-NEXT: - NumSamples: '382920' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '382920' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll index 25fd04e9d710..34629a3743eb 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -8,26 +8,26 @@ entry: store i32 %x, i32* %x.addr, align 4 %0 = load i32, i32* %x.addr, align 4 %cmp = icmp eq i32 %0, 0 - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) br i1 %cmp, label %if.then, label %if.else ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]] if.then: ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]] call void %f(i32 1) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) store i32 1, i32* %retval, align 4 br label %return if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) store i32 2, i32* %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) %1 = load i32, i32* %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"} ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = 
!DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe +;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911) +; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe +;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe ;; with an index of 6. -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919) +; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -69,6 +69,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -80,6 +84,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - 
OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -91,6 +99,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -102,6 +114,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -113,6 +129,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -124,4 +144,8 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll new file mode 100644 index 000000000000..992afedd14f7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s 
-passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s + +declare i32 @f1() +declare i32 @f2() +declare void @f3() + + +;; This tests that the branch in 'merge' can be cloned up into T1. +define i32 @foo(i1 %cond, i1 %cond2) #0 { +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + br i1 %cond, label %T1, label %F1 +T1: +; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]] + %v1 = call i32 @f1() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) +;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7). +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) + %cond3 = icmp eq i32 %v1, 412 + br label %Merge +F1: +; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] + %v2 = call i32 @f2() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7). 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) + br label %Merge +Merge: + + %A = phi i1 [%cond3, %T1], [%cond2, %F1] + %B = phi i32 [%v1, %T1], [%v2, %F1] + br i1 %A, label %T2, label %F2 +T2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + call void @f3() + ret i32 %B +F2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) + ret i32 %B +} + +; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7} +; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} + +attributes #0 = {"use-sample-profile"} + diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll new file mode 100644 index 000000000000..fd57dd8bc526 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -0,0 +1,77 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY +; RUN: FileCheck %s < %t + +; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** +; VERIFY: Function foo: +; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00 + +declare void @foo2() nounwind + +define void @foo(i32 %x) { +bb: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + %tmp = alloca [5 x i32*], align 16 + br label %bb7.preheader + +bb3.loopexit: + %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ] + %tmp5.not = icmp eq i32 %spec.select.lcssa, 0 + br i1 %tmp5.not, label %bb24, label %bb7.preheader + +bb7.preheader: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ] + br label %bb10 + +bb10: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void 
@llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) + %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] + %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] + %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv + %tmp14 = load i32*, i32** %tmp13, align 8 + %tmp15.not = icmp ne i32* %tmp14, null + %tmp18 = sext i1 %tmp15.not to i32 + %spec.select = add nsw i32 %tmp1.14, %tmp18 + call void @foo2(), !dbg !12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 5 + br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 + +bb24: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + ret void +} + +;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe +;; with an index of 6 and a scale of -1%. 
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.9.0"} +!12 = !DILocation(line: 2, column: 20, scope: !4) +!13 = distinct !{!13, !14} +!14 = !{!"llvm.loop.unroll.full"} From ad2086658df181369a09ad69dac260a41dbab814 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 3 Feb 2021 20:57:59 -0500 Subject: [PATCH 058/318] [OpenMP][NVPTX] Take functions in `deviceRTLs` as `convergent` OpenMP device compiler (similar to other SPMD compilers) assumes that functions are convergent by default to avoid invalid transformations, such as the bug (https://bugs.llvm.org/show_bug.cgi?id=49021). 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95971 (cherry picked from commit 0f0ce3c12edefd25448e39c4d20718a10d3d42c1) --- clang/lib/Frontend/CompilerInvocation.cpp | 2 + .../OpenMP/target_attribute_convergent.cpp | 13 +++ .../libomptarget/test/offloading/bug49021.cpp | 85 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 clang/test/OpenMP/target_attribute_convergent.cpp create mode 100644 openmp/libomptarget/test/offloading/bug49021.cpp diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d8be4ea14868..036388ebd355 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); + Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice; + if (Opts.OpenMP || Opts.OpenMPSimd) { if (int Version = getLastArgIntValue( Args, OPT_fopenmp_version_EQ, diff --git a/clang/test/OpenMP/target_attribute_convergent.cpp b/clang/test/OpenMP/target_attribute_convergent.cpp new file mode 100644 index 000000000000..932214e987c8 --- /dev/null +++ b/clang/test/OpenMP/target_attribute_convergent.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s +// expected-no-diagnostics + +#pragma omp declare target + +void foo() {} + +#pragma omp end declare target + +// CHECK: Function Attrs: {{.*}}convergent{{.*}} +// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]] +// CHECK: attributes 
[[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} } diff --git a/openmp/libomptarget/test/offloading/bug49021.cpp b/openmp/libomptarget/test/offloading/bug49021.cpp new file mode 100644 index 000000000000..bcdbf68b10e0 --- /dev/null +++ b/openmp/libomptarget/test/offloading/bug49021.cpp @@ -0,0 +1,85 @@ +// RUN: %libomptarget-compilexx-aarch64-unknown-linux-gnu -O3 && %libomptarget-run-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-powerpc64-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-powerpc64le-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-x86_64-pc-linux-gnu -O3 && %libomptarget-run-x86_64-pc-linux-gnu +// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda -O3 && %libomptarget-run-nvptx64-nvidia-cuda + +#include + +template int test_map() { + std::cout << "map(complex<>)" << std::endl; + T a(0.2), a_check; +#pragma omp target map(from : a_check) + { a_check = a; } + + if (a_check != a) { + std::cout << " wrong results"; + return 1; + } + + return 0; +} + +template int test_reduction() { + std::cout << "flat parallelism" << std::endl; + T sum(0), sum_host(0); + const int size = 100; + T array[size]; + for (int i = 0; i < size; i++) { + array[i] = i; + sum_host += array[i]; + } + +#pragma omp target teams distribute parallel for map(to: array[:size]) \ + reduction(+ : sum) + for (int i = 0; i < size; i++) + sum += array[i]; + + if (sum != sum_host) + std::cout << " wrong results " << sum << " host " << sum_host << std::endl; + + std::cout << "hierarchical parallelism" << std::endl; + const int nblock(10), block_size(10); + T block_sum[nblock]; +#pragma omp target teams distribute map(to \ + : array[:size]) \ + map(from \ + : block_sum[:nblock]) + for (int ib = 0; ib < nblock; ib++) { + T partial_sum = 0; + const int istart = ib * block_size; + const int iend = (ib + 1) * block_size; +#pragma omp parallel for reduction(+ : partial_sum) + for (int i = 
istart; i < iend; i++) + partial_sum += array[i]; + block_sum[ib] = partial_sum; + } + + sum = 0; + for (int ib = 0; ib < nblock; ib++) { + sum += block_sum[ib]; + } + + if (sum != sum_host) { + std::cout << " wrong results " << sum << " host " << sum_host << std::endl; + return 1; + } + + return 0; +} + +template int test_complex() { + int ret = 0; + ret |= test_map(); + ret |= test_reduction(); + return ret; +} + +int main() { + int ret = 0; + std::cout << "Testing float" << std::endl; + ret |= test_complex(); + std::cout << "Testing double" << std::endl; + ret |= test_complex(); + return ret; +} From e8cdcaeae406527c9a76b3dc5c522391c81dfdfd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 1 Feb 2021 10:56:09 -0800 Subject: [PATCH 059/318] [X86] Accept 64-bit GPRs for vextractps when using a register that requires EVEX. This is consistent with the VEX version. It also fixes a sorting issue in the matching table that caused the EVEX version to be prioritized over VEX in intel syntax. Fixes issue [2] from PR48991. 
(cherry picked from commit c691fe14da93a7c9eff466231515d6d4d16124fa) --- llvm/lib/Target/X86/X86InstrAVX512.td | 4 ++-- llvm/test/MC/X86/intel-syntax-x86-64-avx.s | 4 ++++ llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0c2b278fdd7b..19012797ae9a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, EXTRACT_get_vextract256_imm, [HasAVX512]>; // vextractps - extract 32 bits from XMM -def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), +def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s index bb57cb287f38..c1f20d204a8c 100644 --- a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s +++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s @@ -167,3 +167,7 @@ // CHECK: vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910] // CHECK: encoding: [0xc4,0xa1,0x6d,0xf5,0x8c,0xf1,0x02,0x00,0x00,0xe0] vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910] + +// CHECK: vextractps ecx, xmm2, 1 +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd1,0x01] + vextractps ecx, xmm2, 1 diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s index 29bde03c5860..31c43afe5017 100644 --- a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s +++ 
b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s @@ -1260,3 +1260,6 @@ // CHECK: encoding: [0x62,0xf1,0x7e,0x89,0xe6,0x11] vcvtdq2pd xmm2 {k1} {z}, qword ptr [rcx] +// CHECK: vextractps ecx, xmm17, 1 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0xc9,0x01] + vextractps rcx, xmm17, 1 From 7fad20eccc4f9fe5d03b2e381e26e8eb13a3e3be Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 4 Feb 2021 08:44:20 -0500 Subject: [PATCH 060/318] Revert "[OpenMP] Disabled profiling in `libomp` by default to unblock link errors" This reverts commit f5602e0bf31ab590da19fa357980a753dbfd666e. --- openmp/CMakeLists.txt | 6 ------ openmp/docs/design/Runtimes.rst | 5 +---- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 12 +----------- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 10 insertions(+), 29 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 4787d4b5a321..67600bebdafb 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -86,12 +86,6 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) -option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) - -# Build host runtime library, after LIBOMPTARGET variables are set since they are needed -# to enable time profiling support in the OpenMP runtime. -add_subdirectory(runtime) - if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. if (APPLE OR WIN32) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index ad36e43eccdc..016b88ba324b 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,10 +48,7 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. 
Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. For multi-threaded -applications, profiling in ``libomp`` is also needed. Setting the CMake option -``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will -turn ``libomp`` into a C++ library. +be saved to the filename specified by the environment variable. .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 8828ff8ef455..9fdd04f41646 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,6 +34,7 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") + set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -65,11 +66,10 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) + # Time profiling support + set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() -# Time profiling support -set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) - # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. 
if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 822f9ca2b825..2e927df84f5c 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,14 +50,6 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() -# Building with time profiling support requires LLVM directory includes. -if(LIBOMP_PROFILING_SUPPORT) - include_directories( - ${LLVM_MAIN_INCLUDE_DIR} - ${LLVM_INCLUDE_DIR} - ) -endif() - # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) @@ -143,7 +135,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. -if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -152,8 +144,6 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) - # libomp must be a C++ library such that it can link libLLVMSupport - set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index f6aee7197ee8..3d682c690fc7 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMP_PROFILING_SUPPORT -#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT +#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT +#define OMPTARGET_PROFILING_SUPPORT 
LIBOMPTARGET_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a6e32bd008e1..4a0634d59cff 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From bc2dad1671598a87423c61c355d03db49ce76907 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Tue, 26 Jan 2021 11:55:24 +0000 Subject: [PATCH 061/318] [clang][aarch64][WOA64][docs] Release note for longjmp crash with /guard:cf Add a release note workaround for PR47463. Bug: https://bugs.llvm.org/show_bug.cgi?id=47463 Differential Revision: https://reviews.llvm.org/D95435 --- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9efd4c01f053..c17d84de320c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -153,6 +153,11 @@ Windows Support - Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling. (This matches a change from GCC 8.) 
+- Windows on Arm64: programs using the C standard library's setjmp and longjmp + functions may crash with a "Security check failure or stack buffer overrun" + exception. To work around this (with reduced security), compile with + /guard:cf,nolongjmp. + C Language Changes in Clang --------------------------- From 66c7b449acf402bdc87b69db5778b7b43958d217 Mon Sep 17 00:00:00 2001 From: Giorgis Georgakoudis Date: Mon, 25 Jan 2021 14:10:50 -0800 Subject: [PATCH 062/318] [OpenMP] Fix building using LLVM_ENABLE_RUNTIMES Fix when time profiling is enabled. Related to: D94855 Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D95398 (cherry picked from commit bb40e6731843de92f1c73ad6efceb8a89e045ea6) --- openmp/CMakeLists.txt | 10 +++++----- openmp/runtime/src/CMakeLists.txt | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 67600bebdafb..f89857dc98d6 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -55,11 +55,6 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") - -# Build host runtime library. -add_subdirectory(runtime) - - set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. # Since the device plugins are only supported on Linux anyway, @@ -86,6 +81,11 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) + +# Build host runtime library, after LIBOMPTARGET variables are set since they are needed +# to enable time profiling support in the OpenMP runtime. +add_subdirectory(runtime) + if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. 
if (APPLE OR WIN32) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 2e927df84f5c..9c5dba55b705 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,6 +50,15 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() +# Building with time profiling support for libomptarget requires +# LLVM directory includes. +if(LIBOMPTARGET_PROFILING_SUPPORT) + include_directories( + ${LLVM_MAIN_INCLUDE_DIR} + ${LLVM_INCLUDE_DIR} + ) +endif() + # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) From 92a5106e8055bab7da46095a832904444862728b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 07:24:19 -0500 Subject: [PATCH 063/318] [OpenMP] Disabled profiling in `libomp` by default to unblock link errors Link error occurred when time profiling in libomp is enabled by default because `libomp` is assumed to be a C library but the dependence on `libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all OpenMP tests in Phabricator. This patch set a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to enable/disable the feature. By default it is disabled. Note that once time profiling is enabled for `libomp`, it becomes a C++ library. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95585 (cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2) --- openmp/CMakeLists.txt | 1 + openmp/docs/design/Runtimes.rst | 5 ++++- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 9 +++++---- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index f89857dc98d6..b8a2822877e3 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -81,6 +81,7 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) +option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) # Build host runtime library, after LIBOMPTARGET variables are set since they are needed # to enable time profiling support in the OpenMP runtime. diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 016b88ba324b..ad36e43eccdc 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. +be saved to the filename specified by the environment variable. For multi-threaded +applications, profiling in ``libomp`` is also needed. Setting the CMake option +``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will +turn ``libomp`` into a C++ library. .. 
_`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 9fdd04f41646..8828ff8ef455 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") - set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -66,10 +65,11 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) - # Time profiling support - set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() +# Time profiling support +set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) + # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 9c5dba55b705..822f9ca2b825 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,9 +50,8 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() -# Building with time profiling support for libomptarget requires -# LLVM directory includes. -if(LIBOMPTARGET_PROFILING_SUPPORT) +# Building with time profiling support requires LLVM directory includes. +if(LIBOMP_PROFILING_SUPPORT) include_directories( ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_INCLUDE_DIR} @@ -144,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. 
-if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -153,6 +152,8 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) + # libomp must be a C++ library such that it can link libLLVMSupport + set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 3d682c690fc7..f6aee7197ee8 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT -#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT +#cmakedefine01 LIBOMP_PROFILING_SUPPORT +#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 4a0634d59cff..a6e32bd008e1 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT ProfileTraceFile = 
getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From 72f12467ded52160d52025e13a6217f00fe25f68 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 4 Feb 2021 13:26:59 +0100 Subject: [PATCH 064/318] Add a release note about deprecating the clang-cl /fallback flag As discussed in https://lists.llvm.org/pipermail/cfe-dev/2021-January/067524.html The flag has been removed on the main branch in D95876. Differential revision: https://reviews.llvm.org/D96016 --- clang/docs/ReleaseNotes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c17d84de320c..f4ca8a855142 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -109,6 +109,10 @@ Deprecated Compiler Flags The following options are deprecated and ignored. They will be removed in future versions of Clang. +- The clang-cl ``/fallback`` flag, which made clang-cl invoke Microsoft Visual + C++ on files it couldn't compile itself, has been deprecated. It will be + removed in Clang 13. + - ... 
Modified Compiler Flags From 4e7933905578456a30b281bbbe832d8d938feed0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 5 Feb 2021 01:40:33 +0000 Subject: [PATCH 065/318] workflows: Update libclang-abi-tests to work with minor release baselines --- .github/workflows/libclang-abi-tests.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 5681c7c8166e..320a88c1d407 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -20,6 +20,7 @@ jobs: ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + BASELINE_VERSION_MINOR: ${{ steps.vars.outputs.BASELINE_VERSION_MINOR }} LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} @@ -36,16 +37,35 @@ jobs: - name: Setup Variables id: vars run: | + minor_version=0 + remote_repo='https://github.com/llvm/llvm-project' if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then - echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + baseline_ref="$major_version.0.0" + + # If there is a minor release, we want to use that as the base line. 
+ minor_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9] | tail -n1 | grep -o 'llvmorg-.\+' || true` + if [ -n "$minor_ref" ]; then + baseline_ref=$minor_ref + else + # Check if we have a release candidate + rc_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9]-rc* | tail -n1 | grep -o 'llvmorg-.\+' || true` + if [ -n "$rc_ref" ]; then + baseline_ref=$rc_ref + fi + fi + echo ::set-output name=BASELINE_VERSION_MAJOR::$major_version + echo ::set-output name=BASELINE_REF::$baseline_ref echo ::set-output name=ABI_HEADERS::clang-c echo ::set-output name=ABI_LIBS::libclang.so else echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=BASELINE_REF::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 echo ::set-output name=ABI_HEADERS::. echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so fi + abi-dump: needs: abi-dump-setup runs-on: ubuntu-latest @@ -57,7 +77,7 @@ jobs: include: - name: build-baseline llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} - ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 + ref: ${{ needs.abi-dump-setup.outputs.BASELINE_REF }} repo: llvm/llvm-project - name: build-latest llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} From 81febec8a327ecbe83575ac280c2931718ab5e33 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 29 Jan 2021 12:56:23 +0100 Subject: [PATCH 066/318] [MemCpyOpt] Add test for incorrect optimization across lifetime (NFC) This only affects the MemorySSA-based implementation. 
--- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 43 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 1d2b699ee96d..5dc13ca10054 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. @@ -9,8 +9,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 -define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { -; CHECK-LABEL: @_ZN4CordC2EOS_( +define void @call_slot(i8* nocapture dereferenceable(16) %arg1) { +; CHECK-LABEL: @call_slot( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], i64 7 ; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 @@ -27,4 +27,39 @@ bb: ret void } +; FIXME: Miscompile. 
+define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) { +; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime( +; NO_MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; NO_MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; NO_MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; NO_MSSA-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) +; NO_MSSA-NEXT: ret void +; +; MSSA-LABEL: @memcpy_memcpy_across_lifetime( +; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; MSSA-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; MSSA-NEXT: ret void +; + %a = alloca [16 x i8] + %a8 = bitcast [16 x i8]* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* %a8) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a8, i8* %p1, i64 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p2, i8* %a8, i64 16, i1 false) + call void @llvm.lifetime.end.p0i8(i64 16, i8* %a8) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p3, i8* %p2, i64 16, i1 false) + ret void +} + attributes #1 = { argmemonly nounwind } From 12a772b1a09a1b5c3f43d08c2804973506b8a859 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 31 Jan 2021 17:55:24 +0100 Subject: [PATCH 067/318] [MemorySSA] Don't treat lifetime.end as NoAlias MemorySSA currently treats lifetime.end intrinsics as not aliasing anything. This breaks MemorySSA-based MemCpyOpt, because we'll happily move a read of a pointer below a lifetime.end intrinsic, as no clobber is reported. I think the MemorySSA modelling here isn't correct: lifetime.end(p) has approximately the same effect as doing a memcpy(p, undef), and should be treated as a clobber. This patch removes the special handling of lifetime.end, leaving alias analysis to handle it appropriately. 
Differential Revision: https://reviews.llvm.org/D95763 --- llvm/lib/Analysis/MemorySSA.cpp | 26 -------------- .../Analysis/MemorySSA/lifetime-simple.ll | 9 +++-- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 36 +++++++------------ 3 files changed, 16 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 52dca7d378e1..4722b68e20e9 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, // clobbers where they don't really exist at all. Please see D43269 for // context. switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: @@ -358,22 +357,6 @@ struct UpwardsMemoryQuery { } // end anonymous namespace -static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, - BatchAAResults &AA) { - Instruction *Inst = MD->getMemoryInst(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: { - MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); - return AA.alias(ArgLoc, Loc) == MustAlias; - } - default: - return false; - } - } - return false; -} - template static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { @@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } MemoryDef *MD = cast(VersionStack[UpperBound]); - // If the lifetime of the pointer ends at this instruction, it's live on - // entry. 
- if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { - // Reset UpperBound to liveOnEntryDef's place in the stack - UpperBound = 0; - FoundClobberResult = true; - LocInfo.AR = MustAlias; - break; - } ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); if (CA.IsClobber) { FoundClobberResult = true; diff --git a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll index 33327c5539f6..2d0481c18415 100644 --- a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll +++ b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll @@ -1,8 +1,7 @@ ; RUN: opt -basic-aa -print-memoryssa -verify-memoryssa -enable-new-pm=0 -analyze < %s 2>&1 | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s -; This test checks a number of things: -; First, the lifetime markers should not clobber any uses of Q or P. -; Second, the loads of P are MemoryUse(LiveOnEntry) due to the placement of the markers vs the loads. +; This test checks that lifetime markers are considered clobbers of %P, +; and due to lack of noalias information, of %Q as well. 
define i8 @test(i8* %P, i8* %Q) { entry: @@ -18,10 +17,10 @@ entry: ; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 32, i8* %P) call void @llvm.lifetime.end.p0i8(i64 32, i8* %P) -; CHECK: MemoryUse(liveOnEntry) +; CHECK: MemoryUse(3) ; CHECK-NEXT: %1 = load i8, i8* %P %1 = load i8, i8* %P -; CHECK: MemoryUse(2) +; CHECK: MemoryUse(3) ; CHECK-NEXT: %2 = load i8, i8* %Q %2 = load i8, i8* %Q ret i8 %1 diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 5dc13ca10054..c7e7666307ab 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. @@ -27,29 +27,17 @@ bb: ret void } -; FIXME: Miscompile. 
define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) { -; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime( -; NO_MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; NO_MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 -; NO_MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) -; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; NO_MSSA-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) -; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @memcpy_memcpy_across_lifetime( -; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 -; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) -; MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; MSSA-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @memcpy_memcpy_across_lifetime( +; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) +; CHECK-NEXT: ret void ; %a = alloca [16 x i8] %a8 = bitcast [16 x i8]* %a to i8* From 716eef9ad5b367e5cbcc22c8ac53395f9bdbe7a5 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 4 Feb 2021 20:14:14 -0500 Subject: [PATCH 068/318] [OpenMP][libomptarget] Fixed an issue that device sync is skipped if the kernel doesn't have any argument Currently if there is no kernel argument, device synchronization will be skipped. This can lead to two issues: 1. If there is any device error, it will not be captured; 2. The target region might end before the kernel is done, which is not spec conformant. The test added in this patch only runs on NVPTX platform, although it will not be executed by Phab at all.
It also requires `not` which is not available on most systems. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D96067 (cherry picked from commit b68a6b09e60a24733b923a0fc282746a855852da) --- openmp/libomptarget/src/omptarget.cpp | 22 +++++++++++++++---- .../libomptarget/test/offloading/assert.cpp | 8 +++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/assert.cpp diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 90966d25fb26..e4b7b18bc70b 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -451,6 +451,17 @@ struct DeallocTgtPtrInfo { : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete), HasCloseModifier(HasCloseModifier) {} }; + +/// Synchronize device +static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo"); + if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to synchronize device.\n"); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -631,11 +642,9 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, // AsyncInfo->Queue will not be nullptr, so again, we don't need to // synchronize. 
if (AsyncInfo && AsyncInfo->Queue) { - Ret = Device.synchronize(AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Failed to synchronize device.\n"); + Ret = syncDevice(Device, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } } // Deallocate target pointer @@ -1307,6 +1316,11 @@ int target(ident_t *loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, REPORT("Failed to process data after launching the kernel.\n"); return OFFLOAD_FAIL; } + } else if (AsyncInfo.Queue) { + // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't + // have any argument, and the device supports async operations, so we need a + // sync at this point. + return syncDevice(Device, &AsyncInfo); } return OFFLOAD_SUCCESS; diff --git a/openmp/libomptarget/test/offloading/assert.cpp b/openmp/libomptarget/test/offloading/assert.cpp new file mode 100644 index 000000000000..00112dd92cc6 --- /dev/null +++ b/openmp/libomptarget/test/offloading/assert.cpp @@ -0,0 +1,8 @@ +// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda && %libomptarget-run-fail-nvptx64-nvidia-cuda + +int main(int argc, char *argv[]) { +#pragma omp target + { __builtin_trap(); } + + return 0; +} From 395ef8d5c67905646b72dd5ef2d8eb60cabb8634 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 2 Feb 2021 16:58:38 -0500 Subject: [PATCH 069/318] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Rename=20inclu?= =?UTF-8?q?de/support=20to=20include/=5F=5Fsupport?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do ship those headers, so the directory name should not be something that can potentially conflict with user-defined directories. This is a cherry-pick of b51756819a85563ae063e98eeb3d6af8e44c8f64.
Differential Revision: https://reviews.llvm.org/D96059 --- libcxx/include/CMakeLists.txt | 38 +++++++++---------- libcxx/include/__locale | 20 +++++----- .../android/locale_bionic.h | 6 +-- .../{support => __support}/fuchsia/xlocale.h | 6 +-- .../{support => __support}/ibm/limits.h | 2 +- .../ibm/locale_mgmt_aix.h | 2 +- .../{support => __support}/ibm/nanosleep.h | 0 .../{support => __support}/ibm/support.h | 2 +- .../{support => __support}/ibm/xlocale.h | 7 ++-- .../{support => __support}/musl/xlocale.h | 2 +- .../{support => __support}/newlib/xlocale.h | 6 +-- .../{support => __support}/nuttx/xlocale.h | 6 +-- .../{support => __support}/openbsd/xlocale.h | 4 +- .../solaris/floatingpoint.h | 0 .../{support => __support}/solaris/wchar.h | 0 .../{support => __support}/solaris/xlocale.h | 0 .../win32/limits_msvc_win32.h | 2 +- .../win32/locale_win32.h | 2 +- .../xlocale/__nop_locale_mgmt.h | 2 +- .../xlocale/__posix_l_fallback.h | 2 +- .../xlocale/__strtonum_fallback.h | 2 +- libcxx/include/__threading_support | 2 +- libcxx/include/bit | 2 +- libcxx/include/limits | 4 +- libcxx/src/CMakeLists.txt | 2 +- libcxx/src/locale.cpp | 2 +- libcxx/src/support/solaris/xlocale.cpp | 2 +- libcxx/src/support/win32/locale_win32.cpp | 2 +- libcxx/src/support/win32/support.cpp | 2 +- libcxx/src/support/win32/thread_win32.cpp | 2 +- .../gn/secondary/libcxx/include/BUILD.gn | 38 +++++++++---------- 31 files changed, 85 insertions(+), 84 deletions(-) rename libcxx/include/{support => __support}/android/locale_bionic.h (90%) rename libcxx/include/{support => __support}/fuchsia/xlocale.h (74%) rename libcxx/include/{support => __support}/ibm/limits.h (97%) rename libcxx/include/{support => __support}/ibm/locale_mgmt_aix.h (96%) rename libcxx/include/{support => __support}/ibm/nanosleep.h (100%) rename libcxx/include/{support => __support}/ibm/support.h (95%) rename libcxx/include/{support => __support}/ibm/xlocale.h (97%) rename libcxx/include/{support => __support}/musl/xlocale.h (95%) 
rename libcxx/include/{support => __support}/newlib/xlocale.h (82%) rename libcxx/include/{support => __support}/nuttx/xlocale.h (70%) rename libcxx/include/{support => __support}/openbsd/xlocale.h (78%) rename libcxx/include/{support => __support}/solaris/floatingpoint.h (100%) rename libcxx/include/{support => __support}/solaris/wchar.h (100%) rename libcxx/include/{support => __support}/solaris/xlocale.h (100%) rename libcxx/include/{support => __support}/win32/limits_msvc_win32.h (96%) rename libcxx/include/{support => __support}/win32/locale_win32.h (99%) rename libcxx/include/{support => __support}/xlocale/__nop_locale_mgmt.h (94%) rename libcxx/include/{support => __support}/xlocale/__posix_l_fallback.h (98%) rename libcxx/include/{support => __support}/xlocale/__strtonum_fallback.h (96%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 77e5e556d684..29a317b8ae9a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -150,25 +150,25 @@ set(files string.h string_view strstream - support/android/locale_bionic.h - support/fuchsia/xlocale.h - support/ibm/limits.h - support/ibm/locale_mgmt_aix.h - support/ibm/nanosleep.h - support/ibm/support.h - support/ibm/xlocale.h - support/musl/xlocale.h - support/newlib/xlocale.h - support/nuttx/xlocale.h - support/openbsd/xlocale.h - support/solaris/floatingpoint.h - support/solaris/wchar.h - support/solaris/xlocale.h - support/win32/limits_msvc_win32.h - support/win32/locale_win32.h - support/xlocale/__nop_locale_mgmt.h - support/xlocale/__posix_l_fallback.h - support/xlocale/__strtonum_fallback.h + __support/android/locale_bionic.h + __support/fuchsia/xlocale.h + __support/ibm/limits.h + __support/ibm/locale_mgmt_aix.h + __support/ibm/nanosleep.h + __support/ibm/support.h + __support/ibm/xlocale.h + __support/musl/xlocale.h + __support/newlib/xlocale.h + __support/nuttx/xlocale.h + __support/openbsd/xlocale.h + __support/solaris/floatingpoint.h + 
__support/solaris/wchar.h + __support/solaris/xlocale.h + __support/win32/limits_msvc_win32.h + __support/win32/locale_win32.h + __support/xlocale/__nop_locale_mgmt.h + __support/xlocale/__posix_l_fallback.h + __support/xlocale/__strtonum_fallback.h system_error tgmath.h thread diff --git a/libcxx/include/__locale b/libcxx/include/__locale index a2da7d78049f..77e5faab2676 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -21,30 +21,30 @@ #include #if defined(_LIBCPP_MSVCRT_LIKE) # include -# include +# include <__support/win32/locale_win32.h> #elif defined(__NuttX__) -# include +# include <__support/nuttx/xlocale.h> #elif defined(_AIX) || defined(__MVS__) -# include +# include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) -# include +# include <__support/android/locale_bionic.h> #elif defined(__sun__) # include -# include +# include <__support/solaris/xlocale.h> #elif defined(_NEWLIB_VERSION) -# include +# include <__support/newlib/xlocale.h> #elif defined(__OpenBSD__) -# include +# include <__support/openbsd/xlocale.h> #elif (defined(__APPLE__) || defined(__FreeBSD__) \ || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) # include #elif defined(__Fuchsia__) -# include +# include <__support/fuchsia/xlocale.h> #elif defined(__wasi__) // WASI libc uses musl's locales support. 
-# include +# include <__support/musl/xlocale.h> #elif defined(_LIBCPP_HAS_MUSL_LIBC) -# include +# include <__support/musl/xlocale.h> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/support/android/locale_bionic.h b/libcxx/include/__support/android/locale_bionic.h similarity index 90% rename from libcxx/include/support/android/locale_bionic.h rename to libcxx/include/__support/android/locale_bionic.h index f05a6a0522ca..8c6d4bd0dc32 100644 --- a/libcxx/include/support/android/locale_bionic.h +++ b/libcxx/include/__support/android/locale_bionic.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/android/locale_bionic.h ------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -28,13 +28,13 @@ extern "C" { #include #include #if __ANDROID_API__ < 21 -#include +#include <__support/xlocale/__posix_l_fallback.h> #endif // In NDK versions later than 16, locale-aware functions are provided by // legacy_stdlib_inlines.h #if __NDK_MAJOR__ <= 16 #if __ANDROID_API__ < 21 -#include +#include <__support/xlocale/__strtonum_fallback.h> #elif __ANDROID_API__ < 26 #if defined(__cplusplus) diff --git a/libcxx/include/support/fuchsia/xlocale.h b/libcxx/include/__support/fuchsia/xlocale.h similarity index 74% rename from libcxx/include/support/fuchsia/xlocale.h rename to libcxx/include/__support/fuchsia/xlocale.h index b86ce9efbd11..e8def81480ea 100644 --- a/libcxx/include/support/fuchsia/xlocale.h +++ b/libcxx/include/__support/fuchsia/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/fuchsia/xlocale.h ------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -14,8 +14,8 @@ #include #include -#include -#include +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif // defined(__Fuchsia__) diff --git a/libcxx/include/support/ibm/limits.h b/libcxx/include/__support/ibm/limits.h similarity index 97% rename from libcxx/include/support/ibm/limits.h rename to libcxx/include/__support/ibm/limits.h index d1c59f066a87..45f1f1e3684c 100644 --- a/libcxx/include/support/ibm/limits.h +++ b/libcxx/include/__support/ibm/limits.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/ibm/limits.h ---------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/ibm/locale_mgmt_aix.h b/libcxx/include/__support/ibm/locale_mgmt_aix.h similarity index 96% rename from libcxx/include/support/ibm/locale_mgmt_aix.h rename to libcxx/include/__support/ibm/locale_mgmt_aix.h index e452dc32529d..4f658c3eee30 100644 --- a/libcxx/include/support/ibm/locale_mgmt_aix.h +++ b/libcxx/include/__support/ibm/locale_mgmt_aix.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/ibm/locale_mgmt_aix.h --------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/ibm/nanosleep.h b/libcxx/include/__support/ibm/nanosleep.h similarity index 100% rename from libcxx/include/support/ibm/nanosleep.h rename to libcxx/include/__support/ibm/nanosleep.h diff --git a/libcxx/include/support/ibm/support.h b/libcxx/include/__support/ibm/support.h similarity index 95% rename from libcxx/include/support/ibm/support.h rename to libcxx/include/__support/ibm/support.h index 0569cbe7460d..a7751b017666 100644 --- a/libcxx/include/support/ibm/support.h +++ b/libcxx/include/__support/ibm/support.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===----------------------- support/ibm/support.h ----------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/ibm/xlocale.h b/libcxx/include/__support/ibm/xlocale.h similarity index 97% rename from libcxx/include/support/ibm/xlocale.h rename to libcxx/include/__support/ibm/xlocale.h index fde137cde260..ad07a255fc95 100644 --- a/libcxx/include/support/ibm/xlocale.h +++ b/libcxx/include/__support/ibm/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/ibm/xlocale.h -------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,7 +9,8 @@ #ifndef _LIBCPP_SUPPORT_IBM_XLOCALE_H #define _LIBCPP_SUPPORT_IBM_XLOCALE_H -#include + +#include <__support/ibm/locale_mgmt_aix.h> #include "cstdlib" @@ -218,7 +219,7 @@ size_t strftime_l(char *__s, size_t __size, const char *__fmt, #elif defined(__MVS__) #include // POSIX routines -#include +#include <__support/xlocale/__posix_l_fallback.h> #endif // defined(__MVS__) // The following are not POSIX routines. 
These are quick-and-dirty hacks diff --git a/libcxx/include/support/musl/xlocale.h b/libcxx/include/__support/musl/xlocale.h similarity index 95% rename from libcxx/include/support/musl/xlocale.h rename to libcxx/include/__support/musl/xlocale.h index 722d13fa1d66..2508a8e8e0ca 100644 --- a/libcxx/include/support/musl/xlocale.h +++ b/libcxx/include/__support/musl/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/musl/xlocale.h ------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/newlib/xlocale.h b/libcxx/include/__support/newlib/xlocale.h similarity index 82% rename from libcxx/include/support/newlib/xlocale.h rename to libcxx/include/__support/newlib/xlocale.h index 25fa798b6d02..b75f9263a4c4 100644 --- a/libcxx/include/support/newlib/xlocale.h +++ b/libcxx/include/__support/newlib/xlocale.h @@ -17,9 +17,9 @@ #include #if !defined(__NEWLIB__) || __NEWLIB__ < 2 || \ __NEWLIB__ == 2 && __NEWLIB_MINOR__ < 5 -#include -#include -#include +#include <__support/xlocale/__nop_locale_mgmt.h> +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif #endif // _NEWLIB_VERSION diff --git a/libcxx/include/support/nuttx/xlocale.h b/libcxx/include/__support/nuttx/xlocale.h similarity index 70% rename from libcxx/include/support/nuttx/xlocale.h rename to libcxx/include/__support/nuttx/xlocale.h index b70d62005046..be738e3b64e4 100644 --- a/libcxx/include/support/nuttx/xlocale.h +++ b/libcxx/include/__support/nuttx/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/nuttx/xlocale.h -------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,8 +11,8 @@ #define _LIBCPP_SUPPORT_NUTTX_XLOCALE_H #if defined(__NuttX__) -#include -#include +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif // __NuttX__ #endif diff --git a/libcxx/include/support/openbsd/xlocale.h b/libcxx/include/__support/openbsd/xlocale.h similarity index 78% rename from libcxx/include/support/openbsd/xlocale.h rename to libcxx/include/__support/openbsd/xlocale.h index fbfaedd127c6..1136fa327fac 100644 --- a/libcxx/include/support/openbsd/xlocale.h +++ b/libcxx/include/__support/openbsd/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/openbsd/xlocale.h -----------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,6 +14,6 @@ #include #include #include -#include +#include <__support/xlocale/__strtonum_fallback.h> #endif diff --git a/libcxx/include/support/solaris/floatingpoint.h b/libcxx/include/__support/solaris/floatingpoint.h similarity index 100% rename from libcxx/include/support/solaris/floatingpoint.h rename to libcxx/include/__support/solaris/floatingpoint.h diff --git a/libcxx/include/support/solaris/wchar.h b/libcxx/include/__support/solaris/wchar.h similarity index 100% rename from libcxx/include/support/solaris/wchar.h rename to libcxx/include/__support/solaris/wchar.h diff --git a/libcxx/include/support/solaris/xlocale.h b/libcxx/include/__support/solaris/xlocale.h similarity index 100% rename from libcxx/include/support/solaris/xlocale.h rename to libcxx/include/__support/solaris/xlocale.h diff --git a/libcxx/include/support/win32/limits_msvc_win32.h b/libcxx/include/__support/win32/limits_msvc_win32.h similarity index 96% rename from libcxx/include/support/win32/limits_msvc_win32.h rename to libcxx/include/__support/win32/limits_msvc_win32.h index 7bb835559a3b..758d24647b1b 100644 --- a/libcxx/include/support/win32/limits_msvc_win32.h +++ b/libcxx/include/__support/win32/limits_msvc_win32.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------ support/win32/limits_msvc_win32.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/win32/locale_win32.h b/libcxx/include/__support/win32/locale_win32.h similarity index 99% rename from libcxx/include/support/win32/locale_win32.h rename to libcxx/include/__support/win32/locale_win32.h index 897c36be70c6..d32a7a8ad304 100644 --- a/libcxx/include/support/win32/locale_win32.h +++ b/libcxx/include/__support/win32/locale_win32.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/win32/locale_win32.h -------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/xlocale/__nop_locale_mgmt.h b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h similarity index 94% rename from libcxx/include/support/xlocale/__nop_locale_mgmt.h rename to libcxx/include/__support/xlocale/__nop_locale_mgmt.h index f33d3894c3a9..57b18842ff45 100644 --- a/libcxx/include/support/xlocale/__nop_locale_mgmt.h +++ b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------ support/xlocale/__nop_locale_mgmt.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h similarity index 98% rename from libcxx/include/support/xlocale/__posix_l_fallback.h rename to libcxx/include/__support/xlocale/__posix_l_fallback.h index f3df6c46fbab..00d69d19e8c8 100644 --- a/libcxx/include/support/xlocale/__posix_l_fallback.h +++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------- support/xlocale/__posix_l_fallback.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h similarity index 96% rename from libcxx/include/support/xlocale/__strtonum_fallback.h rename to libcxx/include/__support/xlocale/__strtonum_fallback.h index df38598056a6..1172a5d57236 100644 --- a/libcxx/include/support/xlocale/__strtonum_fallback.h +++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------- support/xlocale/__strtonum_fallback.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support index 473c9c3bbe49..de572f3ff84d 100644 --- a/libcxx/include/__threading_support +++ b/libcxx/include/__threading_support @@ -17,7 +17,7 @@ #include #ifdef __MVS__ -# include +# include <__support/ibm/nanosleep.h> #endif #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/include/bit b/libcxx/include/bit index fe360179c5ca..f8c37c3d6bbf 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -62,7 +62,7 @@ namespace std { #include <__debug> #if defined(__IBMCPP__) -#include "support/ibm/support.h" +#include "__support/ibm/support.h" #endif #if defined(_LIBCPP_COMPILER_MSVC) #include diff --git a/libcxx/include/limits b/libcxx/include/limits index 6d5d1e1aca75..8f97cd10a8b1 100644 --- a/libcxx/include/limits +++ b/libcxx/include/limits @@ -105,11 +105,11 @@ template<> class numeric_limits; #include #if defined(_LIBCPP_COMPILER_MSVC) -#include "support/win32/limits_msvc_win32.h" +#include "__support/win32/limits_msvc_win32.h" #endif // _LIBCPP_MSVCRT #if defined(__IBMCPP__) -#include "support/ibm/limits.h" +#include "__support/ibm/limits.h" #endif // __IBMCPP__ #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index c482068fa99a..9965104cb5b2 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -107,7 +107,7 @@ endif() if (LIBCXX_CONFIGURE_IDE) file(GLOB_RECURSE LIBCXX_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/*) if(WIN32) - file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/support/win32/*.h) + file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/__support/win32/*.h) list(APPEND LIBCXX_HEADERS ${LIBCXX_WIN32_HEADERS}) endif() # Force them all into the headers dir on MSVC, otherwise they end up at diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index f109389f68f3..a0209d0ce8cf 100644 --- a/libcxx/src/locale.cpp +++ 
b/libcxx/src/locale.cpp @@ -29,7 +29,7 @@ #include "cwctype" #include "__sso_allocator" #if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__) -#include "support/win32/locale_win32.h" +#include "__support/win32/locale_win32.h" #elif !defined(__BIONIC__) && !defined(__NuttX__) #include #endif diff --git a/libcxx/src/support/solaris/xlocale.cpp b/libcxx/src/support/solaris/xlocale.cpp index d68a39f4dfe5..d25adcd21d30 100644 --- a/libcxx/src/support/solaris/xlocale.cpp +++ b/libcxx/src/support/solaris/xlocale.cpp @@ -8,7 +8,7 @@ #ifdef __sun__ -#include "support/solaris/xlocale.h" +#include "__support/solaris/xlocale.h" #include #include #include diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp index b7062db352ad..e7c6005fc1a3 100644 --- a/libcxx/src/support/win32/locale_win32.cpp +++ b/libcxx/src/support/win32/locale_win32.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/win32/locale_win32.cpp ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/src/support/win32/support.cpp b/libcxx/src/support/win32/support.cpp index d156e02e3e84..52453f547926 100644 --- a/libcxx/src/support/win32/support.cpp +++ b/libcxx/src/support/win32/support.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===----------------------- support/win32/support.h ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/src/support/win32/thread_win32.cpp b/libcxx/src/support/win32/thread_win32.cpp index 83e7e9f6ce5b..35c4c871457d 100644 --- a/libcxx/src/support/win32/thread_win32.cpp +++ b/libcxx/src/support/win32/thread_win32.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/win32/thread_win32.cpp ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 644f0a767558..2ca495b08fba 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -217,25 +217,25 @@ copy("include") { "string.h", "string_view", "strstream", - "support/android/locale_bionic.h", - "support/fuchsia/xlocale.h", - "support/ibm/limits.h", - "support/ibm/locale_mgmt_aix.h", - "support/ibm/nanosleep.h", - "support/ibm/support.h", - "support/ibm/xlocale.h", - "support/musl/xlocale.h", - "support/newlib/xlocale.h", - "support/nuttx/xlocale.h", - "support/openbsd/xlocale.h", - "support/solaris/floatingpoint.h", - "support/solaris/wchar.h", - "support/solaris/xlocale.h", - "support/win32/limits_msvc_win32.h", - "support/win32/locale_win32.h", - "support/xlocale/__nop_locale_mgmt.h", - "support/xlocale/__posix_l_fallback.h", - "support/xlocale/__strtonum_fallback.h", + "__support/android/locale_bionic.h", + "__support/fuchsia/xlocale.h", + "__support/ibm/limits.h", + "__support/ibm/locale_mgmt_aix.h", + "__support/ibm/nanosleep.h", + "__support/ibm/support.h", + "__support/ibm/xlocale.h", + "__support/musl/xlocale.h", + "__support/newlib/xlocale.h", + "__support/nuttx/xlocale.h", + "__support/openbsd/xlocale.h", + "__support/solaris/floatingpoint.h", + "__support/solaris/wchar.h", + "__support/solaris/xlocale.h", + 
"__support/win32/limits_msvc_win32.h", + "__support/win32/locale_win32.h", + "__support/xlocale/__nop_locale_mgmt.h", + "__support/xlocale/__posix_l_fallback.h", + "__support/xlocale/__strtonum_fallback.h", "system_error", "tgmath.h", "thread", From bc39d53d9a4f1ed7c903648f3fd408296fd55c95 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 1 Feb 2021 15:18:42 -0800 Subject: [PATCH 070/318] =?UTF-8?q?[=F0=9F=8D=92]Disable=20CFI=20in=20=5F?= =?UTF-8?q?=5Fget=5Felem=20to=20allow=20casting=20a=20pointer=20to=20unini?= =?UTF-8?q?tialized=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes usage of shared_ptr with CFI enabled, which is llvm.org/pr48993. (cherry pick of commit bab74864168bb5e28ecbc0294fe1095d8da7f569) Differential Revision: https://reviews.llvm.org/D96063 --- libcxx/include/memory | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/include/memory b/libcxx/include/memory index a00916c8c03f..39d0f5bee6a5 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -2647,7 +2647,7 @@ private: _Alloc *__alloc = reinterpret_cast<_Alloc*>(__first); return __alloc; } - _Tp* __get_elem() _NOEXCEPT { + _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_); typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair); _Tp *__elem = reinterpret_cast<_Tp*>(__second); From 251f3295b498b699aa2b926167a788a6b6dbc033 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Feb 2021 17:00:20 -0500 Subject: [PATCH 071/318] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Fix=20libcxx?= =?UTF-8?q?=20build=20on=2032bit=20architectures=20with=2064bit=20time=5Ft?= =?UTF-8?q?=20defaults=20e.g.=20riscv32?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Khem Raj. 
(cherry pick of commit 85b9c5ccc172a1e61c7ecaaec4752587cb6f1e26) Differential Revision: https://reviews.llvm.org/D96062 --- libcxx/src/atomic.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp index 6b73ed771cd1..9ae1fb5199bf 100644 --- a/libcxx/src/atomic.cpp +++ b/libcxx/src/atomic.cpp @@ -19,6 +19,12 @@ #include #include +// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures +// with a 64 bit time_t, we need to specify SYS_futex_time64. +#if !defined(SYS_futex) && defined(SYS_futex_time64) +# define SYS_futex SYS_futex_time64 +#endif + #else // <- Add other operating systems here // Baseline needs no new headers From d7d818c3615e4ff6bb283df0c1ddbb2b2cd50075 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Wed, 27 Jan 2021 13:02:45 -0800 Subject: [PATCH 072/318] Fix runInTerminal failures on Windows stella.stemenova mentioned in https://reviews.llvm.org/D93951 failures on Windows for this test. I'm fixing the macro definitions and disabling the tests for python versions lower than 3.7. I'll figure out that actual issue with python3.6 after the buildbots are fine again. 
(cherry picked from commit ab5591e1d8f5abcfa9e75193d3e8a29087b61425) --- .../runInTerminal/TestVSCode_runInTerminal.py | 34 +++++++++++++++---- lldb/tools/lldb-vscode/FifoFiles.cpp | 10 +++--- lldb/tools/lldb-vscode/FifoFiles.h | 1 + lldb/tools/lldb-vscode/lldb-vscode.cpp | 4 +-- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py index 055b5a5bed87..047cc317596f 100644 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py @@ -33,20 +33,30 @@ def readErrorMessage(self, fifo_file): with open(fifo_file, "r") as file: return file.readline() + def isTestSupported(self): + # For some strange reason, this test fails on python3.6 + if not (sys.version_info.major == 3 and sys.version_info.minor >= 7): + return False + try: + # We skip this test for debug builds because it takes too long parsing lldb's own + # debug info. Release builds are fine. + # Checking the size of the lldb-vscode binary seems to be a decent proxy for a quick + # detection. It should be far less than 1 MB in Release builds. + if os.path.getsize(os.environ["LLDBVSCODE_EXEC"]) < 1000000: + return True + except: + return False + @skipIfWindows @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_runInTerminal(self): + if not self.isTestSupported(): + return ''' Tests the "runInTerminal" reverse request. It makes sure that the IDE can launch the inferior with the correct environment variables and arguments. ''' - if "debug" in str(os.environ["LLDBVSCODE_EXEC"]).lower(): - # We skip this test for debug builds because it takes too long parsing lldb's own - # debug info. Release builds are fine. 
- # Checking this environment variable seems to be a decent proxy for a quick - # detection - return program = self.getBuildArtifact("a.out") source = 'main.c' self.build_and_launch( @@ -77,6 +87,8 @@ def test_runInTerminal(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_runInTerminalInvalidTarget(self): + if not self.isTestSupported(): + return self.build_and_create_debug_adaptor() response = self.launch( "INVALIDPROGRAM", stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"], expectFailure=True) @@ -88,6 +100,8 @@ def test_runInTerminalInvalidTarget(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_missingArgInRunInTerminalLauncher(self): + if not self.isTestSupported(): + return proc = subprocess.run([self.lldbVSCodeExec, "--launch-target", "INVALIDPROGRAM"], capture_output=True, universal_newlines=True) self.assertTrue(proc.returncode != 0) @@ -97,6 +111,8 @@ def test_missingArgInRunInTerminalLauncher(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -115,6 +131,8 @@ def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -132,6 +150,8 @@ def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -150,6 +170,8 @@ def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self): @skipIfRemote 
@skipIf(archs=no_match(['x86_64'])) def test_NonAttachedRunInTerminalLauncher(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp index b69970ec0168..0a36c87d4a94 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.cpp +++ b/lldb/tools/lldb-vscode/FifoFiles.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !defined(WIN32) +#include "FifoFiles.h" + +#if LLVM_ON_UNIX #include #include #include @@ -21,8 +23,6 @@ #include "lldb/lldb-defines.h" -#include "FifoFiles.h" - using namespace llvm; namespace lldb_vscode { @@ -30,13 +30,13 @@ namespace lldb_vscode { FifoFile::FifoFile(StringRef path) : m_path(path) {} FifoFile::~FifoFile() { -#if !defined(WIN32) +#if LLVM_ON_UNIX unlink(m_path.c_str()); #endif }; Expected> CreateFifoFile(StringRef path) { -#if defined(WIN32) +#if !LLVM_ON_UNIX return createStringError(inconvertibleErrorCode(), "Unimplemented"); #else if (int err = mkfifo(path.data(), 0600)) diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-vscode/FifoFiles.h index 891b6f574601..f186f65e86c4 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.h +++ b/lldb/tools/lldb-vscode/FifoFiles.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H #define LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/Error.h" #include "JSONUtils.h" diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index c581b9b4a9a0..69eb2e70aa6d 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -3002,8 +3002,8 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) { // emitted to the debug adaptor. 
void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, char *argv[]) { -#if defined(WIN_32) - llvm::errs() << "runInTerminal is not supported on Windows\n"; +#if !LLVM_ON_UNIX + llvm::errs() << "runInTerminal is only supported on POSIX systems\n"; exit(EXIT_FAILURE); #else RunInTerminalLauncherCommChannel comm_channel(comm_file); From 27aff2aa2ade9d78d0081445eadacd5b5006143e Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Thu, 28 Jan 2021 09:24:30 -0800 Subject: [PATCH 073/318] Fix lldb-vscode builds on Windows targeting POSIX @stella.stamenova found out that lldb-vscode's Win32 macros were failing when building on windows targetings POSIX platforms. I'm changing these macros for LLVM_ON_UNIX, which should be more accurate. (cherry picked from commit 0bca9a7ce2eeaa9f1d732ffbc17769560a2b236e) --- lldb/tools/lldb-vscode/IOStream.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.h | 4 +++- lldb/tools/lldb-vscode/RunInTerminal.cpp | 6 +++--- lldb/tools/lldb-vscode/VSCode.cpp | 4 ++-- lldb/tools/lldb-vscode/VSCode.h | 2 ++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 11 ++++++----- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp index 4b11b90b4c2e..fdbfb554aedb 100644 --- a/lldb/tools/lldb-vscode/IOStream.cpp +++ b/lldb/tools/lldb-vscode/IOStream.cpp @@ -8,7 +8,7 @@ #include "IOStream.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #include #else #include @@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() { return; if (m_is_socket) -#if defined(_WIN32) +#if !LLVM_ON_UNIX ::closesocket(m_socket); #else ::close(m_socket); @@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length, } if (bytes_read < 0) { int reason = 0; -#if defined(_WIN32) +#if !LLVM_ON_UNIX if (descriptor.m_is_socket) reason = WSAGetLastError(); else diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h index 
603ae9adcc2a..1ec7ac3ed0f9 100644 --- a/lldb/tools/lldb-vscode/IOStream.h +++ b/lldb/tools/lldb-vscode/IOStream.h @@ -9,7 +9,9 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H #define LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H -#if defined(_WIN32) +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX + +#if !LLVM_ON_UNIX // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp index 4db2806924ca..29edf5ca381d 100644 --- a/lldb/tools/lldb-vscode/RunInTerminal.cpp +++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !defined(WIN32) +#include "RunInTerminal.h" + +#if LLVM_ON_UNIX #include #include #include @@ -21,8 +23,6 @@ #include "lldb/lldb-defines.h" -#include "RunInTerminal.h" - using namespace llvm; namespace lldb_vscode { diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index e9fdc17f4147..4d0e281c1b8d 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -14,7 +14,7 @@ #include "VSCode.h" #include "llvm/Support/FormatVariadic.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #define NOMINMAX #include #include @@ -41,7 +41,7 @@ VSCode::VSCode() stop_at_entry(false), is_attach(false), reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); -#if defined(_WIN32) +#if !LLVM_ON_UNIX // Windows opens stdout and stdin in text mode which converts \n to 13,10 // while the value is just 10 on Darwin/Linux. Setting the file mode to binary // fixes this. 
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 8e7dfc078934..a2e1cac8ecf9 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,6 +9,8 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX + #include #include #include diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 69eb2e70aa6d..b7f39cbb1cb5 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "VSCode.h" + #include #include #include @@ -14,7 +16,7 @@ #include #include #include -#if defined(_WIN32) +#if !LLVM_ON_UNIX // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because @@ -52,9 +54,8 @@ #include "JSONUtils.h" #include "LLDBUtils.h" -#include "VSCode.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif @@ -131,7 +132,7 @@ SOCKET AcceptConnection(int portno) { *g_vsc.log << "error: accept (" << strerror(errno) << ")" << std::endl; } -#if defined(_WIN32) +#if !LLVM_ON_UNIX closesocket(sockfd); #else close(sockfd); @@ -3084,7 +3085,7 @@ int main(int argc, char *argv[]) { } } -#if !defined(_WIN32) +#if LLVM_ON_UNIX if (input_args.hasArg(OPT_wait_for_debugger)) { printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid()); pause(); From 1cb6551edb94eea1fc087b346b1e8d13775dc692 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Thu, 4 Feb 2021 10:07:07 -0800 Subject: [PATCH 074/318] [lldb-vscode] correctly use Windows macros @mstorsjo found a mistake that I made when trying to fix some Windows compilation errors encountered by @stella.stamenova. 
I was incorrectly using the LLVM_ON_UNIX macro. In any case, proper use of #if defined(_WIN32) should be the actual fix. Differential Revision: https://reviews.llvm.org/D96060 (cherry picked from commit 36496cc2992d6fa26e6024971efcfc7d15f69888) --- lldb/tools/lldb-vscode/FifoFiles.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.h | 2 +- lldb/tools/lldb-vscode/RunInTerminal.cpp | 2 +- lldb/tools/lldb-vscode/VSCode.cpp | 4 ++-- lldb/tools/lldb-vscode/lldb-vscode.cpp | 10 +++++----- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp index 0a36c87d4a94..4b14fb16f96c 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.cpp +++ b/lldb/tools/lldb-vscode/FifoFiles.cpp @@ -8,7 +8,7 @@ #include "FifoFiles.h" -#if LLVM_ON_UNIX +#if !defined(_WIN32) #include #include #include @@ -30,13 +30,13 @@ namespace lldb_vscode { FifoFile::FifoFile(StringRef path) : m_path(path) {} FifoFile::~FifoFile() { -#if LLVM_ON_UNIX +#if !defined(_WIN32) unlink(m_path.c_str()); #endif }; Expected> CreateFifoFile(StringRef path) { -#if !LLVM_ON_UNIX +#if defined(_WIN32) return createStringError(inconvertibleErrorCode(), "Unimplemented"); #else if (int err = mkfifo(path.data(), 0600)) diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp index fdbfb554aedb..cd22d906c14c 100644 --- a/lldb/tools/lldb-vscode/IOStream.cpp +++ b/lldb/tools/lldb-vscode/IOStream.cpp @@ -8,7 +8,7 @@ #include "IOStream.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #include #else #include @@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() { return; if (m_is_socket) -#if !LLVM_ON_UNIX +#if defined(_WIN32) ::closesocket(m_socket); #else ::close(m_socket); @@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length, } if (bytes_read < 0) { int reason = 0; -#if !LLVM_ON_UNIX +#if defined(_WIN32) if (descriptor.m_is_socket) reason = 
WSAGetLastError(); else diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h index 1ec7ac3ed0f9..0eb9b6fefb0d 100644 --- a/lldb/tools/lldb-vscode/IOStream.h +++ b/lldb/tools/lldb-vscode/IOStream.h @@ -11,7 +11,7 @@ #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX -#if !LLVM_ON_UNIX +#if defined(_WIN32) // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp index 29edf5ca381d..2126563d9e96 100644 --- a/lldb/tools/lldb-vscode/RunInTerminal.cpp +++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp @@ -8,7 +8,7 @@ #include "RunInTerminal.h" -#if LLVM_ON_UNIX +#if !defined(_WIN32) #include #include #include diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 4d0e281c1b8d..e9fdc17f4147 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -14,7 +14,7 @@ #include "VSCode.h" #include "llvm/Support/FormatVariadic.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #define NOMINMAX #include #include @@ -41,7 +41,7 @@ VSCode::VSCode() stop_at_entry(false), is_attach(false), reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); -#if !LLVM_ON_UNIX +#if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 // while the value is just 10 on Darwin/Linux. Setting the file mode to binary // fixes this. 
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index b7f39cbb1cb5..9469690cd7db 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -16,7 +16,7 @@ #include #include #include -#if !LLVM_ON_UNIX +#if defined(_WIN32) // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because @@ -55,7 +55,7 @@ #include "JSONUtils.h" #include "LLDBUtils.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif @@ -132,7 +132,7 @@ SOCKET AcceptConnection(int portno) { *g_vsc.log << "error: accept (" << strerror(errno) << ")" << std::endl; } -#if !LLVM_ON_UNIX +#if defined(_WIN32) closesocket(sockfd); #else close(sockfd); @@ -3003,7 +3003,7 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) { // emitted to the debug adaptor. void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, char *argv[]) { -#if !LLVM_ON_UNIX +#if defined(_WIN32) llvm::errs() << "runInTerminal is only supported on POSIX systems\n"; exit(EXIT_FAILURE); #else @@ -3085,7 +3085,7 @@ int main(int argc, char *argv[]) { } } -#if LLVM_ON_UNIX +#if !defined(_WIN32) if (input_args.hasArg(OPT_wait_for_debugger)) { printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid()); pause(); From c9fb4a947e32abfaa73b0b91a58ef71c73316322 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Thu, 4 Feb 2021 17:00:09 -0800 Subject: [PATCH 075/318] [AST] Update LVal before evaluating lambda decl fields. 
Differential Revision: https://reviews.llvm.org/D96092 (cherry picked from commit 96fb49c3ff8e08680127ddd4ec45a0e6c199243b) --- clang/lib/AST/ExprConstant.cpp | 8 +++++++- clang/test/SemaCXX/constant-expression-cxx2a.cpp | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 56181bbe1166..1c4caa2c1fc0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10009,6 +10009,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { auto *CaptureInitIt = E->capture_init_begin(); const LambdaCapture *CaptureIt = ClosureClass->captures_begin(); bool Success = true; + const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(ClosureClass); for (const auto *Field : ClosureClass->fields()) { assert(CaptureInitIt != E->capture_init_end()); // Get the initializer for this field @@ -10019,8 +10020,13 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { if (!CurFieldInit) return Error(E); + LValue Subobject = This; + + if (!HandleLValueMember(Info, E, Subobject, Field, &Layout)) + return false; + APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); - if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) { + if (!EvaluateInPlace(FieldVal, Info, Subobject, CurFieldInit)) { if (!Info.keepEvaluatingAfterFailure()) return false; Success = false; diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp index 4adadc9988ab..86020a09db44 100644 --- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp @@ -1437,3 +1437,13 @@ constexpr bool destroy_at_test() { return true; } static_assert(destroy_at_test()); + +namespace PR48582 { + struct S { + void *p = this; + constexpr S() {} + constexpr S(const S&) {} + }; + constexpr bool b = [a = S(), b = S()] { return a.p == b.p; }(); + static_assert(!b); +} From 
8153dee37272a73b1ed74ac1bc12422fac8ef033 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 8 Feb 2021 17:58:05 -0800 Subject: [PATCH 076/318] PR48606: The lifetime of a constexpr heap allocation always started during the same evaluation. It looks like the only case for which this matters is determining whether mutable subobjects of a heap allocation can be modified during constant evaluation. (cherry picked from commit 21e8bb83253e1a2f4b6fad9b53cafe8c530a38e2) --- clang/lib/AST/ExprConstant.cpp | 4 +-- .../test/SemaCXX/cxx2a-constexpr-dynalloc.cpp | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1c4caa2c1fc0..cd2b5141ebe8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3497,8 +3497,8 @@ static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK, static bool lifetimeStartedInEvaluation(EvalInfo &Info, APValue::LValueBase Base, bool MutableSubobject = false) { - // A temporary we created. - if (Base.getCallIndex()) + // A temporary or transient heap allocation we created. 
+ if (Base.getCallIndex() || Base.is()) return true; switch (Info.IsEvaluatingDecl) { diff --git a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp index 3647526ff0af..097ca00640e9 100644 --- a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp +++ b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp @@ -176,3 +176,37 @@ constexpr bool construct_after_lifetime_2() { return true; } static_assert(construct_after_lifetime_2()); // expected-error {{}} expected-note {{in call}} + +namespace PR48606 { + struct A { mutable int n = 0; }; + + constexpr bool f() { + A a; + A *p = &a; + p->~A(); + std::construct_at(p); + return true; + } + static_assert(f()); + + constexpr bool g() { + A *p = new A; + p->~A(); + std::construct_at(p); + delete p; + return true; + } + static_assert(g()); + + constexpr bool h() { + std::allocator alloc; + A *p = alloc.allocate(1); + std::construct_at(p); + p->~A(); + std::construct_at(p); + p->~A(); + alloc.deallocate(p); + return true; + } + static_assert(h()); +} From b46924ee5afe234526220c29a497794bf65f8f7f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 27 Jan 2021 10:14:54 +0000 Subject: [PATCH 077/318] Fix "not all control paths return a value" warning. NFCI. 
--- clang/lib/Basic/ProfileList.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp index 56bc37a79301..2cb05c1c3c07 100644 --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -82,6 +82,7 @@ static StringRef getSectionName(CodeGenOptions::ProfileInstrKind Kind) { case CodeGenOptions::ProfileCSIRInstr: return "csllvm"; } + llvm_unreachable("Unhandled CodeGenOptions::ProfileInstrKind enum"); } llvm::Optional From 8d20c14a8a3dd0f83d4066f957ba4c006d29942b Mon Sep 17 00:00:00 2001 From: Nathan James Date: Fri, 12 Feb 2021 16:55:44 +0000 Subject: [PATCH 078/318] [clangd] Fix clang tidy provider when multiple config files exist in directory tree Currently Clang tidy provider searches from the root directory up to the target directory, this is the opposite of how clang-tidy searches for config files. The result of this is .clang-tidy files are ignored in any subdirectory of a directory containing a .clang-tidy file. 
Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D96204 (cherry picked from commit ba3ea9c60f0f259f0ccc47e47daf8253a5885531) --- clang-tools-extra/clangd/TidyProvider.cpp | 2 +- .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/TidyProviderTests.cpp | 60 +++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/clangd/unittests/TidyProviderTests.cpp diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index c26c59fd347d..bcf1cd5a6183 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -106,7 +106,7 @@ class DotClangTidyTree { llvm::SmallVector Caches; { std::lock_guard Lock(Mu); - for (auto I = path::begin(Parent), E = path::end(Parent); I != E; ++I) { + for (auto I = path::rbegin(Parent), E = path::rend(Parent); I != E; ++I) { assert(I->end() >= Parent.begin() && I->end() <= Parent.end() && "Canonical path components should be substrings"); llvm::StringRef Ancestor(Parent.begin(), I->end() - Parent.begin()); diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index adf4ac827cce..f4d364720eaf 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -93,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests TestIndex.cpp TestTU.cpp TestWorkspace.cpp + TidyProviderTests.cpp TypeHierarchyTests.cpp URITests.cpp XRefsTests.cpp diff --git a/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp new file mode 100644 index 000000000000..a16c87456a1a --- /dev/null +++ b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp @@ -0,0 +1,60 @@ +//===-- TidyProviderTests.cpp - Clang tidy configuration provider tests ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestFS.h" +#include "TidyProvider.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +namespace { + +TEST(TidyProvider, NestedDirectories) { + MockFS FS; + FS.Files[testPath(".clang-tidy")] = R"yaml( + Checks: 'llvm-*' + CheckOptions: + - key: TestKey + value: 1 +)yaml"; + FS.Files[testPath("sub1/.clang-tidy")] = R"yaml( + Checks: 'misc-*' + CheckOptions: + - key: TestKey + value: 2 +)yaml"; + FS.Files[testPath("sub1/sub2/.clang-tidy")] = R"yaml( + Checks: 'bugprone-*' + CheckOptions: + - key: TestKey + value: 3 + InheritParentConfig: true +)yaml"; + + TidyProvider Provider = provideClangTidyFiles(FS); + + auto BaseOptions = getTidyOptionsForFile(Provider, testPath("File.cpp")); + ASSERT_TRUE(BaseOptions.Checks.hasValue()); + EXPECT_EQ(*BaseOptions.Checks, "llvm-*"); + EXPECT_EQ(BaseOptions.CheckOptions.lookup("TestKey").Value, "1"); + + auto Sub1Options = getTidyOptionsForFile(Provider, testPath("sub1/File.cpp")); + ASSERT_TRUE(Sub1Options.Checks.hasValue()); + EXPECT_EQ(*Sub1Options.Checks, "misc-*"); + EXPECT_EQ(Sub1Options.CheckOptions.lookup("TestKey").Value, "2"); + + auto Sub2Options = + getTidyOptionsForFile(Provider, testPath("sub1/sub2/File.cpp")); + ASSERT_TRUE(Sub2Options.Checks.hasValue()); + EXPECT_EQ(*Sub2Options.Checks, "misc-*,bugprone-*"); + EXPECT_EQ(Sub2Options.CheckOptions.lookup("TestKey").Value, "3"); +} +} // namespace +} // namespace clangd +} // namespace clang From 6604c3050948d602ef24b3d3efbf9f4410494833 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 2 Feb 2021 14:21:33 -0800 Subject: [PATCH 079/318] [GlobalISel] Check if branches use the same MBB in matchOptBrCondByInvertingCond If the G_BR + G_BRCOND in this combine use the same MBB, then it will infinite loop. 
Don't allow that to happen. Differential Revision: https://reviews.llvm.org/D95895 (cherry picked from commit 02d4b365bf4f8c2cb56e5612902f6c3bb4316493) --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9 +++---- .../GlobalISel/prelegalizercombiner-br.mir | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index df0219fcfa64..a9353bdfb780 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. - if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) - return false; - return true; + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir index 0631ff89ade0..0647de44c4b8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir @@ -29,6 +29,7 @@ ret i32 %retval.0 } + define void @dont_combine_same_block() { ret void } ... --- @@ -87,3 +88,26 @@ body: | RET_ReallyLR implicit $w0 ... 
+--- +name: dont_combine_same_block +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_combine_same_block + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + bb.0: + liveins: $w0, $w1 + %cond:_(s1) = G_IMPLICIT_DEF + + ; The G_BRCOND and G_BR have the same target here. Don't change anything. + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.1 + bb.1: + RET_ReallyLR +... From 04cb6b5ea8bd2b52e3d11f4cb970fd2d144eee6a Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 8 Feb 2021 17:32:52 -0800 Subject: [PATCH 080/318] PR48587: is_constant_evaluated() should not evaluate to true during a variable's destruction if it didn't do so during construction. The standard doesn't give any guidance as to what to do here, but this approach seems reasonable and conservative, and has been proposed to the standard committee. (cherry picked from commit c945dc4a5023d6a17d11fcda76509b94b36e34fc) --- clang/lib/AST/ExprConstant.cpp | 19 +++- .../builtin-is-constant-evaluated.cpp | 92 +++++++++++++++++++ 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index cd2b5141ebe8..1bdad771a923 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14792,11 +14792,14 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base, APValue DestroyedValue, QualType Type, - SourceLocation Loc, Expr::EvalStatus &EStatus) { - EvalInfo Info(Ctx, EStatus, EvalInfo::EM_ConstantExpression); + SourceLocation Loc, Expr::EvalStatus &EStatus, + bool IsConstantDestruction) { + EvalInfo Info(Ctx, EStatus, + IsConstantDestruction ? 
EvalInfo::EM_ConstantExpression + : EvalInfo::EM_ConstantFold); Info.setEvaluatingDecl(Base, DestroyedValue, EvalInfo::EvaluatingDeclKind::Dtor); - Info.InConstantContext = true; + Info.InConstantContext = IsConstantDestruction; LValue LVal; LVal.set(Base); @@ -14850,7 +14853,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx, // If this is a class template argument, it's required to have constant // destruction too. if (Kind == ConstantExprKind::ClassTemplateArgument && - (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result) || + (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result, + true) || Result.HasSideEffects)) { // FIXME: Prefix a note to indicate that the problem is lack of constant // destruction. @@ -14916,6 +14920,10 @@ bool VarDecl::evaluateDestruction( Expr::EvalStatus EStatus; EStatus.Diag = &Notes; + // Only treat the destruction as constant destruction if we formally have + // constant initialization (or are usable in a constant expression). + bool IsConstantDestruction = hasConstantInitialization(); + // Make a copy of the value for the destructor to mutate, if we know it. // Otherwise, treat the value as default-initialized; if the destructor works // anyway, then the destruction is constant (and must be essentially empty). 
@@ -14926,7 +14934,8 @@ bool VarDecl::evaluateDestruction( return false; if (!EvaluateDestruction(getASTContext(), this, std::move(DestroyedValue), - getType(), getLocation(), EStatus) || + getType(), getLocation(), EStatus, + IsConstantDestruction) || EStatus.HasSideEffects) return false; diff --git a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp index 967c83496ab9..d30fefe55b4f 100644 --- a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp +++ b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp @@ -4,6 +4,7 @@ // RUN: FileCheck -check-prefix=CHECK-DYN -input-file=%t.ll %s // RUN: FileCheck -check-prefix=CHECK-ARR -input-file=%t.ll %s // RUN: FileCheck -check-prefix=CHECK-FOLD -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=CHECK-DTOR -input-file=%t.ll %s using size_t = decltype(sizeof(int)); @@ -131,3 +132,94 @@ void test_ref_to_static_var() { // CHECK-FOLD: store i32* @_ZZ22test_ref_to_static_varvE10i_constant, i32** %r, int &r = __builtin_is_constant_evaluated() ? i_constant : i_non_constant; } + +int not_constexpr; + +// __builtin_is_constant_evaluated() should never evaluate to true during +// destruction if it would not have done so during construction. +// +// FIXME: The standard doesn't say that it should ever return true when +// evaluating a destructor call, even for a constexpr variable. That seems +// obviously wrong. 
+struct DestructorBCE { + int n; + constexpr DestructorBCE(int n) : n(n) {} + constexpr ~DestructorBCE() { + if (!__builtin_is_constant_evaluated()) + not_constexpr = 1; + } +}; + +// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_1 +DestructorBCE global_dtor_bce_1(101); + +// CHECK-DTOR: load i32, i32* @not_constexpr +// CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} @global_dtor_bce_2, i32 +// CHECK-DTOR: atexit{{.*}} @_ZN13DestructorBCED{{.*}} @global_dtor_bce_2 +// CHECK-DTOR: } +DestructorBCE global_dtor_bce_2(not_constexpr); + +// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_3 +constexpr DestructorBCE global_dtor_bce_3(103); + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_1v( +void test_dtor_bce_1() { + // Variable is neither constant initialized (because it has automatic storage + // duration) nor usable in constant expressions, so BCE should not return + // true during destruction. It would be OK if we replaced the constructor + // call with a direct store, but we should emit the destructor call. + + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}}, i32 201) + DestructorBCE local(201); + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_2v( +void test_dtor_bce_2() { + // Non-constant init => BCE is false in destructor. + + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} + DestructorBCE local(not_constexpr); + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_3v( +void test_dtor_bce_3() { + // Should never call dtor for a constexpr variable. 
+ + // CHECK-DTOR-NOT: call {{.*}} @_ZN13DestructorBCEC1Ei( + constexpr DestructorBCE local(203); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_1v( +void test_dtor_bce_static_1() { + // Variable is constant initialized, so BCE returns true during constant + // destruction. + + // CHECK: store i32 301 + // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}} + static DestructorBCE local(301); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_2v( +void test_dtor_bce_static_2() { + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} + static DestructorBCE local(not_constexpr); + // CHECK-DTOR: call {{.*}}atexit{{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_3v( +void test_dtor_bce_static_3() { + // CHECK: store i32 303 + // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}} + static constexpr DestructorBCE local(303); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} From 205ecd9b79c6915a85050246c961f167b494df43 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 9 Feb 2021 06:33:48 -0600 Subject: [PATCH 081/318] [DAGCombine] Do not remove masking argument to FP16_TO_FP for some targets As of commit 284f2bffc9bc5, the DAG Combiner gets rid of the masking of the input to this node if the mask only keeps the bottom 16 bits. This is because the underlying library function does not use the high order bits. However, on PowerPC's ELFv2 ABI, it is the caller that is responsible for clearing the bits from the register. Therefore, the library implementation of __gnu_h2f_ieee will return an incorrect result if the bits aren't cleared. This combine is desired for ARM (and possibly other targets) so this patch adds a query to Target Lowering to check if this zeroing needs to be kept. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=49092 Differential revision: https://reviews.llvm.org/D96283 (cherry picked from commit a5222aa0858a42660629c410a5b669dee16a4359) --- llvm/include/llvm/CodeGen/TargetLowering.h | 4 ++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 ++ .../PowerPC/handle-f16-storage-type.ll | 4 ++ llvm/test/CodeGen/PowerPC/pr48519.ll | 2 + llvm/test/CodeGen/PowerPC/pr49092.ll | 39 +++++++++++++++++++ 6 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr49092.ll diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c3221aac8eea..40115fbd2f15 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2785,6 +2785,10 @@ class TargetLoweringBase { return false; } + /// Does this target require the clearing of high-order bits in a register + /// passed to the fp16 to fp conversion library function. 
+ virtual bool shouldKeepZExtForFP16Conv() const { return false; } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 615bea2a4905..89670d708264 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21174,7 +21174,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) - if (N0->getOpcode() == ISD::AND) { + if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 477105bd03ac..0dda2c181572 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -987,6 +987,9 @@ namespace llvm { shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + // Keep the zero-extensions for arguments to libcalls. + bool shouldKeepZExtForFP16Conv() const override { return true; } + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. 
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll index 9977b6b33560..ab19afa2beb5 100644 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -1156,6 +1156,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { ; P8-NEXT: xscvsxdsp f1, f0 ; P8-NEXT: bl __gnu_f2h_ieee ; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop ; P8-NEXT: xsaddsp f1, f31, f1 @@ -1175,6 +1176,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: xscvdphp f1, f1 ; CHECK-NEXT: mffprwz r3, f1 +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f1, r3 ; CHECK-NEXT: xscvhpdp f1, f1 ; CHECK-NEXT: xsaddsp f1, f0, f1 @@ -1225,6 +1227,7 @@ define half @PR40273(half) #0 { ; P8-NEXT: stdu r1, -32(r1) ; P8-NEXT: bl __gnu_f2h_ieee ; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop ; P8-NEXT: xxlxor f0, f0, f0 @@ -1245,6 +1248,7 @@ define half @PR40273(half) #0 { ; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: fcmpu cr0, f0, f1 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index 50970cb185d8..035cc49b93e6 100644 --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -22,6 +22,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-NEXT: xscvsxdsp f1, f0 ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: nop ; CHECK-NEXT: addi r30, r30, -1 @@ -46,6 +47,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdphp f0, f0 ; CHECK-P9-NEXT: 
mffprwz r3, f0 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xscvhpdp f0, f0 diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll new file mode 100644 index 000000000000..2fce58418515 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr49092.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-P9 + +define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: addi r3, r3, 11 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: test2: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: add r3, r4, r3 +; CHECK-P9-NEXT: addi r3, r3, 11 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xscvhpdp f1, f0 +; CHECK-P9-NEXT: blr +entry: + %add = add i64 %b, %a + %0 = trunc i64 %add to i16 + %conv = add i16 %0, 11 + %call = bitcast i16 %conv to half + ret half %call +} +attributes #0 = { nounwind } From 34cda01e235c549b56ffe30a7b09df0414d56ea0 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 2 Feb 2021 14:40:52 +0000 Subject: [PATCH 082/318] [RISCV] Fix incorrect RVV sdiv/udiv lowering Due to a clerical error, the sdiv operation was mapping to vdivu and udiv to vdiv, when the opposite mapping is the correct one. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D95869 (cherry picked from commit b4106f9c7b8c498d109301ced7bf9aca32027168) --- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 4 +- .../CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll | 88 +++++++++---------- 5 files changed, 178 insertions(+), 178 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 79a1e6ddc8a2..dee67708bed1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; // 12.11. Vector Integer Divide Instructions -defm "" : VPatBinarySDNode_VV_VX; -defm "" : VPatBinarySDNode_VV_VX; +defm "" : VPatBinarySDNode_VV_VX; +defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll index 239151274c4e..bbfc09d1c276 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll @@ -5,7 +5,7 @@ define @vdiv_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdiv_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -70,7 +70,7 @@ define @vdiv_vv_nxv2i8( %va, %va, %vb ret %vc @@ -80,7 +80,7 @@ define @vdiv_vx_nxv2i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -111,7 +111,7 @@ define @vdiv_vv_nxv4i8( %va, %va, %vb ret %vc @@ -121,7 +121,7 @@ define @vdiv_vx_nxv4i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -152,7 +152,7 @@ define @vdiv_vv_nxv8i8( %va, %va, %vb ret %vc @@ -162,7 +162,7 @@ define @vdiv_vx_nxv8i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -193,7 +193,7 @@ define @vdiv_vv_nxv16i8( %va, %va, %vb ret %vc @@ -203,7 +203,7 @@ define @vdiv_vx_nxv16i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -234,7 +234,7 @@ define @vdiv_vv_nxv32i8( %va, %va, %vb ret %vc @@ -244,7 +244,7 @@ define @vdiv_vx_nxv32i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -275,7 +275,7 @@ define @vdiv_vv_nxv64i8( %va, %va, %vb ret %vc @@ -285,7 +285,7 @@ define @vdiv_vx_nxv64i8( %va, i8 signext %b ; CHECK-LABEL: 
vdiv_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -316,7 +316,7 @@ define @vdiv_vv_nxv1i16( %va, %va, %vb ret %vc @@ -326,7 +326,7 @@ define @vdiv_vx_nxv1i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -357,7 +357,7 @@ define @vdiv_vv_nxv2i16( %va, %va, %vb ret %vc @@ -367,7 +367,7 @@ define @vdiv_vx_nxv2i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -398,7 +398,7 @@ define @vdiv_vv_nxv4i16( %va, %va, %vb ret %vc @@ -408,7 +408,7 @@ define @vdiv_vx_nxv4i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -439,7 +439,7 @@ define @vdiv_vv_nxv8i16( %va, %va, %vb ret %vc @@ -449,7 +449,7 @@ define @vdiv_vx_nxv8i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -480,7 +480,7 @@ define @vdiv_vv_nxv16i16( %va, %va, %vb ret %vc @@ -490,7 +490,7 @@ define 
@vdiv_vx_nxv16i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -521,7 +521,7 @@ define @vdiv_vv_nxv32i16( %va, %va, %vb ret %vc @@ -531,7 +531,7 @@ define @vdiv_vx_nxv32i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -562,7 +562,7 @@ define @vdiv_vv_nxv1i32( %va, %va, %vb ret %vc @@ -572,7 +572,7 @@ define @vdiv_vx_nxv1i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -603,7 +603,7 @@ define @vdiv_vv_nxv2i32( %va, %va, %vb ret %vc @@ -613,7 +613,7 @@ define @vdiv_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -644,7 +644,7 @@ define @vdiv_vv_nxv4i32( %va, %va, %vb ret %vc @@ -654,7 +654,7 @@ define @vdiv_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -685,7 +685,7 @@ define @vdiv_vv_nxv8i32( %va, %va, %vb 
ret %vc @@ -695,7 +695,7 @@ define @vdiv_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -726,7 +726,7 @@ define @vdiv_vv_nxv16i32( %va, %va, %vb ret %vc @@ -736,7 +736,7 @@ define @vdiv_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -767,7 +767,7 @@ define @vdiv_vv_nxv1i64( %va, %va, %vb ret %vc @@ -784,7 +784,7 @@ define @vdiv_vx_nxv1i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v26, v26, a1 ; CHECK-NEXT: vsrl.vx v26, v26, a1 ; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vdivu.vv v8, v8, v25 +; CHECK-NEXT: vdiv.vv v8, v8, v25 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -825,7 +825,7 @@ define @vdiv_vv_nxv2i64( %va, %va, %vb ret %vc @@ -842,7 +842,7 @@ define @vdiv_vx_nxv2i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v28, v28, a1 ; CHECK-NEXT: vsrl.vx v28, v28, a1 ; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vdivu.vv v8, v8, v26 +; CHECK-NEXT: vdiv.vv v8, v8, v26 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -883,7 +883,7 @@ define @vdiv_vv_nxv4i64( %va, %va, %vb ret %vc @@ -900,7 +900,7 @@ define @vdiv_vx_nxv4i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v12, v12, a1 ; CHECK-NEXT: vsrl.vx v12, v12, a1 ; CHECK-NEXT: vor.vv v28, v12, v28 -; CHECK-NEXT: vdivu.vv v8, v8, v28 +; CHECK-NEXT: vdiv.vv v8, v8, v28 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, 
zeroinitializer @@ -941,7 +941,7 @@ define @vdiv_vv_nxv8i64( %va, %va, %vb ret %vc @@ -958,7 +958,7 @@ define @vdiv_vx_nxv8i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v24, v24, a1 ; CHECK-NEXT: vsrl.vx v24, v24, a1 ; CHECK-NEXT: vor.vv v16, v24, v16 -; CHECK-NEXT: vdivu.vv v8, v8, v16 +; CHECK-NEXT: vdiv.vv v8, v8, v16 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll index 991cccf72cdd..b8f331e78b5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll @@ -5,7 +5,7 @@ define @vdiv_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdiv_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -46,7 +46,7 @@ define @vdiv_vv_nxv2i8( %va, %va, %vb ret %vc @@ -56,7 +56,7 @@ define @vdiv_vx_nxv2i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -87,7 +87,7 @@ define @vdiv_vv_nxv4i8( %va, %va, %vb ret %vc @@ -97,7 +97,7 @@ define @vdiv_vx_nxv4i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -128,7 +128,7 @@ define @vdiv_vv_nxv8i8( %va, %va, %vb ret %vc @@ -138,7 +138,7 @@ define 
@vdiv_vx_nxv8i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -169,7 +169,7 @@ define @vdiv_vv_nxv16i8( %va, %va, %vb ret %vc @@ -179,7 +179,7 @@ define @vdiv_vx_nxv16i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -210,7 +210,7 @@ define @vdiv_vv_nxv32i8( %va, %va, %vb ret %vc @@ -220,7 +220,7 @@ define @vdiv_vx_nxv32i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -251,7 +251,7 @@ define @vdiv_vv_nxv64i8( %va, %va, %vb ret %vc @@ -261,7 +261,7 @@ define @vdiv_vx_nxv64i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -292,7 +292,7 @@ define @vdiv_vv_nxv1i16( %va, %va, %vb ret %vc @@ -302,7 +302,7 @@ define @vdiv_vx_nxv1i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -333,7 +333,7 @@ define @vdiv_vv_nxv2i16( %va, %va, 
%vb ret %vc @@ -343,7 +343,7 @@ define @vdiv_vx_nxv2i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -374,7 +374,7 @@ define @vdiv_vv_nxv4i16( %va, %va, %vb ret %vc @@ -384,7 +384,7 @@ define @vdiv_vx_nxv4i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -415,7 +415,7 @@ define @vdiv_vv_nxv8i16( %va, %va, %vb ret %vc @@ -425,7 +425,7 @@ define @vdiv_vx_nxv8i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -456,7 +456,7 @@ define @vdiv_vv_nxv16i16( %va, %va, %vb ret %vc @@ -466,7 +466,7 @@ define @vdiv_vx_nxv16i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -497,7 +497,7 @@ define @vdiv_vv_nxv32i16( %va, %va, %vb ret %vc @@ -507,7 +507,7 @@ define @vdiv_vx_nxv32i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ 
-538,7 +538,7 @@ define @vdiv_vv_nxv1i32( %va, %va, %vb ret %vc @@ -548,7 +548,7 @@ define @vdiv_vx_nxv1i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -580,7 +580,7 @@ define @vdiv_vv_nxv2i32( %va, %va, %vb ret %vc @@ -590,7 +590,7 @@ define @vdiv_vx_nxv2i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -622,7 +622,7 @@ define @vdiv_vv_nxv4i32( %va, %va, %vb ret %vc @@ -632,7 +632,7 @@ define @vdiv_vx_nxv4i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -664,7 +664,7 @@ define @vdiv_vv_nxv8i32( %va, %va, %vb ret %vc @@ -674,7 +674,7 @@ define @vdiv_vx_nxv8i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -706,7 +706,7 @@ define @vdiv_vv_nxv16i32( %va, %va, %vb ret %vc @@ -716,7 +716,7 @@ define @vdiv_vx_nxv16i32( %va, i32 signex ; CHECK-LABEL: vdiv_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = 
shufflevector %head, undef, zeroinitializer @@ -748,7 +748,7 @@ define @vdiv_vv_nxv1i64( %va, %va, %vb ret %vc @@ -758,7 +758,7 @@ define @vdiv_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv1i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -796,7 +796,7 @@ define @vdiv_vv_nxv2i64( %va, %va, %vb ret %vc @@ -806,7 +806,7 @@ define @vdiv_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -844,7 +844,7 @@ define @vdiv_vv_nxv4i64( %va, %va, %vb ret %vc @@ -854,7 +854,7 @@ define @vdiv_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -892,7 +892,7 @@ define @vdiv_vv_nxv8i64( %va, %va, %vb ret %vc @@ -902,7 +902,7 @@ define @vdiv_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll index 27b27cd64bae..383d3f380fe8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll @@ -5,7 +5,7 @@ define @vdivu_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 
+15,7 @@ define @vdivu_vx_nxv1i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -68,7 +68,7 @@ define @vdivu_vv_nxv2i8( %va, %va, %vb ret %vc @@ -78,7 +78,7 @@ define @vdivu_vx_nxv2i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -107,7 +107,7 @@ define @vdivu_vv_nxv4i8( %va, %va, %vb ret %vc @@ -117,7 +117,7 @@ define @vdivu_vx_nxv4i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -146,7 +146,7 @@ define @vdivu_vv_nxv8i8( %va, %va, %vb ret %vc @@ -156,7 +156,7 @@ define @vdivu_vx_nxv8i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -185,7 +185,7 @@ define @vdivu_vv_nxv16i8( %va, %va, %vb ret %vc @@ -195,7 +195,7 @@ define @vdivu_vx_nxv16i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -224,7 +224,7 @@ define 
@vdivu_vv_nxv32i8( %va, %va, %vb ret %vc @@ -234,7 +234,7 @@ define @vdivu_vx_nxv32i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -263,7 +263,7 @@ define @vdivu_vv_nxv64i8( %va, %va, %vb ret %vc @@ -273,7 +273,7 @@ define @vdivu_vx_nxv64i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -302,7 +302,7 @@ define @vdivu_vv_nxv1i16( %va, %va, %vb ret %vc @@ -312,7 +312,7 @@ define @vdivu_vx_nxv1i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -342,7 +342,7 @@ define @vdivu_vv_nxv2i16( %va, %va, %vb ret %vc @@ -352,7 +352,7 @@ define @vdivu_vx_nxv2i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -382,7 +382,7 @@ define @vdivu_vv_nxv4i16( %va, %va, %vb ret %vc @@ -392,7 +392,7 @@ define @vdivu_vx_nxv4i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, 
undef, zeroinitializer @@ -422,7 +422,7 @@ define @vdivu_vv_nxv8i16( %va, %va, %vb ret %vc @@ -432,7 +432,7 @@ define @vdivu_vx_nxv8i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -462,7 +462,7 @@ define @vdivu_vv_nxv16i16( %va, %va, %vb ret %vc @@ -472,7 +472,7 @@ define @vdivu_vx_nxv16i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -502,7 +502,7 @@ define @vdivu_vv_nxv32i16( %va, %va, %vb ret %vc @@ -512,7 +512,7 @@ define @vdivu_vx_nxv32i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -542,7 +542,7 @@ define @vdivu_vv_nxv1i32( %va, %va, %vb ret %vc @@ -552,7 +552,7 @@ define @vdivu_vx_nxv1i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -582,7 +582,7 @@ define @vdivu_vv_nxv2i32( %va, %va, %vb ret %vc @@ -592,7 +592,7 @@ define @vdivu_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement 
undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -622,7 +622,7 @@ define @vdivu_vv_nxv4i32( %va, %va, %vb ret %vc @@ -632,7 +632,7 @@ define @vdivu_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -662,7 +662,7 @@ define @vdivu_vv_nxv8i32( %va, %va, %vb ret %vc @@ -672,7 +672,7 @@ define @vdivu_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -702,7 +702,7 @@ define @vdivu_vv_nxv16i32( %va, %va, %vb ret %vc @@ -712,7 +712,7 @@ define @vdivu_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -742,7 +742,7 @@ define @vdivu_vv_nxv1i64( %va, %va, %vb ret %vc @@ -759,7 +759,7 @@ define @vdivu_vx_nxv1i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v26, v26, a1 ; CHECK-NEXT: vsrl.vx v26, v26, a1 ; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vdiv.vv v8, v8, v25 +; CHECK-NEXT: vdivu.vv v8, v8, v25 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -796,7 +796,7 @@ define @vdivu_vv_nxv2i64( %va, %va, %vb ret %vc @@ -813,7 +813,7 @@ define @vdivu_vx_nxv2i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v28, v28, a1 ; CHECK-NEXT: vsrl.vx v28, v28, a1 ; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vdiv.vv v8, v8, v26 +; 
CHECK-NEXT: vdivu.vv v8, v8, v26 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -850,7 +850,7 @@ define @vdivu_vv_nxv4i64( %va, %va, %vb ret %vc @@ -867,7 +867,7 @@ define @vdivu_vx_nxv4i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v12, v12, a1 ; CHECK-NEXT: vsrl.vx v12, v12, a1 ; CHECK-NEXT: vor.vv v28, v12, v28 -; CHECK-NEXT: vdiv.vv v8, v8, v28 +; CHECK-NEXT: vdivu.vv v8, v8, v28 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -904,7 +904,7 @@ define @vdivu_vv_nxv8i64( %va, %va, %vb ret %vc @@ -921,7 +921,7 @@ define @vdivu_vx_nxv8i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v24, v24, a1 ; CHECK-NEXT: vsrl.vx v24, v24, a1 ; CHECK-NEXT: vor.vv v16, v24, v16 -; CHECK-NEXT: vdiv.vv v8, v8, v16 +; CHECK-NEXT: vdivu.vv v8, v8, v16 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll index 70cd4fba1eb7..bc72099d75eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll @@ -5,7 +5,7 @@ define @vdivu_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdivu_vx_nxv1i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -44,7 +44,7 @@ define @vdivu_vv_nxv2i8( %va, %va, %vb ret %vc @@ -54,7 +54,7 @@ define @vdivu_vx_nxv2i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, 
i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -83,7 +83,7 @@ define @vdivu_vv_nxv4i8( %va, %va, %vb ret %vc @@ -93,7 +93,7 @@ define @vdivu_vx_nxv4i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -122,7 +122,7 @@ define @vdivu_vv_nxv8i8( %va, %va, %vb ret %vc @@ -132,7 +132,7 @@ define @vdivu_vx_nxv8i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -161,7 +161,7 @@ define @vdivu_vv_nxv16i8( %va, %va, %vb ret %vc @@ -171,7 +171,7 @@ define @vdivu_vx_nxv16i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -200,7 +200,7 @@ define @vdivu_vv_nxv32i8( %va, %va, %vb ret %vc @@ -210,7 +210,7 @@ define @vdivu_vx_nxv32i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -239,7 +239,7 @@ define @vdivu_vv_nxv64i8( %va, %va, %vb ret %vc @@ -249,7 +249,7 @@ define @vdivu_vx_nxv64i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: 
ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -278,7 +278,7 @@ define @vdivu_vv_nxv1i16( %va, %va, %vb ret %vc @@ -288,7 +288,7 @@ define @vdivu_vx_nxv1i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -318,7 +318,7 @@ define @vdivu_vv_nxv2i16( %va, %va, %vb ret %vc @@ -328,7 +328,7 @@ define @vdivu_vx_nxv2i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -358,7 +358,7 @@ define @vdivu_vv_nxv4i16( %va, %va, %vb ret %vc @@ -368,7 +368,7 @@ define @vdivu_vx_nxv4i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -398,7 +398,7 @@ define @vdivu_vv_nxv8i16( %va, %va, %vb ret %vc @@ -408,7 +408,7 @@ define @vdivu_vx_nxv8i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -438,7 +438,7 @@ define @vdivu_vv_nxv16i16( %va, %va, %vb ret %vc @@ -448,7 +448,7 @@ define @vdivu_vx_nxv16i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, 
a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -478,7 +478,7 @@ define @vdivu_vv_nxv32i16( %va, %va, %vb ret %vc @@ -488,7 +488,7 @@ define @vdivu_vx_nxv32i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -518,7 +518,7 @@ define @vdivu_vv_nxv1i32( %va, %va, %vb ret %vc @@ -528,7 +528,7 @@ define @vdivu_vx_nxv1i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -558,7 +558,7 @@ define @vdivu_vv_nxv2i32( %va, %va, %vb ret %vc @@ -568,7 +568,7 @@ define @vdivu_vx_nxv2i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -598,7 +598,7 @@ define @vdivu_vv_nxv4i32( %va, %va, %vb ret %vc @@ -608,7 +608,7 @@ define @vdivu_vx_nxv4i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -638,7 +638,7 @@ define @vdivu_vv_nxv8i32( %va, %va, %vb ret %vc @@ -648,7 +648,7 @@ define @vdivu_vx_nxv8i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -678,7 +678,7 @@ define @vdivu_vv_nxv16i32( %va, %va, %vb ret %vc @@ -688,7 +688,7 @@ define @vdivu_vx_nxv16i32( %va, i32 signe ; CHECK-LABEL: vdivu_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -718,7 +718,7 @@ define @vdivu_vv_nxv1i64( %va, %va, %vb ret %vc @@ -728,7 +728,7 @@ define @vdivu_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv1i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -760,7 +760,7 @@ define @vdivu_vv_nxv2i64( %va, %va, %vb ret %vc @@ -770,7 +770,7 @@ define @vdivu_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -802,7 +802,7 @@ define @vdivu_vv_nxv4i64( %va, %va, %vb ret %vc @@ -812,7 +812,7 @@ define @vdivu_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -844,7 +844,7 @@ define @vdivu_vv_nxv8i64( %va, %va, %vb ret %vc @@ -854,7 +854,7 @@ define @vdivu_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: 
vdivu_vx_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer From 2cf21fd6a5b4a6f0f0da55717a787fc38202cca8 Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Thu, 17 Dec 2020 23:58:13 +0100 Subject: [PATCH 083/318] [Support] Indent multi-line descr of enum cli options. As noted in https://reviews.llvm.org/D93459, the formatting of multi-line descriptions of clEnumValN and the likes is unfavorable. Thus this patch adds support for correctly indenting these. Reviewed By: serge-sans-paille Differential Revision: https://reviews.llvm.org/D93494 (cherry picked from commit e3f02302e318837d2421c6425450f04ae0a82b90) --- llvm/include/llvm/Support/CommandLine.h | 13 +++++++++++ llvm/lib/Support/CommandLine.cpp | 25 ++++++++++++++++------ llvm/unittests/Support/CommandLineTest.cpp | 22 +++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 38f3e188be55..0706aa226c0e 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -369,9 +369,22 @@ class Option { virtual void setDefault() = 0; + // Prints the help string for an option. + // + // This maintains the Indent for multi-line descriptions. + // FirstLineIndentedBy is the count of chars of the first line + // i.e. the one containing the --
: # SOURCE-NEXT: ; int main() { # CHECK-NEXT: 10: 55 pushq %rbp diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test index b384c49b350e..23ce55a329ac 100644 --- a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test +++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test @@ -24,15 +24,6 @@ ; RUN: llvm-objdump --prefix myprefix --source %t-correct-prefix.o 2>&1 | \ ; RUN: FileCheck %s --check-prefix=CHECK-BROKEN-PREFIX -DFILE=%t-correct-prefix.o -DPREFIX=myprefix%/p -;; Test malformed input. - -; RUN: sed -e "s,SRC_COMPDIR,,g" -e "s,filename: \"source-interleave-x86_64.c\",filename: \"\",g" \ -; RUN: %p/Inputs/source-interleave.ll > %t-malformed.ll -; RUN: llc -o %t-malformed.o -filetype=obj -mtriple=x86_64-pc-linux %t-malformed.ll -; RUN: llvm-objdump --prefix myprefix --source %t-malformed.o 2>&1 | \ -; RUN: FileCheck %s --check-prefix=CHECK-MALFORMED -DFILE=%t-malformed.o -; CHECK-MALFORMED: warning: '[[FILE]]': failed to parse debug information for [[FILE]] - ;; Using only a prefix separator is the same as not using the `--prefix` option. ; RUN: llvm-objdump --prefix / --source %t-missing-prefix.o 2>&1 | \ diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 3134f989603a..17128e95727f 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -947,8 +947,8 @@ class SourcePrinter { std::unordered_map> LineCache; // Keep track of missing sources. StringSet<> MissingSources; - // Only emit 'no debug info' warning once. - bool WarnedNoDebugInfo; + // Only emit 'invalid debug info' warning once. 
+ bool WarnedInvalidDebugInfo = false; private: bool cacheSource(const DILineInfo& LineInfoFile); @@ -962,8 +962,7 @@ class SourcePrinter { public: SourcePrinter() = default; - SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) - : Obj(Obj), WarnedNoDebugInfo(false) { + SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::LinkageName; @@ -1018,22 +1017,17 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS, return; DILineInfo LineInfo = DILineInfo(); - auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address); + Expected ExpectedLineInfo = + Symbolizer->symbolizeCode(*Obj, Address); std::string ErrorMessage; - if (!ExpectedLineInfo) - ErrorMessage = toString(ExpectedLineInfo.takeError()); - else + if (ExpectedLineInfo) { LineInfo = *ExpectedLineInfo; - - if (LineInfo.FileName == DILineInfo::BadString) { - if (!WarnedNoDebugInfo) { - std::string Warning = - "failed to parse debug information for " + ObjectFilename.str(); - if (!ErrorMessage.empty()) - Warning += ": " + ErrorMessage; - reportWarning(Warning, ObjectFilename); - WarnedNoDebugInfo = true; - } + } else if (!WarnedInvalidDebugInfo) { + WarnedInvalidDebugInfo = true; + // TODO Untested. + reportWarning("failed to parse debug information: " + + toString(ExpectedLineInfo.takeError()), + ObjectFilename); } if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) { From 76d5d54f62599d249e0bf2d1b0998451a584c3f3 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 14 Feb 2021 12:25:56 -0600 Subject: [PATCH 153/318] Avoid use of stack allocations in asynchronous calls NOTE: This is an adaption of the original patch to be applicable to the LLVM 12 release branch. Logic is the same though. As reported by Guilherme Valarini [0], we used to pass stack allocations to calls that can nowadays be asynchronous. 
This is arguably a problem and it will inevitably result in UB. To remedy the situation we allocate the locations as part of the AsyncInfoTy object. The lifetime of that object matches what we need for now. If the synchronization is not tied to the AsyncInfoTy object anymore we might need to have a different buffer construct in global space. This should be back-ported to LLVM 12 but needs slight modifications as it is based on refactoring patches we do not need to backport. [0] https://lists.llvm.org/pipermail/openmp-dev/2021-February/003867.html Differential Revision: https://reviews.llvm.org/D96667 --- openmp/libomptarget/include/omptarget.h | 10 ++++++++++ openmp/libomptarget/src/omptarget.cpp | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h index 9c533944d135..46bb8206efa1 100644 --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -14,6 +14,8 @@ #ifndef _OMPTARGET_H_ #define _OMPTARGET_H_ +#include +#include #include #include @@ -119,10 +121,18 @@ struct __tgt_target_table { /// This struct contains information exchanged between different asynchronous /// operations for device-dependent optimization and potential synchronization struct __tgt_async_info { + /// Locations we used in (potentially) asynchronous calls which should live + /// as long as this AsyncInfoTy object. + std::deque BufferLocations; + // A pointer to a queue-like structure where offloading operations are issued. // We assume to use this structure to do synchronization. In CUDA backend, it // is CUstream. void *Queue = nullptr; + + /// Return a void* reference with a lifetime that is at least as long as this + /// AsyncInfoTy object. The location can be used as intermediate buffer. 
+ void *&getVoidPtrLocation(); }; /// This struct is a record of non-contiguous information diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index e4b7b18bc70b..37150aae2fe6 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -18,6 +18,13 @@ #include #include +/// Return a void* reference with a lifetime that is at least as long as this +/// AsyncInfoTy object. The location can be used as intermediate buffer. +void *&__tgt_async_info::getVoidPtrLocation() { + BufferLocations.push_back(nullptr); + return BufferLocations.back(); +} + /* All begin addresses for partially mapped structs must be 8-aligned in order * to ensure proper alignment of members. E.g. * @@ -415,7 +422,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; - void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); + void *&TgtPtrBase = async_info_ptr->getVoidPtrLocation(); + TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *), async_info_ptr); if (rt != OFFLOAD_SUCCESS) { @@ -1122,8 +1130,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr, DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); - void *PointerTgtPtrBegin = Device.getTgtPtrBegin( - HstPtrVal, ArgSizes[I], IsLast, false, IsHostPtr); + void *&PointerTgtPtrBegin = AsyncInfo->getVoidPtrLocation(); + PointerTgtPtrBegin = Device.getTgtPtrBegin(HstPtrVal, ArgSizes[I], + IsLast, false, IsHostPtr); if (!PointerTgtPtrBegin) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", DPxPTR(HstPtrVal)); From 
a3545a0b0777da773c5e2370622579c44a8f0f63 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 19 Feb 2021 09:06:05 -0500 Subject: [PATCH 154/318] [Analysis][LoopVectorize] do not form reductions of pointers This is a fix for https://llvm.org/PR49215 either before/after we make a verifier enhancement for vector reductions with D96904. I'm not sure what the current thinking is for pointer math/logic in IR. We allow icmp on pointer values. Therefore, we match min/max patterns, so without this patch, the vectorizer could form a vector reduction from that sequence. But the LangRef definitions for min/max and vector reduction intrinsics do not allow pointer types: https://llvm.org/docs/LangRef.html#llvm-smax-intrinsic https://llvm.org/docs/LangRef.html#llvm-vector-reduce-umax-intrinsic So we would crash/assert at some point - either in IR verification, in the cost model, or in codegen. If we do want to allow this kind of transform, we will need to update the LangRef and all of those parts of the compiler. 
Differential Revision: https://reviews.llvm.org/D97047 (cherry picked from commit 5b250a27ec7822aa0a32abb696cb16c2cc60149c) --- llvm/lib/Analysis/IVDescriptors.cpp | 5 ++- .../Transforms/LoopVectorize/reduction-ptr.ll | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-ptr.ll diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 7f311d8f9a2b..94a24ccf2155 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, if (RecurrenceType->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; - } else { + } else if (RecurrenceType->isIntegerTy()) { if (!isIntegerRecurrenceKind(Kind)) return false; if (isArithmeticRecurrenceKind(Kind)) Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } else { + // Pointer min/max may exist, but it is not supported as a reduction op. + return false; } Worklist.push_back(Start); diff --git a/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll new file mode 100644 index 000000000000..5cae61638f31 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Reductions of pointer types are not supported. 
+ +define void @PR49215(i32* %p, i32* %q) { +; CHECK-LABEL: @PR49215( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[G:%.*]] = phi i32* [ [[P:%.*]], [[ENTRY]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32* [[Q:%.*]], [[G]] +; CHECK-NEXT: [[UMIN]] = select i1 [[CMP2]], i32* [[Q]], i32* [[G]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], undef +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: loopexit: +; CHECK-NEXT: [[UMIN_LCSSA:%.*]] = phi i32* [ [[UMIN]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI_CAST:%.*]] = ptrtoint i32* [[UMIN_LCSSA]] to i64 +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %g = phi i32* [ %p, %entry ], [ %umin, %for.body ] + %cmp2 = icmp ult i32* %q, %g + %umin = select i1 %cmp2, i32* %q, i32* %g + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, undef + br i1 %exitcond, label %loopexit, label %for.body + +loopexit: + %phi.cast = ptrtoint i32* %umin to i64 + ret void +} From 3444f052006ca2b19052a4599dd9001b01088c25 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Sat, 20 Feb 2021 20:43:16 -0500 Subject: [PATCH 155/318] [clang][Driver][OpenBSD] libcxx also requires pthread (cherry picked from commit b42d57a100c5df6ace68f686f5adaabeafe8a0f6) --- clang/lib/Driver/ToolChains/OpenBSD.cpp | 1 + clang/test/Driver/openbsd.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index f155d74632f9..e162165b2561 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -296,6 +296,7 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args, 
CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++"); CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi"); + CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread"); } std::string OpenBSD::getCompilerRT(const ArgList &Args, diff --git a/clang/test/Driver/openbsd.cpp b/clang/test/Driver/openbsd.cpp index 9293148680c8..23c365d28e7e 100644 --- a/clang/test/Driver/openbsd.cpp +++ b/clang/test/Driver/openbsd.cpp @@ -6,7 +6,7 @@ // RUN: | FileCheck --check-prefix=CHECK-CXX %s // RUN: %clangxx %s -### -o %t.o -target arm-unknown-openbsd 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CXX %s -// CHECK-CXX: "-lc++" "-lc++abi" "-lm" +// CHECK-CXX: "-lc++" "-lc++abi" "-lpthread" "-lm" // RUN: %clangxx %s -### -pg -o %t.o -target amd64-pc-openbsd 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PG-CXX %s @@ -16,4 +16,4 @@ // RUN: | FileCheck --check-prefix=CHECK-PG-CXX %s // RUN: %clangxx %s -### -pg -o %t.o -target arm-unknown-openbsd 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PG-CXX %s -// CHECK-PG-CXX: "-lc++_p" "-lc++abi_p" "-lm_p" +// CHECK-PG-CXX: "-lc++_p" "-lc++abi_p" "-lpthread_p" "-lm_p" From 76e4c93ea42b3d23907611d14e347bfeae8d4b0a Mon Sep 17 00:00:00 2001 From: Conrad Poelman Date: Tue, 2 Feb 2021 05:59:38 +0100 Subject: [PATCH 156/318] clang-extra: fix incorrect use of std::lock_guard by adding variable name (identified by MSVC [[nodiscard]] error) `std::lock_guard` is an RAII class that needs a variable name whose scope determines the guard's lifetime. This particular usage lacked a variable name, meaning the guard could be destroyed before the line that it was intended to protect. This line was identified by building clang with the latest MSVC preview release, which declares the std::lock_guard constructor to be `[[nodiscard]]` to draw attention to such issues.
Reviewed By: kadircet Differential Revision: https://reviews.llvm.org/D95725 (cherry picked from commit 0b70c86e2007d3f32968f0a7d9efe8eab3bf0f0a) --- clang-tools-extra/clangd/support/Function.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/support/Function.h b/clang-tools-extra/clangd/support/Function.h index 2cac1b1e7f67..936800d56985 100644 --- a/clang-tools-extra/clangd/support/Function.h +++ b/clang-tools-extra/clangd/support/Function.h @@ -51,7 +51,7 @@ template class Event { Subscription &operator=(Subscription &&Other) { // If *this is active, unsubscribe. if (Parent) { - std::lock_guard(Parent->ListenersMu); + std::lock_guard Lock(Parent->ListenersMu); llvm::erase_if(Parent->Listeners, [&](const std::pair &P) { return P.second == ListenerID; From 8eeb3d99933a3246f2d850b807cf54f11a3a8dce Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Sun, 31 Jan 2021 13:53:22 +0100 Subject: [PATCH 157/318] [clangd] Rename: merge index/AST refs path-insensitively where needed If you have c:\foo open, and C:\foo indexed (case difference) then these need to be considered the same file. Otherwise we emit edits to both, and editors do... something that isn't pretty. Maybe more centralized normalization is called for, but it's not trivial to do this while also being case-preserving. 
see https://github.com/clangd/clangd/issues/108 Fixes https://github.com/clangd/clangd/issues/665 Differential Revision: https://reviews.llvm.org/D95759 (cherry picked from commit b63cd4db915c08e0cb4cf668a18de24b67f2c44c) --- .../clangd/GlobalCompilationDatabase.cpp | 14 ------ clang-tools-extra/clangd/refactor/Rename.cpp | 4 +- .../clangd/support/CMakeLists.txt | 1 + clang-tools-extra/clangd/support/Path.cpp | 30 ++++++++++++ clang-tools-extra/clangd/support/Path.h | 6 +++ .../clangd/unittests/RenameTests.cpp | 46 +++++++++++++++++++ 6 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 clang-tools-extra/clangd/support/Path.cpp diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 542d0c3e4dbc..a38c8a57d161 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -396,20 +396,6 @@ DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const { return None; } -// For platforms where paths are case-insensitive (but case-preserving), -// we need to do case-insensitive comparisons and use lowercase keys. -// FIXME: Make Path a real class with desired semantics instead. -// This class is not the only place this problem exists. -// FIXME: Mac filesystems default to case-insensitive, but may be sensitive. 
- -static std::string maybeCaseFoldPath(PathRef Path) { -#if defined(_WIN32) || defined(__APPLE__) - return Path.lower(); -#else - return std::string(Path); -#endif -} - std::vector DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches( llvm::ArrayRef Dirs) const { diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index d3c7da96a441..a857b3479871 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -68,7 +68,7 @@ llvm::Optional getOtherRefFile(const Decl &D, StringRef MainFile, if (OtherFile) return; if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { - if (*RefFilePath != MainFile) + if (!pathEqual(*RefFilePath, MainFile)) OtherFile = *RefFilePath; } }); @@ -474,7 +474,7 @@ findOccurrencesOutsideFile(const NamedDecl &RenameDecl, if ((R.Kind & RefKind::Spelled) == RefKind::Unknown) return; if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { - if (*RefFilePath != MainFile) + if (!pathEqual(*RefFilePath, MainFile)) AffectedFiles[*RefFilePath].push_back(toRange(R.Location)); } }); diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt index f0fe073eb136..fc7d7a28117b 100644 --- a/clang-tools-extra/clangd/support/CMakeLists.txt +++ b/clang-tools-extra/clangd/support/CMakeLists.txt @@ -23,6 +23,7 @@ add_clang_library(clangdSupport Logger.cpp Markup.cpp MemoryTree.cpp + Path.cpp Shutdown.cpp Threading.cpp ThreadsafeFS.cpp diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp new file mode 100644 index 000000000000..f72d00070f34 --- /dev/null +++ b/clang-tools-extra/clangd/support/Path.cpp @@ -0,0 +1,30 @@ +//===--- Path.cpp -------------------------------------------*- C++-*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/Path.h" +namespace clang { +namespace clangd { + +std::string maybeCaseFoldPath(PathRef Path) { +#if defined(_WIN32) || defined(__APPLE__) + return Path.lower(); +#else + return std::string(Path); +#endif +} + +bool pathEqual(PathRef A, PathRef B) { +#if defined(_WIN32) || defined(__APPLE__) + return A.equals_lower(B); +#else + return A == B; +#endif +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/support/Path.h b/clang-tools-extra/clangd/support/Path.h index 4d4ad7f49047..402903130f01 100644 --- a/clang-tools-extra/clangd/support/Path.h +++ b/clang-tools-extra/clangd/support/Path.h @@ -22,6 +22,12 @@ using Path = std::string; /// signatures. using PathRef = llvm::StringRef; +// For platforms where paths are case-insensitive (but case-preserving), +// we need to do case-insensitive comparisons and use lowercase keys. +// FIXME: Make Path a real class with desired semantics instead. 
+std::string maybeCaseFoldPath(PathRef Path); +bool pathEqual(PathRef, PathRef); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 4bc03796bb2b..e25850a68fe9 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -1067,6 +1067,52 @@ TEST(RenameTest, Renameable) { } } +MATCHER_P(newText, T, "") { return arg.newText == T; } + +TEST(RenameTest, IndexMergeMainFile) { + Annotations Code("int ^x();"); + TestTU TU = TestTU::withCode(Code.code()); + TU.Filename = "main.cc"; + auto AST = TU.build(); + + auto Main = testPath("main.cc"); + + auto Rename = [&](const SymbolIndex *Idx) { + auto GetDirtyBuffer = [&](PathRef Path) -> llvm::Optional { + return Code.code().str(); // Every file has the same content. + }; + RenameOptions Opts; + Opts.AllowCrossFile = true; + RenameInputs Inputs{Code.point(), "xPrime", AST, Main, + Idx, Opts, GetDirtyBuffer}; + auto Results = rename(Inputs); + EXPECT_TRUE(bool(Results)) << llvm::toString(Results.takeError()); + return std::move(*Results); + }; + + // We do not expect to see duplicated edits from AST vs index. + auto Results = Rename(TU.index().get()); + EXPECT_THAT(Results.GlobalChanges.keys(), ElementsAre(Main)); + EXPECT_THAT(Results.GlobalChanges[Main].asTextEdits(), + ElementsAre(newText("xPrime"))); + + // Sanity check: we do expect to see index results! + TU.Filename = "other.cc"; + Results = Rename(TU.index().get()); + EXPECT_THAT(Results.GlobalChanges.keys(), + UnorderedElementsAre(Main, testPath("other.cc"))); + +#if defined(_WIN32) || defined(__APPLE__) + // On case-insensitive systems, no duplicates if AST vs index case differs. 
+ // https://github.com/clangd/clangd/issues/665 + TU.Filename = "MAIN.CC"; + Results = Rename(TU.index().get()); + EXPECT_THAT(Results.GlobalChanges.keys(), ElementsAre(Main)); + EXPECT_THAT(Results.GlobalChanges[Main].asTextEdits(), + ElementsAre(newText("xPrime"))); +#endif +} + TEST(RenameTest, MainFileReferencesOnly) { // filter out references not from main file. llvm::StringRef Test = From d8404633401509936600b60274b72fc03f11f040 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 15 Feb 2021 09:00:49 +0100 Subject: [PATCH 158/318] [clangd] Treat paths case-insensitively depending on the platform Path{Match,Exclude} and MountPoint were checking paths case-sensitively on all platforms, as with other features, this was causing problems on windows. Since users can have capital drive letters on config files, but editors might lower-case them. This patch addresses that issue by: - Creating regexes with case-insensitive matching on those platforms. - Introducing a new pathIsAncestor helper, which performs checks in a case-correct manner where needed. 
Differential Revision: https://reviews.llvm.org/D96690 (cherry picked from commit ecea7218fb9b994b26471e9877851cdb51a5f1d4) --- clang-tools-extra/clangd/ConfigCompile.cpp | 20 +++++++--- clang-tools-extra/clangd/support/Path.cpp | 37 ++++++++++++------- clang-tools-extra/clangd/support/Path.h | 12 ++++++ .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/ConfigCompileTests.cpp | 36 ++++++++++++++++++ .../clangd/unittests/RenameTests.cpp | 2 +- .../clangd/unittests/support/PathTests.cpp | 36 ++++++++++++++++++ 7 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 clang-tools-extra/clangd/unittests/support/PathTests.cpp diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index 8682cae36f26..dadc578c3a81 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -31,6 +31,7 @@ #include "Features.inc" #include "TidyProvider.h" #include "support/Logger.h" +#include "support/Path.h" #include "support/Trace.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -101,9 +102,11 @@ struct FragmentCompiler { // Normalized Fragment::SourceInfo::Directory. 
std::string FragmentDirectory; - llvm::Optional compileRegex(const Located &Text) { + llvm::Optional + compileRegex(const Located &Text, + llvm::Regex::RegexFlags Flags = llvm::Regex::NoFlags) { std::string Anchored = "^(" + *Text + ")$"; - llvm::Regex Result(Anchored); + llvm::Regex Result(Anchored, Flags); std::string RegexError; if (!Result.isValid(RegexError)) { diag(Error, "Invalid regex " + Anchored + ": " + RegexError, Text.Range); @@ -195,9 +198,15 @@ struct FragmentCompiler { if (F.HasUnrecognizedCondition) Out.Conditions.push_back([&](const Params &) { return false; }); +#ifdef CLANGD_PATH_CASE_INSENSITIVE + llvm::Regex::RegexFlags Flags = llvm::Regex::IgnoreCase; +#else + llvm::Regex::RegexFlags Flags = llvm::Regex::NoFlags; +#endif + auto PathMatch = std::make_unique>(); for (auto &Entry : F.PathMatch) { - if (auto RE = compileRegex(Entry)) + if (auto RE = compileRegex(Entry, Flags)) PathMatch->push_back(std::move(*RE)); } if (!PathMatch->empty()) { @@ -218,7 +227,7 @@ struct FragmentCompiler { auto PathExclude = std::make_unique>(); for (auto &Entry : F.PathExclude) { - if (auto RE = compileRegex(Entry)) + if (auto RE = compileRegex(Entry, Flags)) PathExclude->push_back(std::move(*RE)); } if (!PathExclude->empty()) { @@ -349,7 +358,8 @@ struct FragmentCompiler { return; Spec.MountPoint = std::move(*AbsPath); Out.Apply.push_back([Spec(std::move(Spec))](const Params &P, Config &C) { - if (!P.Path.startswith(Spec.MountPoint)) + if (P.Path.empty() || !pathStartsWith(Spec.MountPoint, P.Path, + llvm::sys::path::Style::posix)) return; C.Index.External = Spec; // Disable background indexing for the files under the mountpoint. 
diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp index f72d00070f34..6fc74b92fc7a 100644 --- a/clang-tools-extra/clangd/support/Path.cpp +++ b/clang-tools-extra/clangd/support/Path.cpp @@ -7,24 +7,33 @@ //===----------------------------------------------------------------------===// #include "support/Path.h" +#include "llvm/Support/Path.h" namespace clang { namespace clangd { -std::string maybeCaseFoldPath(PathRef Path) { -#if defined(_WIN32) || defined(__APPLE__) - return Path.lower(); -#else - return std::string(Path); -#endif -} +#ifdef CLANGD_PATH_CASE_INSENSITIVE +std::string maybeCaseFoldPath(PathRef Path) { return Path.lower(); } +bool pathEqual(PathRef A, PathRef B) { return A.equals_lower(B); } +#else // NOT CLANGD_PATH_CASE_INSENSITIVE +std::string maybeCaseFoldPath(PathRef Path) { return Path.str(); } +bool pathEqual(PathRef A, PathRef B) { return A == B; } +#endif // CLANGD_PATH_CASE_INSENSITIVE -bool pathEqual(PathRef A, PathRef B) { -#if defined(_WIN32) || defined(__APPLE__) - return A.equals_lower(B); -#else - return A == B; -#endif +bool pathStartsWith(PathRef Ancestor, PathRef Path, + llvm::sys::path::Style Style) { + assert(llvm::sys::path::is_absolute(Ancestor, Style) && + llvm::sys::path::is_absolute(Path, Style)); + // If ancestor ends with a separator drop that, so that we can match /foo/ as + // a parent of /foo. + if (llvm::sys::path::is_separator(Ancestor.back(), Style)) + Ancestor = Ancestor.drop_back(); + // Ensure Path starts with Ancestor. + if (!pathEqual(Ancestor, Path.take_front(Ancestor.size()))) + return false; + Path = Path.drop_front(Ancestor.size()); + // Then make sure either two paths are equal or Path has a separator + // afterwards. 
+ return Path.empty() || llvm::sys::path::is_separator(Path.front(), Style); } - } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/support/Path.h b/clang-tools-extra/clangd/support/Path.h index 402903130f01..938d7d7e99c9 100644 --- a/clang-tools-extra/clangd/support/Path.h +++ b/clang-tools-extra/clangd/support/Path.h @@ -10,8 +10,14 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Path.h" #include +/// Whether current platform treats paths case insensitively. +#if defined(_WIN32) || defined(__APPLE__) +#define CLANGD_PATH_CASE_INSENSITIVE +#endif + namespace clang { namespace clangd { @@ -28,6 +34,12 @@ using PathRef = llvm::StringRef; std::string maybeCaseFoldPath(PathRef Path); bool pathEqual(PathRef, PathRef); +/// Checks if \p Ancestor is a proper ancestor of \p Path. This is just a +/// smarter lexical prefix match, e.g: foo/bar/baz doesn't start with foo/./bar. +/// Both \p Ancestor and \p Path must be absolute. 
+bool pathStartsWith( + PathRef Ancestor, PathRef Path, + llvm::sys::path::Style Style = llvm::sys::path::Style::native); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index f4d364720eaf..c396c6f5873b 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -104,6 +104,7 @@ add_unittest(ClangdUnitTests ClangdTests support/FunctionTests.cpp support/MarkupTests.cpp support/MemoryTreeTests.cpp + support/PathTests.cpp support/ThreadingTests.cpp support/TestTracer.cpp support/TraceTests.cpp diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 4b1da2035727..d9aa171f3102 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -99,6 +99,25 @@ TEST_F(ConfigCompileTests, Condition) { Frag.If.PathMatch.emplace_back("ba*r"); EXPECT_FALSE(compileAndApply()); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); + + // Only matches case-insensitively. + Frag = {}; + Frag.If.PathMatch.emplace_back("B.*R"); + EXPECT_THAT(Diags.Diagnostics, IsEmpty()); +#ifdef CLANGD_PATH_CASE_INSENSITIVE + EXPECT_TRUE(compileAndApply()); +#else + EXPECT_FALSE(compileAndApply()); +#endif + + Frag = {}; + Frag.If.PathExclude.emplace_back("B.*R"); + EXPECT_THAT(Diags.Diagnostics, IsEmpty()); +#ifdef CLANGD_PATH_CASE_INSENSITIVE + EXPECT_FALSE(compileAndApply()); +#else + EXPECT_TRUE(compileAndApply()); +#endif } TEST_F(ConfigCompileTests, CompileCommands) { @@ -406,6 +425,23 @@ TEST_F(ConfigCompileTests, ExternalBlockMountPoint) { ASSERT_THAT(Diags.Diagnostics, IsEmpty()); ASSERT_TRUE(Conf.Index.External); EXPECT_THAT(Conf.Index.External->MountPoint, FooPath); + + // Only matches case-insensitively. 
+ BazPath = testPath("fOo/baz.h", llvm::sys::path::Style::posix); + BazPath = llvm::sys::path::convert_to_slash(BazPath); + Parm.Path = BazPath; + + FooPath = testPath("FOO/", llvm::sys::path::Style::posix); + FooPath = llvm::sys::path::convert_to_slash(FooPath); + Frag = GetFrag("", FooPath.c_str()); + compileAndApply(); + ASSERT_THAT(Diags.Diagnostics, IsEmpty()); +#ifdef CLANGD_PATH_CASE_INSENSITIVE + ASSERT_TRUE(Conf.Index.External); + EXPECT_THAT(Conf.Index.External->MountPoint, FooPath); +#else + ASSERT_FALSE(Conf.Index.External); +#endif } } // namespace } // namespace config diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index e25850a68fe9..b2c83a1a4303 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -1102,7 +1102,7 @@ TEST(RenameTest, IndexMergeMainFile) { EXPECT_THAT(Results.GlobalChanges.keys(), UnorderedElementsAre(Main, testPath("other.cc"))); -#if defined(_WIN32) || defined(__APPLE__) +#ifdef CLANGD_PATH_CASE_INSENSITIVE // On case-insensitive systems, no duplicates if AST vs index case differs. // https://github.com/clangd/clangd/issues/665 TU.Filename = "MAIN.CC"; diff --git a/clang-tools-extra/clangd/unittests/support/PathTests.cpp b/clang-tools-extra/clangd/unittests/support/PathTests.cpp new file mode 100644 index 000000000000..26b999d103a0 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/support/PathTests.cpp @@ -0,0 +1,36 @@ +//===-- PathTests.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/Path.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { +TEST(PathTests, IsAncestor) { + EXPECT_TRUE(pathStartsWith("/foo", "/foo")); + EXPECT_TRUE(pathStartsWith("/foo/", "/foo")); + + EXPECT_FALSE(pathStartsWith("/foo", "/fooz")); + EXPECT_FALSE(pathStartsWith("/foo/", "/fooz")); + + EXPECT_TRUE(pathStartsWith("/foo", "/foo/bar")); + EXPECT_TRUE(pathStartsWith("/foo/", "/foo/bar")); + +#ifdef CLANGD_PATH_CASE_INSENSITIVE + EXPECT_TRUE(pathStartsWith("/fOo", "/foo/bar")); + EXPECT_TRUE(pathStartsWith("/foo", "/fOo/bar")); +#else + EXPECT_FALSE(pathStartsWith("/fOo", "/foo/bar")); + EXPECT_FALSE(pathStartsWith("/foo", "/fOo/bar")); +#endif +} +} // namespace +} // namespace clangd +} // namespace clang From b60110090a942078bbacf71db166c2353c340413 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 16 Feb 2021 20:57:00 +0100 Subject: [PATCH 159/318] [clangd] Fix windows buildbots after ecea7218fb9b994b26471e9877851cdb51a5f1d4 (cherry picked from commit cdef5a7161767c2c4b3b7cb2542cf1d29b6d4a09) --- clang-tools-extra/clangd/support/Path.cpp | 4 ++-- .../clangd/unittests/support/PathTests.cpp | 21 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp index 6fc74b92fc7a..a7907cffe60c 100644 --- a/clang-tools-extra/clangd/support/Path.cpp +++ b/clang-tools-extra/clangd/support/Path.cpp @@ -21,8 +21,8 @@ bool pathEqual(PathRef A, PathRef B) { return A == B; } bool pathStartsWith(PathRef Ancestor, PathRef Path, llvm::sys::path::Style Style) { - assert(llvm::sys::path::is_absolute(Ancestor, Style) && - llvm::sys::path::is_absolute(Path, Style)); + assert(llvm::sys::path::is_absolute(Ancestor) && + 
llvm::sys::path::is_absolute(Path)); // If ancestor ends with a separator drop that, so that we can match /foo/ as // a parent of /foo. if (llvm::sys::path::is_separator(Ancestor.back(), Style)) diff --git a/clang-tools-extra/clangd/unittests/support/PathTests.cpp b/clang-tools-extra/clangd/unittests/support/PathTests.cpp index 26b999d103a0..599c76926d30 100644 --- a/clang-tools-extra/clangd/unittests/support/PathTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/PathTests.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "TestFS.h" #include "support/Path.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -14,21 +15,21 @@ namespace clang { namespace clangd { namespace { TEST(PathTests, IsAncestor) { - EXPECT_TRUE(pathStartsWith("/foo", "/foo")); - EXPECT_TRUE(pathStartsWith("/foo/", "/foo")); + EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("foo"))); + EXPECT_TRUE(pathStartsWith(testPath("foo/"), testPath("foo"))); - EXPECT_FALSE(pathStartsWith("/foo", "/fooz")); - EXPECT_FALSE(pathStartsWith("/foo/", "/fooz")); + EXPECT_FALSE(pathStartsWith(testPath("foo"), testPath("fooz"))); + EXPECT_FALSE(pathStartsWith(testPath("foo/"), testPath("fooz"))); - EXPECT_TRUE(pathStartsWith("/foo", "/foo/bar")); - EXPECT_TRUE(pathStartsWith("/foo/", "/foo/bar")); + EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("foo/bar"))); + EXPECT_TRUE(pathStartsWith(testPath("foo/"), testPath("foo/bar"))); #ifdef CLANGD_PATH_CASE_INSENSITIVE - EXPECT_TRUE(pathStartsWith("/fOo", "/foo/bar")); - EXPECT_TRUE(pathStartsWith("/foo", "/fOo/bar")); + EXPECT_TRUE(pathStartsWith(testPath("fOo"), testPath("foo/bar"))); + EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("fOo/bar"))); #else - EXPECT_FALSE(pathStartsWith("/fOo", "/foo/bar")); - EXPECT_FALSE(pathStartsWith("/foo", "/fOo/bar")); + EXPECT_FALSE(pathStartsWith(testPath("fOo"), testPath("foo/bar"))); + EXPECT_FALSE(pathStartsWith(testPath("foo"), 
testPath("fOo/bar"))); #endif } } // namespace From 67d6fbe0f157ba78e8131964d60155dc1090f409 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 22 Feb 2021 22:05:26 +0100 Subject: [PATCH 160/318] [clangd] Release notes for 12.x --- clang-tools-extra/docs/ReleaseNotes.rst | 169 ++++++++++++++++++++++++ 1 file changed, 169 insertions(+) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 2960aad5a556..64b3d224ff6f 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -47,6 +47,9 @@ Major New Features Improvements to clangd ---------------------- +Performance +^^^^^^^^^^^ + - clangd's memory usage is significantly reduced on most Linux systems. In particular, memory usage should not increase dramatically over time. @@ -59,6 +62,172 @@ Improvements to clangd systems can disable this using ``--malloc_trim=0`` or the CMake flag ``-DCLANGD_MALLOC_TRIM=0``. +- Added the `$/memoryUsage request + `_: an LSP extension. + This provides a breakdown of the memory clangd thinks it is using (excluding + malloc overhead etc). The clangd VSCode extension supports showing the memory + usage tree. + +Parsing and selection +^^^^^^^^^^^^^^^^^^^^^ + +- Improved navigation of broken code in C using Recovery AST. (This has been + enabled for C++ since clangd 11). + +- Types are understood more often in broken code. (This is the first release + where Recovery AST preserves speculated types). + +- Heuristic resolution for dependent names in templates. + +Code completion +^^^^^^^^^^^^^^^ + +- Higher priority for symbols that were already used in this file, and symbols + from namespaces mentioned in this file. (Estimated 3% accuracy improvement) + +- Introduced a ranking algorithm trained on snippets from a large C++ codebase. + Use the flag ``--ranking-model=decision_forest`` to try this (Estimated 6% + accuracy improvement). This mode is likely to become the default in future. 
+ + Note: this is a generic model, not specialized for your code. clangd does not + collect any data from your code to train code completion. + +- Signature help works with functions with template-dependent parameter types. + +Go to definition +^^^^^^^^^^^^^^^^ + +- Selecting an ``auto`` or ``decltype`` keyword will attempt to navigate to + a definition of the deduced type. + +- Improved handling of aliases: navigate to the underlying entity more often. + +- Better understanding of declaration vs definition for Objective-C classes and + protocols. + +- Selecting a pure-virtual method shows its overrides. + +Find references +^^^^^^^^^^^^^^^ + +- Indexes are smarter about not returning stale references when code is deleted. + +- References in implementation files are always indexed, so results should be + more complete. + +- Find-references on a virtual method shows references to overridden methods. + +New navigation features +^^^^^^^^^^^^^^^^^^^^^^^ + +- Call hierarchy (``textDocument/callHierarchy``) is supported. + Only incoming calls are available. + +- Go to implementation (``textDocument/implementation``) is supported on + abstract classes, and on virtual methods. + +- Symbol search (``workspace/symbol``) queries may be partially qualified. + That is, typing ``b::Foo`` will match the symbol ``a::b::c::Foo``. + +Refactoring +^^^^^^^^^^^ + +- New refactoring: populate ``switch`` statement with cases. + (This acts as a fix for the ``-Wswitch-enum`` warning). + +- Renaming templates is supported, and many other complex cases were fixed. + +- Attempting to rename to an invalid or conflicting name can produce an error + message rather than broken code. (Not all cases are detected!) + +- The accuracy of many code actions has been improved. + +Hover +^^^^^ + +- Hovers for ``auto`` and ``decltype`` show the type in the same style as other + hovers. ``this`` is also now supported. + +- Displayed type names are more consistent and idiomatic. 
+ +Semantic highlighting +^^^^^^^^^^^^^^^^^^^^^ + +- Inactive preprocessor regions (``#ifdef``) are highlighted as comments. + +- clangd 12 is the last release with support for the non-standard + ``textDocument/semanticHighlights`` notification. Clients should migrate to + the ``textDocument/semanticTokens`` request added in LSP 3.16. + +Remote index (alpha) +^^^^^^^^^^^^^^^^^^^^ + +- clangd can now connect to a remote index server instead of building a project + index locally. This saves resources in large codebases that are slow to index. + +- The server program is ``clangd-index-server``, and it consumes index files + produced by ``clangd-indexer``. + +- This feature requires clangd to be built with the CMake flag + ``-DCLANGD_ENABLE_REMOTE=On``, which requires GRPC libraries and is not + enabled by default. Unofficial releases of the remote-index-enabled client + and server tools are at https://github.com/clangd/clangd/releases + +- Large projects can deploy a shared server, and check in a ``.clangd`` file + to enable it (in the ``Index.External`` section). We hope to provide such a + server for ``llvm-project`` itself in the near future. + +Configuration +^^^^^^^^^^^^^ + +- Static and remote indexes can be configured in the ``Index.External`` section. + Different static indexes can now be used for different files. + (Obsoletes the flag ``--index-file``). + +- Diagnostics can be filtered or suppressed in the ``Diagnostics`` section. + +- Clang-tidy checks can be enabled/disabled in the ``Diagnostics.ClangTidy`` + section. (Obsoletes the flag ``--clang-tidy-checks``). + +- The compilation database directory can be configured in the ``CompileFlags`` + section. Different compilation databases can now be specified for different + files. (Obsoletes the flag ``--compile-commands-dir``). + +- Errors in loaded configuration files are published as LSP diagnostics, and so + should be shown in your editor.
+ +`Full reference of configuration options `_ + +System integration +^^^^^^^^^^^^^^^^^^ + +- Changes to ``compile_commands.json`` and ``compile_flags.txt`` will take + effect the next time a file is parsed, without restarting clangd. + +- ``clangd --check=`` can be run on the command-line to simulate + opening a file without actually using an editor. This can be useful to + reproduce crashes or other problems. + +- Various fixes to handle filenames correctly (and case-insensitively) on + Windows. + +- If incoming LSP messages are malformed, the logs now contain details. + +Miscellaneous +^^^^^^^^^^^^^ + +- "Show AST" request + (`textDocument/ast `_) + added as an LSP extension. This displays a simplified view of the clang AST + for selected code. The clangd VSCode extension supports this. + +- clangd should no longer crash while loading old or corrupt index files. + +- The flags ``--index``, ``--recovery-ast`` and ``-suggest-missing-includes`` + have been retired. These features are now always enabled. + +- Too many stability and correctness fixes to mention.
+ Improvements to clang-doc ------------------------- From a750a2329c433e598f7fc9655d625c5ebb6bc400 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 22 Feb 2021 16:27:19 -0800 Subject: [PATCH 161/318] clang-tidy: Disable cppcoreguidlines-prefer-member-initializer check Fixes https://llvm.org/PR49318 --- .../cppcoreguidelines/CMakeLists.txt | 1 - .../CppCoreGuidelinesTidyModule.cpp | 3 - .../PreferMemberInitializerCheck.cpp | 246 --------- .../PreferMemberInitializerCheck.h | 41 -- clang-tools-extra/docs/ReleaseNotes.rst | 6 - ...reguidelines-prefer-member-initializer.rst | 103 ---- ...ize-use-default-member-init-assignment.cpp | 31 -- ...izer-modernize-use-default-member-init.cpp | 30 -- ...reguidelines-prefer-member-initializer.cpp | 490 ------------------ 9 files changed, 951 deletions(-) delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h delete mode 100644 clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt index a9f5b3e0c15b..39c2c552eb73 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt @@ -13,7 +13,6 @@ add_clang_library(clangTidyCppCoreGuidelinesModule NarrowingConversionsCheck.cpp NoMallocCheck.cpp OwningMemoryCheck.cpp - 
PreferMemberInitializerCheck.cpp ProBoundsArrayToPointerDecayCheck.cpp ProBoundsConstantArrayIndexCheck.cpp ProBoundsPointerArithmeticCheck.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp index bf613109f0eb..4cb5022888d3 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp @@ -22,7 +22,6 @@ #include "NarrowingConversionsCheck.h" #include "NoMallocCheck.h" #include "OwningMemoryCheck.h" -#include "PreferMemberInitializerCheck.h" #include "ProBoundsArrayToPointerDecayCheck.h" #include "ProBoundsConstantArrayIndexCheck.h" #include "ProBoundsPointerArithmeticCheck.h" @@ -67,8 +66,6 @@ class CppCoreGuidelinesModule : public ClangTidyModule { "cppcoreguidelines-non-private-member-variables-in-classes"); CheckFactories.registerCheck( "cppcoreguidelines-owning-memory"); - CheckFactories.registerCheck( - "cppcoreguidelines-prefer-member-initializer"); CheckFactories.registerCheck( "cppcoreguidelines-pro-bounds-array-to-pointer-decay"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp deleted file mode 100644 index 2d7500943860..000000000000 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ /dev/null @@ -1,246 +0,0 @@ -//===--- PreferMemberInitializerCheck.cpp - clang-tidy -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PreferMemberInitializerCheck.h" -#include "clang/AST/ASTContext.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Lex/Lexer.h" - -using namespace clang::ast_matchers; - -namespace clang { -namespace tidy { -namespace cppcoreguidelines { - -static bool isControlStatement(const Stmt *S) { - return isa(S); -} - -static bool isNoReturnCallStatement(const Stmt *S) { - const auto *Call = dyn_cast(S); - if (!Call) - return false; - - const FunctionDecl *Func = Call->getDirectCallee(); - if (!Func) - return false; - - return Func->isNoReturn(); -} - -static bool isLiteral(const Expr *E) { - return isa(E); -} - -static bool isUnaryExprOfLiteral(const Expr *E) { - if (const auto *UnOp = dyn_cast(E)) - return isLiteral(UnOp->getSubExpr()); - return false; -} - -static bool shouldBeDefaultMemberInitializer(const Expr *Value) { - if (isLiteral(Value) || isUnaryExprOfLiteral(Value)) - return true; - - if (const auto *DRE = dyn_cast(Value)) - return isa(DRE->getDecl()); - - return false; -} - -static const std::pair -isAssignmentToMemberOf(const RecordDecl *Rec, const Stmt *S) { - if (const auto *BO = dyn_cast(S)) { - if (BO->getOpcode() != BO_Assign) - return std::make_pair(nullptr, nullptr); - - const auto *ME = dyn_cast(BO->getLHS()->IgnoreParenImpCasts()); - if (!ME) - return std::make_pair(nullptr, nullptr); - - const auto *Field = dyn_cast(ME->getMemberDecl()); - if (!Field) - return std::make_pair(nullptr, nullptr); - - if (isa(ME->getBase())) - return std::make_pair(Field, BO->getRHS()->IgnoreParenImpCasts()); - } else if (const auto *COCE = dyn_cast(S)) { - if (COCE->getOperator() != OO_Equal) - return std::make_pair(nullptr, nullptr); - - const auto *ME = - dyn_cast(COCE->getArg(0)->IgnoreParenImpCasts()); - if (!ME) - return std::make_pair(nullptr, nullptr); - - const auto *Field = 
dyn_cast(ME->getMemberDecl()); - if (!Field) - return std::make_pair(nullptr, nullptr); - - if (isa(ME->getBase())) - return std::make_pair(Field, COCE->getArg(1)->IgnoreParenImpCasts()); - } - - return std::make_pair(nullptr, nullptr); -} - -PreferMemberInitializerCheck::PreferMemberInitializerCheck( - StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context), - IsUseDefaultMemberInitEnabled( - Context->isCheckEnabled("modernize-use-default-member-init")), - UseAssignment(OptionsView("modernize-use-default-member-init", - Context->getOptions().CheckOptions, Context) - .get("UseAssignment", false)) {} - -void PreferMemberInitializerCheck::storeOptions( - ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "UseAssignment", UseAssignment); -} - -void PreferMemberInitializerCheck::registerMatchers(MatchFinder *Finder) { - Finder->addMatcher( - cxxConstructorDecl(hasBody(compoundStmt()), unless(isInstantiated())) - .bind("ctor"), - this); -} - -void PreferMemberInitializerCheck::check( - const MatchFinder::MatchResult &Result) { - const auto *Ctor = Result.Nodes.getNodeAs("ctor"); - const auto *Body = cast(Ctor->getBody()); - - const CXXRecordDecl *Class = Ctor->getParent(); - SourceLocation InsertPos; - bool FirstToCtorInits = true; - - for (const Stmt *S : Body->body()) { - if (S->getBeginLoc().isMacroID()) { - StringRef MacroName = - Lexer::getImmediateMacroName(S->getBeginLoc(), *Result.SourceManager, - getLangOpts()); - if (MacroName.contains_lower("assert")) - return; - } - if (isControlStatement(S)) - return; - - if (isNoReturnCallStatement(S)) - return; - - if (const auto *CondOp = dyn_cast(S)) { - if (isNoReturnCallStatement(CondOp->getLHS()) || - isNoReturnCallStatement(CondOp->getRHS())) - return; - } - - const FieldDecl *Field; - const Expr *InitValue; - std::tie(Field, InitValue) = isAssignmentToMemberOf(Class, S); - if (Field) { - if (IsUseDefaultMemberInitEnabled && getLangOpts().CPlusPlus11 && - Ctor->isDefaultConstructor() && 
- (getLangOpts().CPlusPlus20 || !Field->isBitField()) && - (!isa(Class->getDeclContext()) || - !cast(Class->getDeclContext())->isUnion()) && - shouldBeDefaultMemberInitializer(InitValue)) { - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in an in-class" - " default member initializer") - << Field; - - SourceLocation FieldEnd = - Lexer::getLocForEndOfToken(Field->getSourceRange().getEnd(), 0, - *Result.SourceManager, getLangOpts()); - Diag << FixItHint::CreateInsertion(FieldEnd, - UseAssignment ? " = " : "{") - << FixItHint::CreateInsertionFromRange( - FieldEnd, - CharSourceRange(InitValue->getSourceRange(), true)) - << FixItHint::CreateInsertion(FieldEnd, UseAssignment ? "" : "}"); - - SourceLocation SemiColonEnd = - Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager, - getLangOpts()) - ->getEndLoc(); - CharSourceRange StmtRange = - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); - - Diag << FixItHint::CreateRemoval(StmtRange); - } else { - auto Diag = - diag(S->getBeginLoc(), "%0 should be initialized in a member" - " initializer of the constructor") - << Field; - - bool AddComma = false; - if (!Ctor->getNumCtorInitializers() && FirstToCtorInits) { - SourceLocation BodyPos = Ctor->getBody()->getBeginLoc(); - SourceLocation NextPos = Ctor->getBeginLoc(); - do { - InsertPos = NextPos; - NextPos = Lexer::findNextToken(NextPos, *Result.SourceManager, - getLangOpts()) - ->getLocation(); - } while (NextPos != BodyPos); - InsertPos = Lexer::getLocForEndOfToken( - InsertPos, 0, *Result.SourceManager, getLangOpts()); - - Diag << FixItHint::CreateInsertion(InsertPos, " : "); - } else { - bool Found = false; - for (const auto *Init : Ctor->inits()) { - if (Init->isMemberInitializer()) { - if (Result.SourceManager->isBeforeInTranslationUnit( - Field->getLocation(), Init->getMember()->getLocation())) { - InsertPos = Init->getSourceLocation(); - Found = true; - break; - } - } - } - - if (!Found) { - if (Ctor->getNumCtorInitializers()) { - 
InsertPos = Lexer::getLocForEndOfToken( - (*Ctor->init_rbegin())->getSourceRange().getEnd(), 0, - *Result.SourceManager, getLangOpts()); - } - Diag << FixItHint::CreateInsertion(InsertPos, ", "); - } else { - AddComma = true; - } - } - Diag << FixItHint::CreateInsertion(InsertPos, Field->getName()) - << FixItHint::CreateInsertion(InsertPos, "(") - << FixItHint::CreateInsertionFromRange( - InsertPos, - CharSourceRange(InitValue->getSourceRange(), true)) - << FixItHint::CreateInsertion(InsertPos, ")"); - if (AddComma) - Diag << FixItHint::CreateInsertion(InsertPos, ", "); - - SourceLocation SemiColonEnd = - Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager, - getLangOpts()) - ->getEndLoc(); - CharSourceRange StmtRange = - CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd); - - Diag << FixItHint::CreateRemoval(StmtRange); - FirstToCtorInits = false; - } - } - } -} - -} // namespace cppcoreguidelines -} // namespace tidy -} // namespace clang diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h deleted file mode 100644 index dbef7c98d8e3..000000000000 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h +++ /dev/null @@ -1,41 +0,0 @@ -//===--- PreferMemberInitializerCheck.h - clang-tidy ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H - -#include "../ClangTidyCheck.h" - -namespace clang { -namespace tidy { -namespace cppcoreguidelines { - -/// Finds member initializations in the constructor body which can be placed -/// into the initialization list instead. -/// -/// For the user-facing documentation see: -/// http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.html -class PreferMemberInitializerCheck : public ClangTidyCheck { -public: - PreferMemberInitializerCheck(StringRef Name, ClangTidyContext *Context); - bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { - return LangOpts.CPlusPlus; - } - void storeOptions(ClangTidyOptions::OptionMap &Opts) override; - void registerMatchers(ast_matchers::MatchFinder *Finder) override; - void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - - const bool IsUseDefaultMemberInitEnabled; - const bool UseAssignment; -}; - -} // namespace cppcoreguidelines -} // namespace tidy -} // namespace clang - -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 64b3d224ff6f..b3c9c829198b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -290,12 +290,6 @@ New checks Finds structs that are inefficiently packed or aligned, and recommends packing and/or aligning of said structs as needed. -- New :doc:`cppcoreguidelines-prefer-member-initializer - ` check. - - Finds member initializations in the constructor body which can be placed into - the initialization list instead. 
- - New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst deleted file mode 100644 index 5a5ee3e57a8c..000000000000 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst +++ /dev/null @@ -1,103 +0,0 @@ -.. title:: clang-tidy - cppcoreguidelines-prefer-member-initializer - -cppcoreguidelines-prefer-member-initializer -=========================================== - -Finds member initializations in the constructor body which can be converted -into member initializers of the constructor instead. This not only improves -the readability of the code but also positively affects its performance. -Class-member assignments inside a control statement or following the first -control statement are ignored. - -This check implements `C.49 `_ from the CppCoreGuidelines. - -If the language version is `C++ 11` or above, the constructor is the default -constructor of the class, the field is not a bitfield (only in case of earlier -language version than `C++ 20`), furthermore the assigned value is a literal, -negated literal or ``enum`` constant then the preferred place of the -initialization is at the class member declaration. - -This latter rule is `C.48 `_ from CppCoreGuidelines. - -Please note, that this check does not enforce this latter rule for -initializations already implemented as member initializers. For that purpose -see check `modernize-use-default-member-init `_. - -Example 1 ---------- - -.. code-block:: c++ - - class C { - int n; - int m; - public: - C() { - n = 1; // Literal in default constructor - if (dice()) - return; - m = 1; - } - }; - -Here ``n`` can be initialized using a default member initializer, unlike -``m``, as ``m``'s initialization follows a control statement (``if``): - -.. 
code-block:: c++ - - class C { - int n{1}; - int m; - public: - C() { - if (dice()) - return; - m = 1; - } - -Example 2 ---------- - -.. code-block:: c++ - - class C { - int n; - int m; - public: - C(int nn, int mm) { - n = nn; // Neither default constructor nor literal - if (dice()) - return; - m = mm; - } - }; - -Here ``n`` can be initialized in the constructor initialization list, unlike -``m``, as ``m``'s initialization follows a control statement (``if``): - -.. code-block:: c++ - - C(int nn, int mm) : n(nn) { - if (dice()) - return; - m = mm; - } - -.. option:: UseAssignment - - If this option is set to `true` (default is `false`), the check will initialize - members with an assignment. In this case the fix of the first example looks - like this: - -.. code-block:: c++ - - class C { - int n = 1; - int m; - public: - C() { - if (dice()) - return; - m = 1; - } - }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp deleted file mode 100644 index dc6cb7606a0d..000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t -- \ -// RUN: -config="{CheckOptions: [{key: modernize-use-default-member-init.UseAssignment, value: 1}]}" - -class Simple1 { - int n; - // CHECK-FIXES: int n = 0; - double x; - // CHECK-FIXES: double x = 0.0; - -public: - Simple1() { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 
'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp deleted file mode 100644 index fe5bb7c3bb98..000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t - -class Simple1 { - int n; - // CHECK-FIXES: int n{0}; - double x; - // CHECK-FIXES: double x{0.0}; - -public: - Simple1() { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of 
the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp deleted file mode 100644 index b5c04c32c9fa..000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp +++ /dev/null @@ -1,490 +0,0 @@ -// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer %t -- -- -fcxx-exceptions - -extern void __assert_fail (__const char *__assertion, __const char *__file, - unsigned int __line, __const char *__function) - __attribute__ ((__noreturn__)); -#define assert(expr) \ - ((expr) ? 
(void)(0) : __assert_fail (#expr, __FILE__, __LINE__, __func__)) - -class Simple1 { - int n; - double x; - -public: - Simple1() { - // CHECK-FIXES: Simple1() : n(0), x(0.0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple1(int nn, double xx) { - // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple1() = default; -}; - -class Simple2 { - int n; - double x; - -public: - Simple2() : n(0) { - // CHECK-FIXES: Simple2() : n(0), x(0.0) { - x = 0.0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple2(int nn, double xx) : n(nn) { - // CHECK-FIXES: Simple2(int nn, double xx) : n(nn), x(xx) { - x = xx; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple2() = default; -}; - -class Simple3 { - int n; - double x; - -public: - Simple3() : x(0.0) { - // CHECK-FIXES: Simple3() : n(0), x(0.0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the 
constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - Simple3(int nn, double xx) : x(xx) { - // CHECK-FIXES: Simple3(int nn, double xx) : n(nn), x(xx) { - n = nn; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple3() = default; -}; - -int something_int(); -double something_double(); - -class Simple4 { - int n; - -public: - Simple4() { - // CHECK-FIXES: Simple4() : n(something_int()) { - n = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Simple4() = default; -}; - -static bool dice(); - -class Complex1 { - int n; - int m; - -public: - Complex1() : n(0) { - if (dice()) - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional expression - } - - ~Complex1() = default; -}; - -class Complex2 { - int n; - int m; - -public: - Complex2() : n(0) { - if (!dice()) - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional expression - } - - ~Complex2() = default; -}; - -class Complex3 { - int n; - int m; - -public: - Complex3() : n(0) { - while (dice()) - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional loop - } - - ~Complex3() = default; -}; - -class Complex4 { - int n; - int m; - -public: - Complex4() : n(0) { - while (!dice()) - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex4() = default; -}; - -class Complex5 { - int n; - int m; - -public: - Complex5() : n(0) { - do { - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional loop - } while (dice()); - } - - ~Complex5() = default; -}; - -class Complex6 { - int n; - int m; - -public: - Complex6() : n(0) { - do { - return; - } while 
(!dice()); - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex6() = default; -}; - -class Complex7 { - int n; - int m; - -public: - Complex7() : n(0) { - for (int i = 2; i < 1; ++i) { - m = 1; - } - // NO-MESSAGES: initialization of 'm' is nested into a conditional loop - } - - ~Complex7() = default; -}; - -class Complex8 { - int n; - int m; - -public: - Complex8() : n(0) { - for (int i = 0; i < 2; ++i) { - return; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional loop - } - - ~Complex8() = default; -}; - -class Complex9 { - int n; - int m; - -public: - Complex9() : n(0) { - switch (dice()) { - case 1: - m = 1; - // NO-MESSAGES: initialization of 'm' is nested in a conditional expression - break; - default: - break; - } - } - - ~Complex9() = default; -}; - -class Complex10 { - int n; - int m; - -public: - Complex10() : n(0) { - switch (dice()) { - case 1: - return; - break; - default: - break; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a conditional expression - } - - ~Complex10() = default; -}; - -class E {}; -int risky(); // may throw - -class Complex11 { - int n; - int m; - -public: - Complex11() : n(0) { - try { - risky(); - m = 1; - // NO-MESSAGES: initialization of 'm' follows is nested in a try-block - } catch (const E& e) { - return; - } - } - - ~Complex11() = default; -}; - -class Complex12 { - int n; - int m; - -public: - Complex12() : n(0) { - try { - risky(); - } catch (const E& e) { - return; - } - m = 1; - // NO-MESSAGES: initialization of 'm' follows a try-block - } - - ~Complex12() = default; -}; - -class Complex13 { - int n; - int m; - -public: - Complex13() : n(0) { - return; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a return statement - } - - ~Complex13() = default; -}; - -class Complex14 { - int n; - int m; - -public: - Complex14() : n(0) { - goto X; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a goto statement - X: - ; - } - - 
~Complex14() = default; -}; - -void returning(); - -class Complex15 { - int n; - int m; - -public: - Complex15() : n(0) { - // CHECK-FIXES: Complex15() : n(0), m(1) { - returning(); - m = 1; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'm' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Complex15() = default; -}; - -[[noreturn]] void not_returning(); - -class Complex16 { - int n; - int m; - -public: - Complex16() : n(0) { - not_returning(); - m = 1; - // NO-MESSAGES: initialization of 'm' follows a non-returning function call - } - - ~Complex16() = default; -}; - -class Complex17 { - int n; - int m; - -public: - Complex17() : n(0) { - throw 1; - m = 1; - // NO-MESSAGES: initialization of 'm' follows a 'throw' statement; - } - - ~Complex17() = default; -}; - -class Complex18 { - int n; - -public: - Complex18() try { - n = risky(); - // NO-MESSAGES: initialization of 'n' in a 'try' body; - } catch (const E& e) { - n = 0; - } - - ~Complex18() = default; -}; - -class Complex19 { - int n; -public: - Complex19() { - // CHECK-FIXES: Complex19() : n(0) { - n = 0; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - explicit Complex19(int) { - // CHECK-FIXES: Complex19(int) : n(12) { - n = 12; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } - - ~Complex19() = default; -}; - -class Complex20 { - int n; - int m; - -public: - Complex20(int k) : n(0) { - assert(k > 0); - m = 1; - // NO-MESSAGES: initialization of 'm' follows an assertion - } - - ~Complex20() = default; -}; - -class VeryComplex1 { - int n1, n2, n3; - double x1, x2, x3; - int n4, n5, n6; - double x4, x5, x6; - - VeryComplex1() : 
n3(something_int()), x3(something_double()), - n5(something_int()), x4(something_double()), - x5(something_double()) { - // CHECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()), - // CHECK-FIXES: n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()), - // CHECK-FIXES: x5(something_double()), x6(something_double()) { - -// FIXME: Order of elements on the constructor initializer list should match -// the order of the declaration of the fields. Thus the correct fixes -// should look like these: -// - // C ECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()), - // C ECK-FIXES: n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()), - // C ECK-FIXES: x5(something_double()), x6(something_double()) { -// -// However, the Diagnostics Engine processes fixes in the order of the -// diagnostics and insertions to the same position are handled in left to -// right order thus in the case two adjacent fields are initialized -// inside the constructor in reverse order the provided fix is a -// constructor initializer list that does not match the order of the -// declaration of the fields. 
- - x2 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n2 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x6 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x1 = something_double(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n6 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n1 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - n4 = something_int(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n4' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - } -}; - -struct Outside { - int n; - double x; - Outside(); -}; - -Outside::Outside() { - // CHECK-FIXES: Outside::Outside() : n(1), x(1.0) { - n = 1; - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} - x = 1.0; - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'x' should be initialized in a member initializer of the constructor 
[cppcoreguidelines-prefer-member-initializer] - // CHECK-FIXES: {{^\ *$}} -} From da7fa7457800394d610e8cbd6befe7bc944ca7d0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 21 Feb 2021 16:24:18 +0100 Subject: [PATCH 162/318] [JumpThreading] Clone noalias.scope.decl when threading blocks When cloning instructions during jump threading, also clone and adapt any declared scopes. This is primarily important when threading loop exits, because we'll end up with two dominating scope declarations in that case (at least after additional loop rotation). This addresses a loose thread from https://reviews.llvm.org/rG2556b413a7b8#975012. Differential Revision: https://reviews.llvm.org/D97154 (cherry picked from commit 5e7e499b912d2c9ebaa91b5783ca123dbedeabcc) --- llvm/include/llvm/Transforms/Utils/Cloning.h | 7 +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp | 10 +++ llvm/lib/Transforms/Utils/CloneFunction.cpp | 8 +++ .../JumpThreading/noalias-scope-decl.ll | 63 +++++++++++++++++++ 4 files changed, 88 insertions(+) create mode 100644 llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 56aaa5d48e2a..aa960c625630 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -274,6 +274,13 @@ void updateProfileCallee( void identifyNoAliasScopesToClone( ArrayRef BBs, SmallVectorImpl &NoAliasDeclScopes); +/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified +/// instruction range and extract their scope. These are candidates for +/// duplication when cloning. +void identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl &NoAliasDeclScopes); + /// Duplicate the specified list of noalias decl scopes. /// The 'Ext' string is added as an extension to the name. 
/// Afterwards, the ClonedScopes contains the mapping of the original scope diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 96aef90c1c1a..10b08b4e2224 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, ValueMapping[PN] = NewPN; } + // Clone noalias scope declarations in the threaded block. When threading a + // loop exit, we would otherwise end up with two identical scope declarations + // visible at the same time. + SmallVector NoAliasScopes; + DenseMap ClonedScopes; + LLVMContext &Context = PredBB->getContext(); + identifyNoAliasScopesToClone(BI, BE, NoAliasScopes); + cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context); + // Clone the non-phi instructions of the source basic block into NewBB, // keeping track of the mapping and using it to remap operands in the cloned // instructions. @@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI, New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[&*BI] = New; + adaptNoAliasScopes(New, ClonedScopes, Context); // Remap operands to patch up intra-block references. 
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 51a49574e55d..6ab061510a60 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone( if (auto *Decl = dyn_cast(&I)) NoAliasDeclScopes.push_back(Decl->getScopeList()); } + +void llvm::identifyNoAliasScopesToClone( + BasicBlock::iterator Start, BasicBlock::iterator End, + SmallVectorImpl &NoAliasDeclScopes) { + for (Instruction &I : make_range(Start, End)) + if (auto *Decl = dyn_cast(&I)) + NoAliasDeclScopes.push_back(Decl->getScopeList()); +} diff --git a/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll b/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll new file mode 100644 index 000000000000..b032afaaf313 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -jump-threading < %s | FileCheck %s + +define void @test(i8* %ptr) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[I]], 100 +; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3) +; CHECK-NEXT: store i8 0, i8* [[PTR:%.*]], align 1, !noalias !0 +; CHECK-NEXT: store i8 1, i8* [[PTR]], align 1, !noalias !3 +; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1 +; CHECK-NEXT: br label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5) +; CHECK-NEXT: store i8 0, i8* [[PTR]], align 1, !noalias !0 +; 
CHECK-NEXT: store i8 1, i8* [[PTR]], align 1, !noalias !5 +; CHECK-NEXT: ret void +; +entry: + call void @llvm.experimental.noalias.scope.decl(metadata !0) + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.inc, %latch ] + %c = icmp eq i32 %i, 100 + br i1 %c, label %if, label %latch + +if: + br label %latch + +latch: + %p = phi i1 [ true, %if ], [ false, %loop ] + call void @llvm.experimental.noalias.scope.decl(metadata !3) + store i8 0, i8* %ptr, !noalias !0 + store i8 1, i8* %ptr, !noalias !3 + %i.inc = add i32 %i, 1 + br i1 %p, label %exit, label %loop + +exit: + ret void +} + +declare void @llvm.experimental.noalias.scope.decl(metadata) + +!0 = !{!1} +!1 = distinct !{!1, !2, !"scope1"} +!2 = distinct !{!2, !"domain"} +!3 = !{!4} +!4 = distinct !{!4, !2, !"scope2"} + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2, !"scope1"} +; CHECK: !2 = distinct !{!2, !"domain"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !2, !"scope2"} +; CHECK: !5 = !{!6} +; CHECK: !6 = distinct !{!6, !2, !"scope2:thread"} From a92ceea91116e7b95d23eff634507fa2cff86ef2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 22 Feb 2021 17:35:09 -0800 Subject: [PATCH 163/318] Revert "[llvm-cov] reset executation count to 0 after wrapped segment" This reverts commit e3df9471750935876bd2bf7da93ccf0eacca8592. This commit caused regressions in coverage generation for both Rust and Swift. We're reverting this in the release/12.x branch until we have a proper fix in trunk. 
http://llvm.org/PR49297 --- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp | 1 - llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h | 2 +- llvm/test/tools/llvm-cov/ignore-filename-regex.test | 4 ++-- llvm/unittests/ProfileData/CoverageMappingTest.cpp | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index a8cc308b4e3a..cdbcde50d33a 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats( ExecutionCount = WrappedSegment->Count; if (!MinRegionCount) return; - ExecutionCount = 0; for (const auto *LS : LineSegments) if (isStartOfRegion(LS)) ExecutionCount = std::max(ExecutionCount, LS->Count); diff --git a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h index d224fd0d00ea..07941f9bb497 100644 --- a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h +++ b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h @@ -12,7 +12,7 @@ template T FOO::DoIt(T ti) { // HEADER: [[@LINE]]| 2|template for (T I = 0; I < ti; I++) { // HEADER: [[@LINE]]| 22| for (T t += I; // HEADER: [[@LINE]]| 20| t += I; if (I > ti / 2) // HEADER: [[@LINE]]| 20| if (I > ti - t -= 1; // HEADER: [[@LINE]]| 8| t -= 1; + t -= 1; // HEADER: [[@LINE]]| 20| t -= 1; } // HEADER: [[@LINE]]| 20| } // HEADER: [[@LINE]]| 2| return t; // HEADER: [[@LINE]]| 2| return t; diff --git a/llvm/test/tools/llvm-cov/ignore-filename-regex.test b/llvm/test/tools/llvm-cov/ignore-filename-regex.test index efc4cda4abc0..aea9e4646776 100644 --- a/llvm/test/tools/llvm-cov/ignore-filename-regex.test +++ b/llvm/test/tools/llvm-cov/ignore-filename-regex.test @@ -22,7 +22,7 @@ REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}} REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}} REPORT_IGNORE_DIR: {{.*}}abs.h{{.*}} REPORT_IGNORE_DIR: 
{{.*}}main.cc{{.*}} -REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}90.00%{{$}} +REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}100.00%{{$}} # Ignore all files from "extra" directory even when SOURCES specified. RUN: llvm-cov report -instr-profile %S/Inputs/sources_specified/main.profdata \ @@ -35,7 +35,7 @@ REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}main.cc{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES: {{.*}}abs.h{{.*}} -REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}80.00%{{$}} +REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}100.00%{{$}} ######################## # Test "show" command. diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index 43386d23883e..4854b7f1454c 100644 --- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -675,7 +675,7 @@ TEST_P(CoverageMappingTest, test_line_coverage_iterator) { CoverageData Data = LoadedCoverage->getCoverageForFile("file1"); unsigned Line = 0; - unsigned LineCounts[] = {20, 20, 20, 20, 10, 10, 10, 10, 10, 0, 0}; + unsigned LineCounts[] = {20, 20, 20, 20, 30, 10, 10, 10, 10, 0, 0}; for (const auto &LCS : getLineCoverageStats(Data)) { ASSERT_EQ(Line + 1, LCS.getLine()); errs() << "Line: " << Line + 1 << ", count = " << LCS.getExecutionCount() << "\n"; From 99df95fd910becbcf89dd6f17f1e259353a72d27 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Wed, 3 Feb 2021 12:45:46 +0100 Subject: [PATCH 164/318] [clang][CodeComplete] Fix crash on ParenListExprs Fixes https://github.com/clangd/clangd/issues/676. 
Differential Revision: https://reviews.llvm.org/D95935 --- clang/lib/Sema/SemaCodeComplete.cpp | 18 ++++++++++++++++-- .../test/CodeCompletion/function-overloads.cpp | 6 ++++++ clang/test/CodeCompletion/member-access.c | 7 +++++++ clang/unittests/Sema/CodeCompleteTest.cpp | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index c2785fd60fc2..40ea0f5d24b3 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5168,6 +5168,15 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, if (!Base || !CodeCompleter) return; + // Peel off the ParenListExpr by chosing the last one, as they don't have a + // predefined type. + if (auto *PLE = llvm::dyn_cast(Base)) + Base = PLE->getExpr(PLE->getNumExprs() - 1); + if (OtherOpBase) { + if (auto *PLE = llvm::dyn_cast(OtherOpBase)) + OtherOpBase = PLE->getExpr(PLE->getNumExprs() - 1); + } + ExprResult ConvertedBase = PerformMemberExprBaseConversion(Base, IsArrow); if (ConvertedBase.isInvalid()) return; @@ -5597,12 +5606,17 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S, QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, ArrayRef Args, SourceLocation OpenParLoc) { - if (!CodeCompleter) + if (!CodeCompleter || !Fn) return QualType(); + // If we have a ParenListExpr for LHS, peel it off by chosing the last expr. + // As ParenListExprs don't have a predefined type. + if (auto *PLE = llvm::dyn_cast(Fn)) + Fn = PLE->getExpr(PLE->getNumExprs() - 1); + // FIXME: Provide support for variadic template functions. // Ignore type-dependent call expressions entirely. - if (!Fn || Fn->isTypeDependent() || anyNullArguments(Args)) + if (Fn->isTypeDependent() || anyNullArguments(Args)) return QualType(); // In presence of dependent args we surface all possible signatures using the // non-dependent args in the prefix. 
Afterwards we do a post filtering to make diff --git a/clang/test/CodeCompletion/function-overloads.cpp b/clang/test/CodeCompletion/function-overloads.cpp index 11c864c28107..7b8ccef1d580 100644 --- a/clang/test/CodeCompletion/function-overloads.cpp +++ b/clang/test/CodeCompletion/function-overloads.cpp @@ -21,6 +21,8 @@ namespace NS { void test_adl() { NS::X x; g(x, x); + (void)(f)(1, 2, 3); + (void)(test, test, test, f)(1, 2, 3); } // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:10:9 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s @@ -31,6 +33,10 @@ void test_adl() { // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:10:21 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:23:7 %s -o - | \ // RUN: FileCheck -check-prefix=CHECK-CC5 %s +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:24:13 %s -o - | \ +// RUN: FileCheck -check-prefix=CHECK-CC1 %s +// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:25:31 %s -o - | \ +// RUN: FileCheck -check-prefix=CHECK-CC1 %s // CHECK-CC1: OVERLOAD: [#int#]f(<#float x#>, float y) // CHECK-CC1: OVERLOAD: [#int#]f(<#int i#>) // CHECK-CC1-NOT, CHECK-CC2-NOT: OVERLOAD: A( diff --git a/clang/test/CodeCompletion/member-access.c b/clang/test/CodeCompletion/member-access.c index 72afbf2ff947..545349f71731 100644 --- a/clang/test/CodeCompletion/member-access.c +++ b/clang/test/CodeCompletion/member-access.c @@ -29,3 +29,10 @@ void test3(struct Point2 *p) { // RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:24:5 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s // CHECK-CC3: x (requires fix-it: {24:4-24:5} to "->") + +void test4(struct Point *p) { + (int)(p)->x; + (int)(0,1,2,3,4,p)->x; +} +// RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:34:13 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s +// RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:35:23 %s -o - | 
FileCheck -check-prefix=CHECK-CC1 %s diff --git a/clang/unittests/Sema/CodeCompleteTest.cpp b/clang/unittests/Sema/CodeCompleteTest.cpp index d8b303d77bb9..dae0793658c5 100644 --- a/clang/unittests/Sema/CodeCompleteTest.cpp +++ b/clang/unittests/Sema/CodeCompleteTest.cpp @@ -488,6 +488,7 @@ TEST(PreferredTypeTest, NoCrashOnInvalidTypes) { auto y = new decltype(&1)(^); // GNU decimal type extension is not supported in clang. auto z = new _Decimal128(^); + void foo() { (void)(foo)(^); } )cpp"; EXPECT_THAT(collectPreferredTypes(Code), Each("NULL TYPE")); } From 7fc6c60608e416e7f8f5c194768c6dd511449c1b Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 18 Feb 2021 13:48:43 +0100 Subject: [PATCH 165/318] [clang][CodeComplete] Ensure there are no crashes when completing with ParenListExprs as LHS Differential Revision: https://reviews.llvm.org/D96950 --- clang/lib/Sema/SemaCodeComplete.cpp | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 40ea0f5d24b3..be04970979b3 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5158,6 +5158,20 @@ class ConceptInfo { llvm::DenseMap Results; }; + +// If \p Base is ParenListExpr, assume a chain of comma operators and pick the +// last expr. We expect other ParenListExprs to be resolved to e.g. constructor +// calls before here. 
(So the ParenListExpr should be nonempty, but check just +// in case) +Expr *unwrapParenList(Expr *Base) { + if (auto *PLE = llvm::dyn_cast_or_null(Base)) { + if (PLE->getNumExprs() == 0) + return nullptr; + Base = PLE->getExpr(PLE->getNumExprs() - 1); + } + return Base; +} + } // namespace void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, @@ -5165,18 +5179,11 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base, SourceLocation OpLoc, bool IsArrow, bool IsBaseExprStatement, QualType PreferredType) { + Base = unwrapParenList(Base); + OtherOpBase = unwrapParenList(OtherOpBase); if (!Base || !CodeCompleter) return; - // Peel off the ParenListExpr by chosing the last one, as they don't have a - // predefined type. - if (auto *PLE = llvm::dyn_cast(Base)) - Base = PLE->getExpr(PLE->getNumExprs() - 1); - if (OtherOpBase) { - if (auto *PLE = llvm::dyn_cast(OtherOpBase)) - OtherOpBase = PLE->getExpr(PLE->getNumExprs() - 1); - } - ExprResult ConvertedBase = PerformMemberExprBaseConversion(Base, IsArrow); if (ConvertedBase.isInvalid()) return; @@ -5606,14 +5613,10 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S, QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, ArrayRef Args, SourceLocation OpenParLoc) { + Fn = unwrapParenList(Fn); if (!CodeCompleter || !Fn) return QualType(); - // If we have a ParenListExpr for LHS, peel it off by chosing the last expr. - // As ParenListExprs don't have a predefined type. - if (auto *PLE = llvm::dyn_cast(Fn)) - Fn = PLE->getExpr(PLE->getNumExprs() - 1); - // FIXME: Provide support for variadic template functions. // Ignore type-dependent call expressions entirely. if (Fn->isTypeDependent() || anyNullArguments(Args)) From 1c0a0c727eaeee7d7283f9dabe861e69881764c4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 23 Feb 2021 11:12:50 -0800 Subject: [PATCH 166/318] [12.0.0][llvm-symbolizer][test] Fix test broken after cherry-pick See bug https://bugs.llvm.org/show_bug.cgi?id=49227. 
The cherry-pick 0d4f8a3f364f introduced a test failure, as the test included use of a feature that was only recently added to lit and isn't in the release branch. This patch fixes up the test to manage without this lit change. Reviewed By: tstellar, MaskRay Differential Revision: https://reviews.llvm.org/D97272 --- llvm/test/tools/llvm-symbolizer/output-style-inlined.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test index 1b8e3a2f22fb..103c2afc176e 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test @@ -33,17 +33,17 @@ GNU: inctwo ## is specified, but a file doesn't exist. Check we report an error. RUN: llvm-symbolizer --output-style=GNU --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \ -RUN: | FileCheck %s --check-prefix=NOT-EXIST-GNU -DMSG=%errc_ENOENT +RUN: | FileCheck %s --check-prefix=NOT-EXIST-GNU RUN: llvm-symbolizer --output-style=LLVM --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \ -RUN: | FileCheck %s --check-prefix=NOT-EXIST-LLVM -DMSG=%errc_ENOENT +RUN: | FileCheck %s --check-prefix=NOT-EXIST-LLVM -# NOT-EXIST-GNU: LLVMSymbolizer: error reading file: [[MSG]] +# NOT-EXIST-GNU: LLVMSymbolizer: error reading file: {{[Nn]}}o such file or directory # NOT-EXIST-GNU-NEXT: ?? # NOT-EXIST-GNU-NEXT: ??:0 # NOT-EXIST-GNU-NEXT: ?? # NOT-EXIST-GNU-NEXT: ??:0 -# NOT-EXIST-LLVM: LLVMSymbolizer: error reading file: [[MSG]] +# NOT-EXIST-LLVM: LLVMSymbolizer: error reading file: {{[Nn]}}o such file or directory # NOT-EXIST-LLVM-NEXT: ?? 
# NOT-EXIST-LLVM-NEXT: ??:0:0 # NOT-EXIST-LLVM-EMPTY: From eccac5a8aec92c995f0f8ef090ba4142e0334b46 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Wed, 3 Feb 2021 18:16:04 -0800 Subject: [PATCH 167/318] Add auto-upgrade support for annotation intrinsics The llvm.ptr.annotation and llvm.var.annotation intrinsics were changed since the 11.0 release to add an additional parameter. This patch auto-upgrades IR containing the four-parameter versions of these intrinsics, adding a null pointer as the fifth argument. Differential Revision: https://reviews.llvm.org/D95993 (cherry picked from commit 9a827906cb95e7c3ae94627558da67b47ffde249) --- llvm/lib/IR/AutoUpgrade.cpp | 42 ++++++++++++++++ llvm/test/Bitcode/upgrade-ptr-annotation.ll | 45 ++++++++++++++++++ .../test/Bitcode/upgrade-ptr-annotation.ll.bc | Bin 0 -> 1524 bytes llvm/test/Bitcode/upgrade-var-annotation.ll | 15 ++++++ .../test/Bitcode/upgrade-var-annotation.ll.bc | Bin 0 -> 1232 bytes 5 files changed, 102 insertions(+) create mode 100644 llvm/test/Bitcode/upgrade-ptr-annotation.ll create mode 100644 llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc create mode 100644 llvm/test/Bitcode/upgrade-var-annotation.ll create mode 100644 llvm/test/Bitcode/upgrade-var-annotation.ll.bc diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 23e7af6287b6..7d83cf5dcf1d 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); return true; } + } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::ptr_annotation, + F->arg_begin()->getType()); + return true; } break; @@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; + case 'v': { + if (Name == "var.annotation" && F->arg_size() == 4) { + 
rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::var_annotation); + return true; + } + break; + } + case 'x': if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; @@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::ptr_annotation: + // Upgrade from versions that lacked the annotation attribute argument. + assert(CI->getNumArgOperands() == 4 && + "Before LLVM 12.0 this intrinsic took four arguments"); + // Create a new call with an added null annotation attribute argument. + NewCall = Builder.CreateCall( + NewFn, + {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), + CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())}); + NewCall->takeName(CI); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + return; + + case Intrinsic::var_annotation: + // Upgrade from versions that lacked the annotation attribute argument. + assert(CI->getNumArgOperands() == 4 && + "Before LLVM 12.0 this intrinsic took four arguments"); + // Create a new call with an added null annotation attribute argument. + NewCall = Builder.CreateCall( + NewFn, + {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), + CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())}); + CI->eraseFromParent(); + return; + case Intrinsic::x86_xop_vfrcz_ss: case Intrinsic::x86_xop_vfrcz_sd: NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); diff --git a/llvm/test/Bitcode/upgrade-ptr-annotation.ll b/llvm/test/Bitcode/upgrade-ptr-annotation.ll new file mode 100644 index 000000000000..aeacc6f1a6ce --- /dev/null +++ b/llvm/test/Bitcode/upgrade-ptr-annotation.ll @@ -0,0 +1,45 @@ +; Test upgrade of ptr.annotation intrinsics. 
+; +; RUN: llvm-dis < %s.bc | FileCheck %s + +; Unused return values +; The arguments passed to the intrinisic wouldn't normally be arguments to +; the function, but that makes it easier to test that they are handled +; correctly. +define void @f1(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) { +;CHECK: @f1(i8* [[ARG0:%.*]], i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i32 [[ARG3:%.*]]) + %t0 = call i8* @llvm.ptr.annotation.p0i8(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) +;CHECK: call i8* @llvm.ptr.annotation.p0i8(i8* [[ARG0]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null) + + %arg0_p16 = bitcast i8* %arg0 to i16* + %t1 = call i16* @llvm.ptr.annotation.p0i16(i16* %arg0_p16, i8* %arg1, i8* %arg2, i32 %arg3) +;CHECK: [[ARG0_P16:%.*]] = bitcast +;CHECK: call i16* @llvm.ptr.annotation.p0i16(i16* [[ARG0_P16]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null) + + %arg0_p256 = bitcast i8* %arg0 to i256* + %t2 = call i256* @llvm.ptr.annotation.p0i256(i256* %arg0_p256, i8* %arg1, i8* %arg2, i32 %arg3) +;CHECK: [[ARG0_P256:%.*]] = bitcast +;CHECK: call i256* @llvm.ptr.annotation.p0i256(i256* [[ARG0_P256]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null) + ret void +} + +; Used return values +define i16* @f2(i16* %x, i16* %y) { + %t0 = call i16* @llvm.ptr.annotation.p0i16(i16* %x, i8* undef, i8* undef, i32 undef) + %t1 = call i16* @llvm.ptr.annotation.p0i16(i16* %y, i8* undef, i8* undef, i32 undef) + %cmp = icmp ugt i16* %t0, %t1 + %sel = select i1 %cmp, i16* %t0, i16* %t1 + ret i16* %sel +; CHECK: [[T0:%.*]] = call i16* @llvm.ptr.annotation.p0i16(i16* %x, i8* undef, i8* undef, i32 undef, i8* null) +; CHECK: [[T1:%.*]] = call i16* @llvm.ptr.annotation.p0i16(i16* %y, i8* undef, i8* undef, i32 undef, i8* null) +; CHECK: %cmp = icmp ugt i16* [[T0]], [[T1]] +; CHECK: %sel = select i1 %cmp, i16* [[T0]], i16* [[T1]] +; CHECK: ret i16* %sel +} + +declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) +; CHECK: declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*) 
+declare i16* @llvm.ptr.annotation.p0i16(i16*, i8*, i8*, i32) +; CHECK: declare i16* @llvm.ptr.annotation.p0i16(i16*, i8*, i8*, i32, i8*) +declare i256* @llvm.ptr.annotation.p0i256(i256*, i8*, i8*, i32) +; CHECK: declare i256* @llvm.ptr.annotation.p0i256(i256*, i8*, i8*, i32, i8*) diff --git a/llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc b/llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..5db0810ff88515d1fc40abe7fc20275d8cfb83aa GIT binary patch literal 1524 zcmZ`(Z%h+s7=QcsPAJzK64#Dva|M=kPVri#UTM3uJ*U-?Sa2U$CRwl4iVNcLXIOAC zN3Z5^>xUwkbQv)=AtruU(q(AOGz;aBG1nM$ZgCqUG*om34O3^#X5!vf_wR#Ga_>Fw zd(S=Z@ArG&=k+p^3-uKMr~m*#4OO%Aov*^Jf1ce{+Qc`?;F?JTfDbyQaubll`8xO@ zgVn>HS>+BjZnYnm#Wb&~P`y$4ageWgQ|>)p>K(_e6*Q`QtHjzoSl`yp{3Ar_d0${131Lt`+#LjRNqBsS*34;mHQ=$hRBm)`gM_zZ@J$o;-4) zwc}Bu1V>@iKPJ;PuaOsY_}fUpte5ZR!M{<;fbY^?4PMM%jN*aj%pJGzzZiE=a5tR> zfNz2Nj=-=wlqaThM7Toc8e-^}6!-E-e+21Cc4?M@_AZV+vOK zDzggLEaQqRT-ivxmqQMtNWV|yIAnlR^hHFD6!qDcu$2UsqqLorc4-w`^<#DTv|+(S z+%OrYSYwzv=C~+0<{0~g!g*0}#uSe1YDHfFIoytHX&1ZU$ctP;a8R>hhf@u%& z;(Msrr4zf8Vt;aQ!@X7P-Xiwk3C&(tN#Ns^2~;&{@fmW?WE^CsWQm;~0WdDSoL0G8GR& zJSZ|?M!J)OdzNr)WfHsT#~x7CnQIZ=wrDAgV4XV2h~#c?MevXzwnnS4r(nN_&&kKJsIoIJQJ#aQa|T{p%u@_hXMK zCij!cu$&@RWkuxdw(CDiG%QwH+H`|y$TAH>Oao*+$Gic~z_YD~XR5TM+YD=k476}b z#swu$Gxj+tx=hTZ%5HlKM`_t)c&5R+%}W97mLE#1%@+jL-tuD))BGUPZ!T6oB0|&! z-YwjiL{G>ZBw{JV_5M_qDd6G74NK?Flf(0zgmrg zXkbzoBbCoxVwyswGB2tAu1vr?a7GRSYEMI``|v^;X!&uR*U|F}kI|J8TJCJ39If)N;MuwQOhS!Ctm^|G~~$j@nPZ zVBKPS!3S%-&0?vw)KX*}OIvAc-4hcjsMxaqoauRFGh#!<^TWy literal 0 HcmV?d00001 diff --git a/llvm/test/Bitcode/upgrade-var-annotation.ll b/llvm/test/Bitcode/upgrade-var-annotation.ll new file mode 100644 index 000000000000..30f692cd8db8 --- /dev/null +++ b/llvm/test/Bitcode/upgrade-var-annotation.ll @@ -0,0 +1,15 @@ +; Test upgrade of var.annotation intrinsics. 
+; +; RUN: llvm-dis < %s.bc | FileCheck %s + + +define void @f(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) { +;CHECK: @f(i8* [[ARG0:%.*]], i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i32 [[ARG3:%.*]]) + call void @llvm.var.annotation(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) +;CHECK: call void @llvm.var.annotation(i8* [[ARG0]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null) + ret void +} + +; Function Attrs: nofree nosync nounwind willreturn +declare void @llvm.var.annotation(i8*, i8*, i8*, i32) +; CHECK: declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*) diff --git a/llvm/test/Bitcode/upgrade-var-annotation.ll.bc b/llvm/test/Bitcode/upgrade-var-annotation.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..c5f88855fb171b5a787b5637ee62834e30edeca4 GIT binary patch literal 1232 zcmYLJaZDRk7=NXOyFmBu1f1=-Hg{thvJ7@DgI#HBD7O(n5{vo=CL?lPSD9gObhQ*P zOG~?5r;RCtKl%qTBjF$as}aqT5kong6`KW@#h^pdhQg*{q;a?`N}}&N7r*3v@4fH6 z``+*SecyL&{L*GqH2|dm0GF0M*7fc;p1!}odb6S@*sVe~MhgH$i;5Bkl%V}#G_X+Z zlq0R_EF;a98>)!*)lyt<)O_L!YR{Im-Kc1Dljdp;*S%40Hb*KZ8I*X40q=!4_jKi7Ou&t?P=Z~ zQ`<9vm`}teaBQqY5=CrWRQm&xNK5+6OK>+1b1X5$5?cjWaKc7%#jwdx>x^MpFnZYQ z*2S=OjkheQZHr-BL~YF!s{Q@g!~k}BK=L9Tl*G6s_GA7!%>S$uxg8Bk@8QyjPVy$C zu|(*^UIFf9;h#%zmsPvcb5wqg$`tj6K&>*=O31J(l>HRvEot78DC%1>H^XyLwRJ{5 zi5@+ewBA=+ld|@^i;~zYdBw1A01Na>KAq&dDe3RNP3!^U4=2pW;Z`e=qv0-!g03@c zq>RfB<2{Dj2pKoBR6b|;U1jv>dblLtG8?vLoX7qPq+c2hV1YzW_kNsE7%+ZdTotHC zjA1inxS}#DfEhzUD0}B>n9Io4ByUHA94y=Nyfuopa_rl36sa_d@Zi{Z9p+7h&TWx! 
zcL{Dg;S;ttbV!ii++rcxKFOB-mW6pIe8%#*Ul_yA992*ikqhnj zf0j9vtfYK$oo~tTEm6J&MZLzq4n9Y9sz)_dQSPlnxx&VK#RP9hlqY%1nmoBf-I+V| z$f1&Q|Q9=fJh&Xn(7YXwt| z2S9bE{gOsQ>c4)OY;|uSyCNg>^By!q@@NEvm0v|s`cOk3L90sj%%WoO3=aA)HTVT@ zgD^BS>=S$!hlfn222%q|Hwv7YGdG@Yb~T?jyIjppLZi#o#Ifg1toxK(FqtjQZqDs7 XeJH-~72Fr Date: Tue, 23 Feb 2021 15:57:13 -0800 Subject: [PATCH 168/318] Fix test failures after a92ceea91116e7b95d23eff634507fa2cff86ef2 --- llvm/test/tools/llvm-cov/branch-c-general.test | 12 ++++++------ llvm/test/tools/llvm-cov/branch-logical-mixed.cpp | 4 ++-- llvm/test/tools/llvm-cov/branch-noShowBranch.test | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/tools/llvm-cov/branch-c-general.test b/llvm/test/tools/llvm-cov/branch-c-general.test index bbebdd19fbae..33c12d611992 100644 --- a/llvm/test/tools/llvm-cov/branch-c-general.test +++ b/llvm/test/tools/llvm-cov/branch-c-general.test @@ -118,18 +118,18 @@ // REPORT-NEXT: --- // REPORT-NEXT: simple_loops 8 0 100.00% 9 0 100.00% 6 0 100.00% // REPORT-NEXT: conditionals 24 0 100.00% 15 0 100.00% 16 2 87.50% -// REPORT-NEXT: early_exits 20 4 80.00% 25 3 88.00% 16 6 62.50% -// REPORT-NEXT: jumps 39 12 69.23% 48 4 91.67% 26 9 65.38% -// REPORT-NEXT: switches 28 5 82.14% 38 5 86.84% 30 9 70.00% +// REPORT-NEXT: early_exits 20 4 80.00% 25 2 92.00% 16 6 62.50% +// REPORT-NEXT: jumps 39 12 69.23% 48 2 95.83% 26 9 65.38% +// REPORT-NEXT: switches 28 5 82.14% 38 4 89.47% 30 9 70.00% // REPORT-NEXT: big_switch 25 1 96.00% 32 0 100.00% 30 6 80.00% // REPORT-NEXT: boolean_operators 16 0 100.00% 13 0 100.00% 22 2 90.91% // REPORT-NEXT: boolop_loops 19 0 100.00% 14 0 100.00% 16 2 87.50% -// REPORT-NEXT: conditional_operator 4 2 50.00% 8 1 87.50% 4 2 50.00% +// REPORT-NEXT: conditional_operator 4 2 50.00% 8 0 100.00% 4 2 50.00% // REPORT-NEXT: do_fallthrough 9 0 100.00% 12 0 100.00% 6 0 100.00% // REPORT-NEXT: main 1 0 100.00% 16 0 100.00% 0 0 0.00% // REPORT-NEXT: c-general.c:static_func 4 0 100.00% 4 0 100.00% 2 0 
100.00% // REPORT-NEXT: --- -// REPORT-NEXT: TOTAL 197 24 87.82% 234 13 94.44% 174 38 78.16% +// REPORT-NEXT: TOTAL 197 24 87.82% 234 8 96.58% 174 38 78.16% // Test file-level report. // RUN: llvm-profdata merge %S/Inputs/branch-c-general.proftext -o %t.profdata @@ -157,7 +157,7 @@ // HTML-INDEX: // HTML-INDEX: 100.00% (12/12) // HTML-INDEX: -// HTML-INDEX: 94.44% (221/234) +// HTML-INDEX: 96.58% (226/234) // HTML-INDEX: // HTML-INDEX: 87.82% (173/197) // HTML-INDEX: diff --git a/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp b/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp index 107ed7778015..f5f787112446 100644 --- a/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp +++ b/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp @@ -84,7 +84,7 @@ int main(int argc, char *argv[]) // REPORT: Name Regions Miss Cover Lines Miss Cover Branches Miss Cover // REPORT-NEXT: --- -// REPORT-NEXT: _Z4funcii 77 9 88.31% 68 10 85.29% 80 32 60.00% +// REPORT-NEXT: _Z4funcii 77 9 88.31% 68 3 95.59% 80 32 60.00% // REPORT-NEXT: main 1 0 100.00% 5 0 100.00% 0 0 0.00% // REPORT-NEXT: --- -// REPORT-NEXT: TOTAL 78 9 88.46% 73 10 86.30% 80 32 60.00% +// REPORT-NEXT: TOTAL 78 9 88.46% 73 3 95.89% 80 32 60.00% diff --git a/llvm/test/tools/llvm-cov/branch-noShowBranch.test b/llvm/test/tools/llvm-cov/branch-noShowBranch.test index a8f12d698933..79069b2f07bf 100644 --- a/llvm/test/tools/llvm-cov/branch-noShowBranch.test +++ b/llvm/test/tools/llvm-cov/branch-noShowBranch.test @@ -20,6 +20,6 @@ // REPORT-NOT: do_fallthrough 9 0 100.00% 12 0 100.00% 6 0 100.00% // REPORT-NOT: main 1 0 100.00% 16 0 100.00% 0 0 0.00% // REPORT-NOT: c-general.c:static_func 4 0 100.00% 4 0 100.00% 2 0 100.00% -// REPORT: TOTAL 197 24 87.82% 234 13 94.44% -// REPORT-NOT: TOTAL 197 24 87.82% 234 13 94.44% 174 38 78.16% +// REPORT: TOTAL 197 24 87.82% 234 8 96.58% +// REPORT-NOT: TOTAL 197 24 87.82% 234 8 96.58% 174 38 78.16% From d56d2c8863b6ae3637b6261c32ea9479d8e1e2d6 Mon Sep 17 00:00:00 2001 From: Louis Dionne 
Date: Wed, 27 Jan 2021 13:08:24 -0500 Subject: [PATCH 169/318] [libc++] Fix extern template test failing on Windows See https://reviews.llvm.org/D94718#2521489 for details. (cherry picked from commit 90407b16b1d3e38f1360b6a24ceab801ab9cefc1) --- libcxx/test/libcxx/debug/extern-templates.sh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp index d5039d4f3029..b2ed6a63d630 100644 --- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp +++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp @@ -15,7 +15,7 @@ // UNSUPPORTED: libcpp-has-no-localization // RUN: %{cxx} %{flags} %{compile_flags} %s %{link_flags} -fPIC -DTU1 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -shared -o %t.lib -// RUN: %{cxx} %{flags} %{compile_flags} %s %t.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe +// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s %basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe // RUN: %{exec} %t.exe #include From 4918a3d138b907a571f496661b5367e090e1e8bb Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 28 Jan 2021 10:46:22 -0500 Subject: [PATCH 170/318] [libc++] Fix extern-templates.sh.cpp test on Linux (cherry picked from commit bf5941afcda3ac6570ba25165758869287491e0d) --- libcxx/test/libcxx/debug/extern-templates.sh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp index b2ed6a63d630..0e19895ba8f0 100644 --- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp +++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp @@ -15,7 +15,7 @@ // UNSUPPORTED: libcpp-has-no-localization // RUN: %{cxx} %{flags} %{compile_flags} %s %{link_flags} -fPIC -DTU1 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -shared -o %t.lib -// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s 
%basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe +// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s ./%basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe // RUN: %{exec} %t.exe #include From e0e6b1e39e7e402cd74a8bf98a2728efbe38310e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 24 Feb 2021 09:19:23 -0800 Subject: [PATCH 171/318] ReleaseNotes: add lld/ELF notes Differential Revision: https://reviews.llvm.org/D97113 --- lld/docs/ReleaseNotes.rst | 68 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 7c1cbc4a4c4b..24ed23bb2b7d 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -24,13 +24,77 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- -* ``--error-handling-script`` is added to allow for user-defined handlers upon +* ``--dependency-file`` has been added. (Similar to ``cc -M -MF``.) + (`D82437 `_) +* ``--error-handling-script`` has been added to allow for user-defined handlers upon missing libraries. (`D87758 `_) +* ``--exclude-libs`` can now localize defined version symbols and bitcode referenced libcall symbols. + (`D94280 `_) +* ``--gdb-index`` now works with DWARF v5 and ``--icf={safe,all}``. + (`D85579 `_) + (`D89751 `_) +* ``--gdb-index --emit-relocs`` can now be used together. + (`D94354 `_) +* ``--icf={safe,all}`` conservatively no longer fold text sections with LSDA. + Previously ICF on ``-fexceptions`` code could be unsafe. + (`D84610 `_) +* ``--icf={safe,all}`` can now fold two sections with relocations referencing aliased symbols. + (`D88830 `_) +* ``--lto-pseudo-probe-for-profiling`` has been added. + (`D95056 `_) +* ``--no-lto-whole-program-visibility`` has been added. + (`D92060 `_) +* ``--oformat-binary`` has been fixed to respect LMA. 
+ (`D85086 `_) +* ``--reproduce`` includes ``--lto-sample-profile``, ``--just-symbols``, ``--call-graph-ordering-file``, ``--retain-symbols-file`` files. +* ``-r --gc-sections`` is now supported. + (`D84131 `_) +* A ``-u`` specified symbol will no longer change the binding to ``STB_WEAK``. + (`D88945 `_) +* ``--wrap`` support has been improved. + + If ``foo`` is not referenced, there is no longer an undefined symbol ``__wrap_foo``. + + If ``__real_foo`` is not referenced, there is no longer an undefined symbol ``foo``. +* ``SHF_LINK_ORDER`` sections can now have zero ``sh_link`` values. +* ``SHF_LINK_ORDER`` and non-``SHF_LINK_ORDER`` sections can now be mixed within an input section description. + (`D84001 `_) +* ``LOG2CEIL`` is now supported in linker scripts. + (`D84054 `_) +* ``DEFINED`` has been fixed to check whether the symbol is defined. + (`D83758 `_) +* An input section description may now have multiple ``SORT_*``. + The matched sections are ordered by radix sort with the keys being ``(SORT*, --sort-section, input order)``. + (`D91127 `_) +* Users can now provide a GNU style linker script to convert ``.ctors`` into ``.init_array``. + (`D91187 `_) +* An empty output section can now be discarded even if it is assigned to a program header. + (`D92301 `_) +* Non-``SHF_ALLOC`` sections now have larger file offsets than ``SHF_ALLOC`` sections. + (`D85867 `_) +* Some symbol versioning improvements. + + Defined ``foo@@v1`` now resolve undefined ``foo@v1`` (`D92259 `_) + + Undefined ``foo@v1`` now gets an error (`D92260 `_) +* The AArch64 port now has support for ``STO_AARCH64_VARIANT_PCS`` and ``DT_AARCH64_VARIANT_PCS``. + (`D93045 `_) +* The AArch64 port now has support for ``R_AARCH64_LD64_GOTPAGE_LO15``. +* The PowerPC64 port now detects missing R_PPC64_TLSGD/R_PPC64_TLSLD and disables TLS relaxation. + This allows linking with object files produced by very old IBM XL compilers. + (`D92959 `_) +* Many PowerPC PC-relative relocations are now supported. 
+* ``R_PPC_ADDR24`` and ``R_PPC64_ADDR16_HIGH`` are now supported. +* powerpcle is now supported. Tested with FreeBSD loader and freestanding. + (`D93917 `_) +* RISC-V: the first ``SHT_RISCV_ATTRIBUTES`` section is now retained. + (`D86309 `_) +* LTO pipeline now defaults to the new PM if the CMake variable ``ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER`` is on. + (`D92885 `_) Breaking changes ---------------- -* ... +* A COMMON symbol can now cause the fetch of an archive providing a ``STB_GLOBAL`` definition. + This behavior follows GNU ld newer than December 1999. + If you see ``duplicate symbol`` errors with the new behavior, check out `PR49226 `_. + (`D86142 `_) COFF Improvements ----------------- From 98f06b16a313ece593f5711778d7da9037f3a2ef Mon Sep 17 00:00:00 2001 From: Pavel Iliin Date: Thu, 25 Feb 2021 22:51:09 +0000 Subject: [PATCH 172/318] [AArch64][Docs] Release notes 12.x on outline atomics Description for AArch64 -moutline-atomics, -mno-outline-atomics options added to release notes. Differential Revision: https://reviews.llvm.org/D97510 --- clang/docs/ReleaseNotes.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a43cc33988ab..64f737ff488f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -73,6 +73,15 @@ New Compiler Flags - ... +- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable + and disable calls to helper functions implementing atomic operations. These + out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime + AArch64 Large System Extensions (LSE) availability and either use their + atomic instructions, or falls back to LL/SC loop. These options do not apply + if the compilation target supports LSE. Atomic instructions are used directly + in that case. The option's behaviour mirrors GCC, the helpers are implemented + both in compiler-rt and libgcc. 
+ - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo for contents of a precompiled header in a separate object file. This object file needs to be linked in, but its contents do not need to be generated From c637d4d136fd476d4a7418f5ecb76b80bcb6f8fc Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 23 Feb 2021 13:20:13 -0500 Subject: [PATCH 173/318] [OpenMP][NVPTX] Fixed a compilation error in deviceRTLs caused by unsupported feature in release version of LLVM `ptx71` is not supported in release version of LLVM yet. As a result, the support of CUDA 11.2 and CUDA 11.1 caused a compilation error as mentioned in D97004. Since the support in D97004 is just a WA for release, and we'll not use it in the near future, using `ptx70` for CUDA 11 is feasible. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D97195 (cherry picked from commit f6c2984a090e78947f75e096d43b476bf2ae73eb) --- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 5478cd3f6aea..806a887cc2d8 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -153,7 +153,7 @@ add_custom_target(omptarget-nvptx-bc) # This map is from clang/lib/Driver/ToolChains/Cuda.cpp. # The last element is the default case. set(cuda_version_list 112 111 110 102 101 100 92 91 90 80) -set(ptx_feature_list 71 71 70 65 64 63 61 61 60 42) +set(ptx_feature_list 70 70 70 65 64 63 61 61 60 42) # The following two lines of ugly code is not needed when the minimal CMake # version requirement is 3.17+.
list(LENGTH cuda_version_list num_version_supported) From 692808e5af8338f5a109a64b5b9d75d05ec6f590 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 28 Feb 2021 10:17:10 -0500 Subject: [PATCH 174/318] [InstCombine] avoid infinite loop in demanded bits for select https://llvm.org/PR49205 (cherry picked from commit 9502061bcc86982641772f45b7e7a0eb7437f054) --- .../InstCombineSimplifyDemanded.cpp | 8 +++- .../InstCombine/select-imm-canon.ll | 38 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index c265516213aa..16efe863779a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return false; // Get the constant out of the ICmp, if there is one. + // Only try this when exactly 1 operand is a constant (if both operands + // are constant, the icmp should eventually simplify). Otherwise, we may + // invert the transform that reduces set bits and infinite-loop. + Value *X; const APInt *CmpC; ICmpInst::Predicate Pred; - if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) || - CmpC->getBitWidth() != SelC->getBitWidth()) + if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) || + isa(X) || CmpC->getBitWidth() != SelC->getBitWidth()) return ShrinkDemandedConstant(I, OpNo, DemandedMask); // If the constant is already the same as the ICmp, leave it as-is. 
diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll index e230b3b92777..fec6d693954a 100644 --- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll +++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll @@ -87,3 +87,41 @@ define i8 @original_logical(i32 %A, i32 %B) { %conv7 = trunc i32 %spec.select.i to i8 ret i8 %conv7 } + +; This would infinite loop because we have potentially opposing +; constant transforms on degenerate (unsimplified) cmps. + +define i32 @PR49205(i32 %t0, i1 %b) { +; CHECK-LABEL: @PR49205( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: br i1 [[B:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.cond + +for.cond: + %s = phi i32 [ 7, %entry ], [ %add, %for.body ] + br i1 %b, label %for.body, label %for.end + +for.body: + %div = add i32 %t0, undef + %add = add nsw i32 %div, 1 + br label %for.cond + +for.end: + %cmp6 = icmp ne i32 %s, 4 + %conv = zext i1 %cmp6 to i32 + %and7 = and i32 %s, %conv + %sub = sub i32 %s, %and7 + %cmp9 = icmp ne i32 %sub, 4 + %conv10 = zext i1 %cmp9 to i32 + %sub11 = sub i32 %conv10, %sub + %and = and i32 %sub11, 1 + ret i32 %and +} From f73ba0f3582ba33984ad996c124d106a9737cd90 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 27 Feb 2021 09:09:03 -0500 Subject: [PATCH 175/318] [SimplifyCFG] avoid illegal phi with both poison and undef In the example based on: https://llvm.org/PR49218 ...we are crashing because poison is a subclass of undef, so we merge blocks and create: PHI node has multiple entries for the same basic block with different incoming values! %k3 = phi i64 [ poison, %entry ], [ %k3, %g ], [ undef, %entry ] If both poison and undef values are incoming, we soften the poison values to undef. 
Differential Revision: https://reviews.llvm.org/D97495 (cherry picked from commit 356cdabd3a9e0ff919ea2c1a35c8706ecb915297) --- llvm/lib/Transforms/Utils/Local.cpp | 24 ++- .../Transforms/SimplifyCFG/poison-merge.ll | 200 ++++++++++++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/poison-merge.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index d055f3dd3084..ae26058c210c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -918,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN, /// \param IncomingValues A map from block to value. static void replaceUndefValuesInPhi(PHINode *PN, const IncomingValueMap &IncomingValues) { + SmallVector TrueUndefOps; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); @@ -925,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN, BasicBlock *BB = PN->getIncomingBlock(i); IncomingValueMap::const_iterator It = IncomingValues.find(BB); - if (It == IncomingValues.end()) continue; + // Keep track of undef/poison incoming values. Those must match, so we fix + // them up below if needed. + // Note: this is conservatively correct, but we could try harder and group + // the undef values per incoming basic block. + if (It == IncomingValues.end()) { + TrueUndefOps.push_back(i); + continue; + } + + // There is a defined value for this incoming block, so map this undef + // incoming value to the defined value. PN->setIncomingValue(i, It->second); } + + // If there are both undef and poison values incoming, then convert those + // values to undef. It is invalid to have different values for the same + // incoming block. 
+ unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) { + return isa(PN->getIncomingValue(i)); + }); + if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) { + for (unsigned i : TrueUndefOps) + PN->setIncomingValue(i, UndefValue::get(PN->getType())); + } } /// Replace a value flowing from a block to a phi with diff --git a/llvm/test/Transforms/SimplifyCFG/poison-merge.ll b/llvm/test/Transforms/SimplifyCFG/poison-merge.ll new file mode 100644 index 000000000000..93d9b0b299a6 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/poison-merge.ll @@ -0,0 +1,200 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=0 < %s | FileCheck %s + +; Merge 2 undefined incoming values. + +define i32 @undef_merge(i32 %x) { +; CHECK-LABEL: @undef_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ undef, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ undef, %entry ] + br label %loop + +exit: + ret i32 undef +} + +; Merge 2 poison incoming values. 
+ +define i32 @poison_merge(i32 %x) { +; CHECK-LABEL: @poison_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ poison, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ poison, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ poison, %entry ] + br label %loop + +exit: + ret i32 undef +} + +; Merge equal defined incoming values. + +define i32 @defined_merge(i32 %x) { +; CHECK-LABEL: @defined_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ 42, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ] + br label %loop + +exit: + ret i32 undef +} + +; Merge defined and undef incoming values. 
+ +define i32 @defined_and_undef_merge(i32 %x) { +; CHECK-LABEL: @defined_and_undef_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ undef, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ] + br label %loop + +exit: + ret i32 undef +} + +; Merge defined and poison incoming values. + +define i32 @defined_and_poison_merge(i32 %x) { +; CHECK-LABEL: @defined_and_poison_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ poison, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ] + br label %loop + +exit: + ret i32 undef +} + +; Do not crash trying to merge poison and undef into a single phi. 
+ +define i32 @PR49218(i32 %x) { +; CHECK-LABEL: @PR49218( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 4, label [[G:%.*]] +; CHECK-NEXT: i32 12, label [[G]] +; CHECK-NEXT: ] +; CHECK: g: +; CHECK-NEXT: [[K3:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: br label [[G]] +; CHECK: exit: +; CHECK-NEXT: ret i32 undef +; +entry: + switch i32 %x, label %exit [ + i32 4, label %loop + i32 12, label %g + ] + +loop: + %k2 = phi i64 [ %k3, %g ], [ undef, %entry ] + br label %g + +g: + %k3 = phi i64 [ %k2, %loop ], [ poison, %entry ] + br label %loop + +exit: + ret i32 undef +} From 344216979213d841b72e44891871c031db622f5d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 1 Mar 2021 12:17:10 -0800 Subject: [PATCH 176/318] Revert "[c++20] Mark class type NTTPs as done and start defining the feature test macro." Some of the parts of this work were reverted; stop defining the feature test macro for now. This reverts commit b4c63ef6dd90dba9af26a111c9a78b121c5284b1. (cherry picked from commit 564f5b0734bd5d265a0046e5ca9d08ae5bc303eb) --- clang/lib/Frontend/InitPreprocessor.cpp | 2 +- clang/test/Lexer/cxx-features.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index d47ad1b74649..c64a912ce919 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -565,7 +565,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_aggregate_bases", "201603L"); Builder.defineMacro("__cpp_structured_bindings", "201606L"); Builder.defineMacro("__cpp_nontype_template_args", - LangOpts.CPlusPlus20 ? 
"201911L" : "201411L"); + "201411L"); // (not latest) Builder.defineMacro("__cpp_fold_expressions", "201603L"); Builder.defineMacro("__cpp_guaranteed_copy_elision", "201606L"); Builder.defineMacro("__cpp_nontype_template_parameter_auto", "201606L"); diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index f57faed4ed90..2f46f354ee83 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -181,7 +181,8 @@ #error "wrong value for __cpp_structured_bindings" #endif -#if check(nontype_template_args, 0, 0, 0, 201411, 201911, 201911) +#if check(nontype_template_args, 0, 0, 0, 201411, 201411, 201411) +// FIXME: 201911 in C++20 #error "wrong value for __cpp_nontype_template_args" #endif From 9760b282ff03ef581d51b3d74d5b33d09b463272 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 28 Feb 2021 11:23:46 -0800 Subject: [PATCH 177/318] [DAGCombiner][X86] Don't peek through ANDs on the shift amount in matchRotateSub when called from MatchFunnelPosNeg. Peeking through AND is only valid if the input to both shifts is the same. If the inputs are different, then the original pattern ORs the two values when the masked shift amount is 0. This is ok if the values are the same since the OR would be a NOP which is why its ok for rotate. 
Fixes PR49365 and reverts PR34641 Differential Revision: https://reviews.llvm.org/D97637 (cherry picked from commit 5de09ef02e24d234d9fc0cd1c6dfe18a1bb784b0) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 +++++-- llvm/test/CodeGen/X86/shift-double.ll | 44 ++++++++++++------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 89670d708264..6a6f83827f72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. +// +// The IsRotate flag should be set when the LHS of both shifts is the same. +// Otherwise if matching a general funnel shift, it should be clear. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool IsRotate) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // always invokes undefined behavior for 32-bit X. // // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. + // + // NOTE: We can only do this when matching an AND and not a general + // funnel shift. 
unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0)); unsigned Bits = Log2_64(EltSize); @@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, + /*IsRotate*/ true)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); @@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // fold (or (shl x0, (*ext (sub 32, y))), // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) - if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll index c0872957f2b8..1213a80921d2 100644 --- a/llvm/test/CodeGen/X86/shift-double.ll +++ b/llvm/test/CodeGen/X86/shift-double.ll @@ -480,23 +480,31 @@ define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind { ret i32 %sh } -; PR34641 - Masked Shift Counts +; These are not valid shld/shrd patterns. When the shift amount modulo +; the bitwidth is zero, the result should be an OR of both operands not a +; shift. 
-define i32 @shld_safe_i32(i32, i32, i32) { -; X86-LABEL: shld_safe_i32: +define i32 @not_shld_i32(i32, i32, i32) { +; X86-LABEL: not_shld_i32: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shldl %cl, %edx, %eax +; X86-NEXT: shll %cl, %edx +; X86-NEXT: negb %cl +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: retl ; -; X64-LABEL: shld_safe_i32: +; X64-LABEL: not_shld_i32: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: shll %cl, %edi +; X64-NEXT: negb %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shldl %cl, %esi, %eax +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq %4 = and i32 %2, 31 %5 = shl i32 %0, %4 @@ -507,21 +515,27 @@ define i32 @shld_safe_i32(i32, i32, i32) { ret i32 %9 } -define i32 @shrd_safe_i32(i32, i32, i32) { -; X86-LABEL: shrd_safe_i32: +define i32 @not_shrd_i32(i32, i32, i32) { +; X86-LABEL: not_shrd_i32: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrdl %cl, %edx, %eax +; X86-NEXT: shrl %cl, %edx +; X86-NEXT: negb %cl +; X86-NEXT: shll %cl, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: retl ; -; X64-LABEL: shrd_safe_i32: +; X64-LABEL: not_shrd_i32: ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %esi, %eax +; X64-NEXT: shrl %cl, %edi +; X64-NEXT: negb %cl ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrdl %cl, %esi, %eax +; X64-NEXT: shll %cl, %eax +; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq %4 = and i32 %2, 31 %5 = lshr i32 %0, %4 From 4ed9f17e9390a6845cfd8a235f2078cb9b0e4719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= Date: Fri, 5 Feb 2021 19:57:09 
+0100 Subject: [PATCH 178/318] [analyzer] Add 12.0.0 release notes Differential Revision: https://reviews.llvm.org/D96163 --- clang/docs/ReleaseNotes.rst | 33 +++++++++++++++++++++++++++++++- clang/docs/analyzer/checkers.rst | 2 ++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 64f737ff488f..7f4b675b68f9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -377,7 +377,38 @@ libclang Static Analyzer --------------- -- ... +.. 3ff220de9009 [analyzer][StdLibraryFunctionsChecker] Add POSIX networking functions +.. ...And a million other patches. +- Improve the analyzer's understanding of several POSIX functions. + +.. https://reviews.llvm.org/D86533#2238207 +- Greatly improved the analyzer’s constraint solver by better understanding + when constraints are imposed on multiple symbolic values that are known to be + equal or known to be non-equal. It will now also efficiently reject impossible + if-branches between known comparison expressions. (Incorrectly stated as a + 11.0.0 feature in the previous release notes) + +.. 820e8d8656ec [Analyzer][WebKit] UncountedLambdaCaptureChecker +- New checker: :ref:`webkit.UncountedLambdaCapturesChecker` + is a WebKit coding convention checker that flags raw pointers to + reference-counted objects captured by lambdas and suggests using intrusive + reference-counting smart pointers instead. + +.. 8a64689e264c [Analyzer][WebKit] UncountedLocalVarsChecker +- New checker: :ref:`alpha.webkit.UncountedLocalVarsChecker` + is a WebKit coding convention checker that intends to make sure that any + uncounted local variable is backed by a ref-counted object with lifetime that + is strictly larger than the scope of the uncounted local variable. + +.. 
914f6c4ff8a4 [StaticAnalyzer] Support struct annotations in FuchsiaHandleChecker +- ``fuchsia.HandleChecker`` now recognizes handles in structs; all the handles + referenced by the structure (direct value or ptr) would be treated as + containing the release/use/acquire annotations directly. + +.. 8deaec122ec6 [analyzer] Update Fuchsia checker to catch releasing unowned handles. +- Fuchsia checkers can detect the release of an unowned handle. + +- Numerous fixes and improvements to bug report generation. .. _release-notes-ubsan: diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index b47be97eef96..d851845396ac 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2538,6 +2538,8 @@ We also define a set of safe transformations which if passed a safe value as an - casts - unary operators like ``&`` or ``*`` +.. _alpha-webkit-UncountedLocalVarsChecker: + alpha.webkit.UncountedLocalVarsChecker """""""""""""""""""""""""""""""""""""" The goal of this rule is to make sure that any uncounted local variable is backed by a ref-counted object with lifetime that is strictly larger than the scope of the uncounted local variable. To be on the safe side we require the scope of an uncounted variable to be embedded in the scope of ref-counted object that backs it. From 99350dcc3f5b46d564338c0067c2cbd139b841ee Mon Sep 17 00:00:00 2001 From: "Peyton, Jonathan L" Date: Tue, 2 Mar 2021 07:44:15 -0600 Subject: [PATCH 179/318] [OpenMP] Fix clang-cl build error regarding TSX intrinsics Fix for https://bugs.llvm.org/show_bug.cgi?id=49339 The CMake check for the RTM intrinsics needs the -mrtm flag to be set during the test. This way clang-cl correctly detects it has the _xbegin() intrinsic. Otherwise, the CMake check fails.
Differential Revision: https://reviews.llvm.org/D97413 (cherry picked from commit e83380fccc2cc9842bdcfd268efddf6fce90544d) --- openmp/runtime/cmake/config-ix.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index f06fda6c0221..ed62aefccd14 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -172,6 +172,10 @@ if (IA32 OR INTEL64) } int main() { int a = __kmp_umwait(0, 1000); return a; }") check_cxx_source_compiles("${source_code}" LIBOMP_HAVE_WAITPKG_INTRINSICS) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + if (LIBOMP_HAVE_MRTM_FLAG) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mrtm") + endif() set(source_code "// check for attribute rtm and rtm intrinsics #ifdef IMMINTRIN_H #include @@ -188,6 +192,7 @@ if (IA32 OR INTEL64) int main() { int a = __kmp_xbegin(); return a; }") check_cxx_source_compiles("${source_code}" LIBOMP_HAVE_RTM_INTRINSICS) set(CMAKE_REQUIRED_DEFINITIONS) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) endif() # Find perl executable From 52510d84802b55ecd80a904ca259adfecffc5be1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 1 Mar 2021 21:37:26 +0100 Subject: [PATCH 180/318] [GlobalISel] Bail on G_PHI narrowing of odd types (PR48188) The current narrowing code for G_PHI can only handle the case where the size is a multiple of the narrow size. If this is not the case, fall back to SDAG instead of asserting. Original patch by shepmaster. 
Differential Revision: https://reviews.llvm.org/D92446 (cherry picked from commit c35761db0f078f74550ef56bfc0745c162d76967) --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 5 ++++ llvm/test/CodeGen/AArch64/pr48188.ll | 27 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/pr48188.ll diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e7f40523efaf..3178ee16af2b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_PHI: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + unsigned NumParts = SizeOp0 / NarrowSize; SmallVector DstRegs(NumParts); SmallVector, 2> SrcRegs(MI.getNumOperands() / 2); diff --git a/llvm/test/CodeGen/AArch64/pr48188.ll b/llvm/test/CodeGen/AArch64/pr48188.ll new file mode 100644 index 000000000000..2da02e640ec1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr48188.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; GlobalISel cannot legalize this phi, so we fall back to SDAG. 
+define void @test() nounwind { +; CHECK-LABEL: test: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 // =16 +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: str x1, [sp] // 8-byte Folded Spill +; CHECK-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr x1, [sp] // 8-byte Folded Reload +; CHECK-NEXT: str x1, [sp] // 8-byte Folded Spill +; CHECK-NEXT: str x0, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: b .LBB0_1 +entry: + br label %loop + +loop: + %p = phi i72 [ 0, %entry ], [ %p, %loop ] + br label %loop +} From d24e102ba2665dc6cd467f467813fba9c8261133 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 24 Feb 2021 12:37:22 -0500 Subject: [PATCH 181/318] [OpenMP] Fixed a crash when offloading to x86_64 with target nowait PR#49334 reports a crash when offloading to x86_64 with `target nowait`, which is caused by referencing a nullptr. The root cause of the issue is, when pushing a hidden helper task in `__kmp_push_task`, it also maps the gtid to its shadow gtid, which is wrong. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D97329 (cherry picked from commit e5da63d5a9ede1fb6d8aa18cfd44533ead128738) --- .../libomptarget/test/offloading/bug49334.cpp | 148 ++++++++++++++++++ openmp/runtime/src/kmp_tasking.cpp | 3 +- 2 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 openmp/libomptarget/test/offloading/bug49334.cpp diff --git a/openmp/libomptarget/test/offloading/bug49334.cpp b/openmp/libomptarget/test/offloading/bug49334.cpp new file mode 100644 index 000000000000..b26cd7b2b338 --- /dev/null +++ b/openmp/libomptarget/test/offloading/bug49334.cpp @@ -0,0 +1,148 @@ +// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda + +#include +#include +#include +#include + +class BlockMatrix { +private: + const int rowsPerBlock; + const int colsPerBlock; + const long nRows; + const long nCols; + const int nBlocksPerRow; + const int nBlocksPerCol; + std::vector>> Blocks; + +public: + BlockMatrix(const int _rowsPerBlock, const int _colsPerBlock, + const long _nRows, const long _nCols) + : rowsPerBlock(_rowsPerBlock), colsPerBlock(_colsPerBlock), nRows(_nRows), + nCols(_nCols), nBlocksPerRow(_nRows / _rowsPerBlock), + nBlocksPerCol(_nCols / _colsPerBlock), Blocks(nBlocksPerCol) { + for (int i = 0; i < nBlocksPerCol; i++) { + for (int j = 0; j < nBlocksPerRow; j++) { + Blocks[i].emplace_back(new float[_rowsPerBlock * _colsPerBlock]); + } + } + }; + + // Initialize the BlockMatrix from 2D arrays + void Initialize(const std::vector &matrix) { + for (int i = 0; i < nBlocksPerCol; i++) + for (int j = 0; j < nBlocksPerRow; j++) { + float *CurrBlock = GetBlock(i, j); + for (int ii = 0; ii < colsPerBlock; ++ii) + for (int jj = 0; jj 
< rowsPerBlock; ++jj) { + int curri = i * colsPerBlock + ii; + int currj = j * rowsPerBlock + jj; + CurrBlock[ii + jj * colsPerBlock] = matrix[curri + currj * nCols]; + } + } + } + + long Compare(const std::vector &matrix) const { + long fail = 0; + for (int i = 0; i < nBlocksPerCol; i++) + for (int j = 0; j < nBlocksPerRow; j++) { + float *CurrBlock = GetBlock(i, j); + for (int ii = 0; ii < colsPerBlock; ++ii) + for (int jj = 0; jj < rowsPerBlock; ++jj) { + int curri = i * colsPerBlock + ii; + int currj = j * rowsPerBlock + jj; + float m_value = matrix[curri + currj * nCols]; + float bm_value = CurrBlock[ii + jj * colsPerBlock]; + if (bm_value != m_value) { + fail++; + } + } + } + return fail; + } + + float *GetBlock(int i, int j) const { + assert(i < nBlocksPerCol && j < nBlocksPerRow && "Accessing outside block"); + return Blocks[i][j].get(); + } +}; + +constexpr const int BS = 256; +constexpr const int N = 1024; + +int BlockMatMul_TargetNowait(BlockMatrix &A, BlockMatrix &B, BlockMatrix &C) { +#pragma omp parallel +#pragma omp master + for (int i = 0; i < N / BS; ++i) + for (int j = 0; j < N / BS; ++j) { + float *BlockC = C.GetBlock(i, j); + for (int k = 0; k < N / BS; ++k) { + float *BlockA = A.GetBlock(i, k); + float *BlockB = B.GetBlock(k, j); +// clang-format off +#pragma omp target depend(in: BlockA[0], BlockB[0]) depend(inout: BlockC[0]) \ + map(to: BlockA[:BS * BS], BlockB[:BS * BS]) \ + map(tofrom: BlockC[:BS * BS]) nowait +// clang-format on +#pragma omp parallel for + for (int ii = 0; ii < BS; ii++) + for (int jj = 0; jj < BS; jj++) { + for (int kk = 0; kk < BS; ++kk) + BlockC[ii + jj * BS] += + BlockA[ii + kk * BS] * BlockB[kk + jj * BS]; + } + } + } + return 0; +} + +void Matmul(const std::vector &a, const std::vector &b, + std::vector &c) { + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + float sum = 0.0; + for (int k = 0; k < N; ++k) { + sum = sum + a[i * N + k] * b[k * N + j]; + } + c[i * N + j] = sum; + } + } +} + +int main(int 
argc, char *argv[]) { + std::vector a(N * N); + std::vector b(N * N); + std::vector c(N * N, 0.0); + + for (int i = 0; i < N; ++i) { + for (int j = 0; j < N; ++j) { + a[i * N + j] = b[i * N + j] = i + j % 100; + } + } + + auto BlockedA = BlockMatrix(BS, BS, N, N); + BlockedA.Initialize(a); + BlockedA.Compare(a); + auto BlockedB = BlockMatrix(BS, BS, N, N); + BlockedB.Initialize(b); + BlockedB.Compare(b); + + Matmul(a, b, c); + + auto BlockedC = BlockMatrix(BS, BS, N, N); + BlockMatMul_TargetNowait(BlockedA, BlockedB, BlockedC); + + if (BlockedC.Compare(c) > 0) { + return 1; + } + + std::cout << "PASS\n"; + + return 0; +} + +// CHECK: PASS diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d7021128dbd..4bcd11946694 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -326,7 +326,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - if (taskdata->td_flags.hidden_helper) { + // We don't need to map to shadow gtid if it is already hidden helper thread + if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) { gtid = KMP_GTID_TO_SHADOW_GTID(gtid); thread = __kmp_threads[gtid]; } From 46a1b0655666e21c56fa79560e9baee87405d4f4 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 5 Mar 2021 16:01:45 +0100 Subject: [PATCH 182/318] [AArch64] Legalize horizontal fmax/fmin reductions on f16 vectors Expand the horizontal reduction during the instruction selection phase, but only if the target doesn't support the full fp16 instruction set. 
Fixes https://bugs.llvm.org/show_bug.cgi?id=49401 Reviewed By: aemerson Differential Revision: https://reviews.llvm.org/D97840 (cherry picked from commit 8725b24c6d4abaa97425e704652a13dacb35fe3f) --- .../Target/AArch64/AArch64ISelLowering.cpp | 7 ++- .../AArch64/vecreduce-fmax-legalization.ll | 62 ++++++++++++++++++- .../AArch64/vecreduce-fmin-legalization.ll | 62 ++++++++++++++++++- 3 files changed, 126 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1be09186dc0a..1451151f4dc5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Vector reductions for (MVT VT : { MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); - if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); + } } for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index f1ebd8fa85ea..d26db2aefee0 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK 
--check-prefix=CHECK-NOFP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a) declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a) +declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a) declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a) declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) @@ -44,6 +46,64 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ret fp128 %b } +define half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v4f16: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-NEXT: mov h1, v0.h[3] +; CHECK-NOFP-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v4f16: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: fmaxnmv h0, v0.4h +; CHECK-FP-NEXT: ret + %b = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) + ret half %b +} + +define half @test_v4f16_ninf(<4 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v4f16_ninf: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-NEXT: mov h1, v0.h[3] +; CHECK-NOFP-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; 
CHECK-NOFP-NEXT: fmaxnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v4f16_ninf: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: fmaxnmv h0, v0.4h +; CHECK-FP-NEXT: ret + %b = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) + ret half %b +} + define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index 4129fa80b13e..52d6e9773ab2 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a) declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a) +declare half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a) declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a) declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a) @@ -44,6 +46,64 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { ret fp128 %b } +define 
half @test_v4f16(<4 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v4f16: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-NEXT: mov h1, v0.h[3] +; CHECK-NOFP-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fminnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fminnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v4f16: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: fminnmv h0, v0.4h +; CHECK-FP-NEXT: ret + %b = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) + ret half %b +} + +define half @test_v4f16_ninf(<4 x half> %a) nounwind { +; CHECK-NOFP-LABEL: test_v4f16_ninf: +; CHECK-NOFP: // %bb.0: +; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-NEXT: mov h1, v0.h[3] +; CHECK-NOFP-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s3, h3 +; CHECK-NOFP-NEXT: fminnm s0, s0, s3 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s2, h2 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fminnm s0, s0, s2 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt s1, h1 +; CHECK-NOFP-NEXT: fminnm s0, s0, s1 +; CHECK-NOFP-NEXT: fcvt h0, s0 +; CHECK-NOFP-NEXT: ret +; +; CHECK-FP-LABEL: test_v4f16_ninf: +; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: fminnmv h0, v0.4h +; CHECK-FP-NEXT: ret + %b = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) + ret half %b +} + define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: From f8b32989241cca87a8690c8cc404f06ce1f90e4c Mon Sep 17 00:00:00 2001 From: Nathan 
James Date: Wed, 3 Mar 2021 16:01:12 +0000 Subject: [PATCH 183/318] [clang-tidy] Deprecate readability-deleted-default check ... For removal in next release cycle. The clang warning that does the same thing is enabled by default and typically emits better diagnostics making this check surplus to requirements. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D97491 (cherry picked from commit 19aefd2d5dc3a8d3b8e81219973828170b7fcd2c) --- clang-tools-extra/docs/ReleaseNotes.rst | 10 ++++++++++ .../checks/readability-deleted-default.rst | 20 +++---------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b3c9c829198b..29321bb3eb04 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -358,6 +358,16 @@ Changes in existing checks Added `std::basic_string_view` to default list of ``string``-like types. +Deprecated checks +^^^^^^^^^^^^^^^^^ + +- The :doc:`readability-deleted-default + ` check has been deprecated. + + The clang warning `Wdefaulted-function-deleted + `_ + will diagnose the same issues and is enabled by default. + Improvements to include-fixer ----------------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst index 00134eb05484..5f2083e00061 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst @@ -3,20 +3,6 @@ readability-deleted-default =========================== -Checks that constructors and assignment operators marked as ``= default`` are -not actually deleted by the compiler. - -.. code-block:: c++ - - class Example { - public: - // This constructor is deleted because I is missing a default value. - Example() = default; - // This is fine. 
- Example(const Example& Other) = default; - // This operator is deleted because I cannot be assigned (it is const). - Example& operator=(const Example& Other) = default; - - private: - const int I; - }; +This check has been deprecated; prefer to make use of the `Wdefaulted-function-deleted +`_ +flag. From a123beacce408af8c2de1f39d522ac6b6c4b5d1b Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 9 Feb 2021 14:06:17 +0900 Subject: [PATCH 184/318] [LoopVectorize] Fix VPRecipeBuilder::createEdgeMask to correctly generate the mask This patch fixes pr48832 by correctly generating the mask when a poison value is involved. Consider this CFG (which is a part of the input): ``` for.body: ; preds = %for.cond br i1 true, label %cond.false, label %land.rhs land.rhs: ; preds = %for.body br i1 poison, label %cond.end, label %cond.false cond.false: ; preds = %for.body, %land.rhs br label %cond.end cond.end: ; preds = %land.rhs, %cond.false %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ] ``` The path for.body -> land.rhs -> cond.end should be taken when 'select i1 false, i1 poison, i1 false' holds (which means it's never taken); but VPRecipeBuilder::createEdgeMask was emitting 'and i1 false, poison' instead. The former one successfully blocks poison propagation whereas the latter one doesn't, making the condition poison and thus causing the miscompilation. SimplifyCFG has a similar bug (which didn't expose a real-world bug yet), and a patch for this is also ongoing (see https://reviews.llvm.org/D95026).
Reviewed By: bjope Differential Revision: https://reviews.llvm.org/D95217 (cherry picked from commit ed253ef77248d91a15b3a1aa36c0b74bed8ec8af) --- .../Vectorize/LoopVectorizationPlanner.h | 4 ++ .../Transforms/Vectorize/LoopVectorize.cpp | 11 ++++- .../LoopVectorize/X86/masked_load_store.ll | 48 +++++++++---------- .../x86-interleaved-accesses-masked-group.ll | 12 ++--- .../LoopVectorize/if-conversion-nest.ll | 2 +- .../LoopVectorize/if-pred-non-void.ll | 2 +- .../Transforms/LoopVectorize/if-reduction.ll | 8 ++-- llvm/test/Transforms/LoopVectorize/pr48832.ll | 40 ++++++++++++++++ .../LoopVectorize/reduction-inloop-pred.ll | 4 +- .../LoopVectorize/reduction-inloop.ll | 4 +- 10 files changed, 93 insertions(+), 42 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/pr48832.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 1795470fa58c..19797e6f7858 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -142,6 +142,10 @@ class VPBuilder { return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); } + VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) { + return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}); + } + //===--------------------------------------------------------------------===// // RAII helpers. 
//===--------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 47635dbdda02..d36e078444bc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8195,8 +8195,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, if (BI->getSuccessor(0) != Dst) EdgeMask = Builder.createNot(EdgeMask); - if (SrcMask) // Otherwise block in-mask is all-one, no need to AND. - EdgeMask = Builder.createAnd(EdgeMask, SrcMask); + if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND. + // The condition is 'SrcMask && EdgeMask', which is equivalent to + // 'select i1 SrcMask, i1 EdgeMask, i1 false'. + // The select version does not introduce new UB if SrcMask is false and + // EdgeMask is poison. Using 'and' here introduces undefined behavior. + VPValue *False = Plan->getOrAddVPValue( + ConstantInt::getFalse(BI->getCondition()->getType())); + EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False); + } return EdgeMaskCache[Edge] = EdgeMask; } diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index dddedcb77f67..b464389fe393 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -2042,10 +2042,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea ; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], ; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], ; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], -; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]] -; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]] -; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]] -; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]] +; 
AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) @@ -2166,10 +2166,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea ; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], ; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], ; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], -; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]] -; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]] -; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]] -; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]] +; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) @@ -2290,10 +2290,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea ; 
AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], ; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], ; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], -; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]] -; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]] -; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]] -; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]] +; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]]) @@ -2459,10 +2459,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea ; AVX1-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], ; AVX1-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], ; AVX1-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], -; AVX1-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]] -; AVX1-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]] -; AVX1-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]] -; AVX1-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]] +; AVX1-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer +; AVX1-NEXT: [[TMP55:%.*]] = select <4 x 
i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer ; AVX1-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX1-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) @@ -2583,10 +2583,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea ; AVX2-NEXT: [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], ; AVX2-NEXT: [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], ; AVX2-NEXT: [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], -; AVX2-NEXT: [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]] -; AVX2-NEXT: [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]] -; AVX2-NEXT: [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]] -; AVX2-NEXT: [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]] +; AVX2-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer +; AVX2-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer ; AVX2-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX2-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]]) @@ -2707,10 +2707,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea ; AVX512-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], ; AVX512-NEXT: [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], ; AVX512-NEXT: [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], -; AVX512-NEXT: [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]] -; AVX512-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]] -; AVX512-NEXT: [[TMP54:%.*]] = and <8 x i1> 
[[TMP50]], [[TMP30]] -; AVX512-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]] +; AVX512-NEXT: [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer +; AVX512-NEXT: [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer ; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0 ; AVX512-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index 285f460d99d5..aa8b1361fe4e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -408,7 +408,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -520,7 +520,7 @@ define dso_local void 
@masked_strided1_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], @@ -615,7 +615,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -727,7 +727,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw i32 [[INDEX]], 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; 
ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], @@ -1535,7 +1535,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], -; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; DISABLED_MASKED_STRIDED: pred.load.if: @@ -1871,7 +1871,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] +; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]] ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], 
i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison) diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 0cba3fc20ed9..f218869c1fbe 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -39,7 +39,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], ; CHECK-NEXT: [[TMP12:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], ; CHECK-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP10]], [[TMP15]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 308377f06856..b8d9b458aa4c 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -161,7 +161,7 @@ for.cond.cleanup: ; preds = %if.end ; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], ; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], ; CHECK: %[[NOT:.+]] = xor <2 x i1> %[[CMP1]], -; CHECK: %[[AND:.+]] = and <2 x i1> %[[CMP2]], %[[NOT]] +; CHECK: %[[AND:.+]] = select <2 x i1> %[[NOT]], <2 x i1> %[[CMP2]], <2 x i1> zeroinitializer ; CHECK: %[[OR:.+]] = or <2 x i1> %[[AND]], %[[CMP1]] ; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR]], i32 0 ; CHECK: br i1 %[[EXTRACT]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index a97301659cb9..bde4fbcc9d13 100644 --- 
a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -610,9 +610,9 @@ for.end: ; preds = %for.body, %entry ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], %[[V0]], %[[C1]], %[[C2]], %[[C11]] +; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], %[[C21]], %[[C11]] +; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]] ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]] ; CHECK: fadd fast <4 x float> %[[S2]], @@ -678,9 +678,9 @@ for.end: ; preds = %for.inc, %entry ; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float> ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float> ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], %[[C2]], %[[C11]] +; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], %[[C21]], %[[C11]] +; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]] ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]] define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly { diff --git a/llvm/test/Transforms/LoopVectorize/pr48832.ll b/llvm/test/Transforms/LoopVectorize/pr48832.ll new file mode 100644 index 000000000000..620da918bb47 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr48832.ll @@ -0,0 +1,40 @@ +; RUN: opt -loop-vectorize -force-vector-width=4 -S -o - < %s | FileCheck %s +%arrayt = type [64 x i32] + +@v_146 = external global %arrayt, align 1 + +; Since the program has well defined behavior, it should not introduce store poison +; CHECK: vector.ph: +; CHECK-NEXT: br label 
%vector.body +; CHECK: vector.body: +; CHECK: store <4 x i32> zeroinitializer, +; CHECK: br i1 %{{.*}}, label %middle.block, label %vector.body + +define void @foo() { +entry: + br label %for.cond + +for.cond: ; preds = %cond.end, %entry + %storemerge = phi i16 [ 0, %entry ], [ %inc, %cond.end ] + %cmp = icmp slt i16 %storemerge, 15 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br i1 true, label %cond.false, label %land.rhs + +land.rhs: ; preds = %for.body + br i1 poison, label %cond.end, label %cond.false + +cond.false: ; preds = %for.body, %land.rhs + br label %cond.end + +cond.end: ; preds = %land.rhs, %cond.false + %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ] + %arrayidx = getelementptr inbounds %arrayt, %arrayt* @v_146, i16 0, i16 %storemerge + store i32 %cond, i32* %arrayidx, align 1 + %inc = add nsw i16 %storemerge, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index d1b99e4e403b..e8271b9c5984 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -1542,8 +1542,8 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], ; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], diff --git 
a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 23bfc39bf646..b295090ca928 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -814,8 +814,8 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], ; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], ; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], From bff59aca162ef16d7634dc9df39f1f3af31ecb93 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 4 Mar 2021 22:30:38 -0800 Subject: [PATCH 185/318] [TargetLowering] Use HandleSDNodes to prevent nodes from being deleted by recursive calls in getNegatedExpression. For binary or ternary ops we call getNegatedExpression multiple times and then compare costs. While we're doing this we need to hold a node from the first call across the second call, but it's not yet attached to the DAG. It's possible the second call creates an identical node and then decides it didn't need it so will try to delete it if it has no uses. This can cause a reference to the node we're holding further up the call stack to become invalidated. To prevent this, we can use a HandleSDNode to artificially give the node a use without connecting it to the DAG. I've used a std::list of HandleSDNodes so we can create handles only when we have a node to hold.
HandleSDNode does not have a default constructor and cannot be copied or moved. Fixes PR49393. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D97914 (cherry picked from commit 74e6030bcbcc8e628f9a99a424342a0c656456f9) --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 29 ++++++++++ llvm/test/CodeGen/X86/pr49393.ll | 55 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr49393.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7145fc91d5f3..b0ad86899d25 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); + // Because getNegatedExpression can delete nodes we need a handle to keep + // temporary nodes alive in case the recursion manages to create an identical + // node. + std::list<HandleSDNode> Handles; + switch (Opcode) { case ISD::ConstantFP: { // Don't invert constant FP values after legalization unless the target says @@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y.
if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. if (NegX && (CostX <= CostY)) { Cost = CostX; @@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, if (!NegZ) break; + // Prevent this node from being deleted by the next two calls. + Handles.emplace_back(NegZ); + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) NegatibleCost CostX = NegatibleCost::Expensive; SDValue NegX = getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // Prevent this node from being deleted by the next call. + if (NegX) + Handles.emplace_back(NegX); + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) NegatibleCost CostY = NegatibleCost::Expensive; SDValue NegY = getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + // We're done with the handles. + Handles.clear(); + // Negate the X if its cost is less or equal than Y. 
if (NegX && (CostX <= CostY)) { Cost = std::min(CostX, CostZ); diff --git a/llvm/test/CodeGen/X86/pr49393.ll b/llvm/test/CodeGen/X86/pr49393.ll new file mode 100644 index 000000000000..9952b90fc7b7 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr49393.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @f() { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %for.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: imull %eax, %eax +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: mulsd %xmm0, %xmm1 +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: cwtl +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2sd %eax, %xmm2 +; CHECK-NEXT: mulsd %xmm0, %xmm2 +; CHECK-NEXT: mulsd %xmm0, %xmm2 +; CHECK-NEXT: movapd %xmm2, %xmm3 +; CHECK-NEXT: mulsd %xmm1, %xmm3 +; CHECK-NEXT: mulsd %xmm0, %xmm2 +; CHECK-NEXT: subsd %xmm3, %xmm1 +; CHECK-NEXT: addsd %xmm2, %xmm1 +; CHECK-NEXT: cvttsd2si %xmm1, %eax +; CHECK-NEXT: jmp .LBB0_1 +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %b.0 = phi i16 [ 0, %entry ], [ %conv77, %for.cond ] + %mul18 = mul i16 %b.0, %b.0 + %arrayidx.real = load double, double* undef, align 1 + %arrayidx.imag = load double, double* undef, align 1 + %mul_ac = fmul fast double %arrayidx.real, %arrayidx.real + %0 = fadd fast double 0.000000e+00, %arrayidx.real + %sub.r = fsub fast double %mul_ac, %0 + %sub.i = fsub fast double 0.000000e+00, %arrayidx.imag + %conv28 = sitofp i16 %mul18 to double + %mul_bc32 = fmul fast double %arrayidx.imag, %conv28 + %mul_bd46 = fmul fast double %mul_bc32, %arrayidx.imag + %mul_r49 = fsub fast double 0.000000e+00, %mul_bd46 + %mul_ac59 = fmul fast double %mul_r49, %sub.r + %mul_bc48 = fmul fast double %mul_bc32, %arrayidx.real + 
%mul_i50 = fadd fast double 0.000000e+00, %mul_bc48 + %1 = fmul fast double %mul_i50, %sub.i + %.neg = fneg fast double %0 + %.neg19 = fmul fast double %1, -1.000000e+00 + %.neg20 = fadd fast double %.neg, %mul_ac + %2 = fadd fast double %.neg20, %mul_ac59 + %sub.r75 = fadd fast double %2, %.neg19 + %conv77 = fptosi double %sub.r75 to i16 + br label %for.cond +} From 15d1ee36720ff24323f55452ae3cfb63f318c3f3 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 6 Mar 2021 11:45:57 +0200 Subject: [PATCH 186/318] [CMake][compiler-rt] Use copying instead of symlinking for LSE builtins on non-Unix-likes As reported in D93278 post-review, symlinking requires privilege escalation on Windows. Copying is functionally the same, so fall back to it for systems that aren't Unix-like. This is similar to the solution in AddLLVM.cmake. Reviewed By: ikudrin Differential Revision: https://reviews.llvm.org/D98111 (cherry picked from commit ba860963b156db3b653c67ef044df877f3cea9cc) --- compiler-rt/lib/builtins/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index b511a9a987b3..73b6bead8424 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -515,6 +515,12 @@ set(aarch64_SOURCES set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") +if(CMAKE_HOST_UNIX) + set(COMPILER_RT_LINK_OR_COPY create_symlink) +else() + set(COMPILER_RT_LINK_OR_COPY copy) +endif() + foreach(pat cas swp ldadd ldclr ldeor ldset) foreach(size 1 2 4 8 16) foreach(model 1 2 3 4) @@ -522,7 +528,7 @@ foreach(pat cas swp ldadd ldclr ldeor ldset) set(helper_asm "${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S") add_custom_command( OUTPUT ${helper_asm} - COMMAND ${CMAKE_COMMAND} -E create_symlink "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}" + COMMAND ${CMAKE_COMMAND} -E
${COMPILER_RT_LINK_OR_COPY} "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}" ) set_source_files_properties("${helper_asm}" PROPERTIES From c016eda3257eb0f67a989065d174bc5e13ed7096 Mon Sep 17 00:00:00 2001 From: Amilendra Kodithuwakku Date: Fri, 12 Mar 2021 19:19:29 +0000 Subject: [PATCH 187/318] [release][docs] List all cores Arm has added support for in LLVM 12. Reviewed By: kristof.beyls Differential Revision: https://reviews.llvm.org/D98277 --- clang/docs/ReleaseNotes.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7f4b675b68f9..46e11fcb31cb 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -139,6 +139,15 @@ Modified Compiler Flags This behavior matches newer GCC. (`D91760 `_) (`D92054 `_) +- Support has been added for the following processors (command-line identifiers + in parentheses): + - Arm Cortex-A78C (cortex-a78c). + - Arm Cortex-R82 (cortex-r82). + - Arm Neoverse V1 (neoverse-v1). + - Arm Neoverse N2 (neoverse-n2). + - Fujitsu A64FX (a64fx). + For example, to select architecture support and tuning for Neoverse-V1 based + systems, use ``-mcpu=neoverse-v1``. Removed Compiler Flags ------------------------- From ca14f0282fcec0324b921d27907a704b3a156d0f Mon Sep 17 00:00:00 2001 From: Amilendra Kodithuwakku Date: Fri, 12 Mar 2021 20:03:23 +0000 Subject: [PATCH 188/318] [release][docs] List all cores Arm has added support for in LLVM 12. Add new-line before sub-list for proper rendering. 
Differential Revision: https://reviews.llvm.org/D98277 --- clang/docs/ReleaseNotes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 46e11fcb31cb..451bc65b9f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -141,6 +141,7 @@ Modified Compiler Flags (`D92054 `_) - Support has been added for the following processors (command-line identifiers in parentheses): + - Arm Cortex-A78C (cortex-a78c). - Arm Cortex-R82 (cortex-r82). - Arm Neoverse V1 (neoverse-v1). From 00441b8f4e5b7daa39ac6cbeb45ebfe54662b08d Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Tue, 16 Mar 2021 12:07:15 +0000 Subject: [PATCH 189/318] [OpenCL][Docs] Release notes Differential Revision: https://reviews.llvm.org/D98076 --- clang/docs/ReleaseNotes.rst | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 451bc65b9f5b..b35d81c60b7b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -210,10 +210,38 @@ C++1z Feature Support Objective-C Language Changes in Clang ------------------------------------- -OpenCL C Language Changes in Clang ----------------------------------- - -... +OpenCL Kernel Language Changes in Clang +--------------------------------------- + +- Improved online documentation: :doc:`UsersManual` and :doc:`OpenCLSupport` + pages. +- Added ``-cl-std=CL3.0`` and predefined version macro for OpenCL 3.0. +- Added ``-cl-std=CL1.0`` and mapped to the existing OpenCL 1.0 functionality. +- Improved OpenCL extension handling per target. +- Added clang extension for function pointers ``__cl_clang_function_pointers`` + and variadic functions ``__cl_clang_variadic_functions``, more details can be + found in :doc:`LanguageExtensions`. 
+- Removed extensions without kernel language changes: + ``cl_khr_select_fprounding_mode``, ``cl_khr_gl_sharing``, ``cl_khr_icd``, + ``cl_khr_gl_event``, ``cl_khr_d3d10_sharing``, ``cl_khr_context_abort``, + ``cl_khr_d3d11_sharing``, ``cl_khr_dx9_media_sharing``, + ``cl_khr_image2d_from_buffer``, ``cl_khr_initialize_memory``, + ``cl_khr_gl_depth_images``, ``cl_khr_spir``, ``cl_khr_egl_event``, + ``cl_khr_egl_image``, ``cl_khr_terminate_context``. +- Improved diagnostics for unevaluated ``vec_step`` expression. +- Allow nested pointers (e.g. pointer-to-pointer) kernel arguments beyond OpenCL + 1.2. +- Added ``global_device`` and ``global_host`` address spaces for USM + allocations. + +Miscellaneous improvements in C++ for OpenCL support: + +- Added diagnostics for pointers to member functions and references to + functions. +- Added support of ``vec_step`` builtin. +- Fixed ICE on address spaces with forwarding references and templated copy + constructors. +- Removed warning for variadic macro use. ABI Changes in Clang -------------------- From e3186ba0f3b5a5cf2a42155ff5ee8350cbda1486 Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Thu, 18 Mar 2021 16:08:58 +0000 Subject: [PATCH 190/318] [aarch64][WOA64][docs] Release note for WoA-hosted LLVM 12 binary Reviewed By: peterwaller-arm Differential Revision: https://reviews.llvm.org/D98415 --- clang/docs/ReleaseNotes.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b35d81c60b7b..f3499d167361 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -193,6 +193,13 @@ Windows Support exception. To workaround (with reduced security), compile with /guard:cf,nolongjmp. +- Windows on Arm64: LLVM 12 adds official binary release hosted on + Windows on Arm64. The binary is built and tested by Linaro alongside + AArch64 and ARM 32-bit Linux binary releases. 
This first WoA release + includes Clang compiler, LLD Linker, and compiler-rt runtime libraries. + Work on LLDB, sanitizer support, OpenMP, and other features is in progress + and will be included in future Windows on Arm64 LLVM releases. + C Language Changes in Clang --------------------------- From 4990141a4366eb00abdc8252d7cbb8adeacb9954 Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Fri, 19 Mar 2021 13:37:19 +0000 Subject: [PATCH 191/318] [WoA][MSVC] Use default linker setting in MSVC-compatible driver [take 2] At the moment "link.exe" is hard-coded as default linker in MSVC.cpp, so there's no way to use LLD as default linker for MSVC driver. This patch adds checking of CLANG_DEFAULT_LINKER to MSVC.cpp and updates unit-tests that expect link.exe linker to explicitly select it via -fuse-ld=link, so that buildbots and other builds that set -DCLANG_DEFAULT_LINKER=foobar don't fail these tests. This is a squash of - https://reviews.llvm.org/D98493 (MSVC.cpp change) and - https://reviews.llvm.org/D98862 (unit-tests change) Fixes https://bugs.llvm.org/show_bug.cgi?id=49624 Reviewed By: maxim-kuvyrkov Differential Revision: https://reviews.llvm.org/D98935 (cherry-picked from commit 2049fe58903b68f66872a18e608f40e5233b55fb) --- clang/lib/Driver/ToolChains/MSVC.cpp | 6 +++++- clang/test/Driver/Xlinker-args.c | 2 +- clang/test/Driver/cl-inputs.c | 6 +++--- clang/test/Driver/cl-link-at-file.c | 2 +- clang/test/Driver/cl-link.c | 22 +++++++++++----------- clang/test/Driver/msvc-link.c | 8 ++++---- clang/test/OpenMP/linking.c | 4 ++-- 7 files changed, 27 insertions(+), 23 deletions(-) diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index f4b7a57e0bb7..13943b6c404a 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -11,6 +11,7 @@ #include "Darwin.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Version.h" +#include "clang/Config/config.h" #include "clang/Driver/Compilation.h" 
#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // translate 'lld' into 'lld-link', and in the case of the regular msvc // linker, we need to use a special search algorithm. llvm::SmallString<128> linkPath; - StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link"); + StringRef Linker + = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER); + if (Linker.empty()) + Linker = "link"; if (Linker.equals_lower("lld")) Linker = "lld-link"; diff --git a/clang/test/Driver/Xlinker-args.c b/clang/test/Driver/Xlinker-args.c index a44957cd8aef..cb045a1d40ac 100644 --- a/clang/test/Driver/Xlinker-args.c +++ b/clang/test/Driver/Xlinker-args.c @@ -17,7 +17,7 @@ // LINUX: "--no-demangle" "-e" "_start" "one" "two" "three" "four" "-z" "five" "-r" {{.*}} "-T" "a.lds" // Check that we forward '-Xlinker' and '-Wl,' on Windows. -// RUN: %clang -target i686-pc-win32 -### \ +// RUN: %clang -target i686-pc-win32 -fuse-ld=link -### \ // RUN: -Xlinker one -Wl,two %s 2>&1 | \ // RUN: FileCheck -check-prefix=WIN %s // WIN: link.exe diff --git a/clang/test/Driver/cl-inputs.c b/clang/test/Driver/cl-inputs.c index 59455a0aa5e5..8eb44517ee16 100644 --- a/clang/test/Driver/cl-inputs.c +++ b/clang/test/Driver/cl-inputs.c @@ -50,16 +50,16 @@ // RUN: %clang_cl -### /Tc - 2>&1 | FileCheck -check-prefix=STDINTc %s // STDINTc: "-x" "c" -// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s +// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s // LIBINPUT: link.exe" // LIBINPUT: "cl-test.lib" -// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s +// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 
%s // LIBINPUT2: error: no such file or directory: 'cl-test2.lib' // LIBINPUT2: link.exe" // LIBINPUT2-NOT: "cl-test2.lib" -// RUN: %clang_cl -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s +// RUN: %clang_cl -fuse-ld=link -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s // LIBINPUT3: error: no such file or directory: '/nonexisting.lib' // LIBINPUT3: link.exe" // LIBINPUT3-NOT: "/nonexisting.lib" diff --git a/clang/test/Driver/cl-link-at-file.c b/clang/test/Driver/cl-link-at-file.c index 50ae07fadf5b..4e665f89b74e 100644 --- a/clang/test/Driver/cl-link-at-file.c +++ b/clang/test/Driver/cl-link-at-file.c @@ -7,7 +7,7 @@ // RUN: echo /link bar.lib baz.lib > %t.args // RUN: touch %t.obj -// RUN: %clang_cl -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS +// RUN: %clang_cl -fuse-ld=link -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS // If the "/link" option captures all remaining args beyond its response file, // it will also capture "--" and our input argument. In this case, Clang will // be clueless and will emit "argument unused" warnings. If PR17239 is properly diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c index 142725fed8eb..e2f5397e9133 100644 --- a/clang/test/Driver/cl-link.c +++ b/clang/test/Driver/cl-link.c @@ -2,14 +2,14 @@ // be interpreted as a command-line option, e.g. on Mac where %s is commonly // under /Users. 
-// RUN: %clang_cl /Tc%s -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s -// RUN: %clang_cl /Tc%s -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s +// RUN: %clang_cl /Tc%s -fuse-ld=link -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s +// RUN: %clang_cl /Tc%s -fuse-ld=link -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s // LINK: link.exe // LINK: "foo" // LINK: "bar" // LINK: "baz" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s // ASAN: link.exe // ASAN: "-debug" // ASAN: "-incremental:no" @@ -19,7 +19,7 @@ // ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib" // ASAN: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s // ASAN-MD: link.exe // ASAN-MD: "-debug" // ASAN-MD: "-incremental:no" @@ -29,13 +29,13 @@ // ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" // ASAN-MD: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl /LD -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s -// RUN: %clang_cl /LDd -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang_cl /LDd -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s // DLL: link.exe // "-dll" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s 
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s // ASAN-DLL: link.exe // ASAN-DLL: "-dll" // ASAN-DLL: "-debug" @@ -43,13 +43,13 @@ // ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib" // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl /Zi /Tc%s -### 2>&1 | FileCheck --check-prefix=DEBUG %s +// RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s // DEBUG: link.exe // DEBUG: "-debug" // PR27234 -// RUN: %clang_cl /Tc%s nonexistent.obj -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s -// RUN: %clang_cl /Tc%s nonexistent.lib -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s +// RUN: %clang_cl /Tc%s nonexistent.obj -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s +// RUN: %clang_cl /Tc%s nonexistent.lib -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s // NONEXISTENT-NOT: no such file // NONEXISTENT: link.exe // NONEXISTENT: "/libpath:somepath" diff --git a/clang/test/Driver/msvc-link.c b/clang/test/Driver/msvc-link.c index 13dccd21bfd8..1ee17fc63c32 100644 --- a/clang/test/Driver/msvc-link.c +++ b/clang/test/Driver/msvc-link.c @@ -1,4 +1,4 @@ -// RUN: %clang -target i686-pc-windows-msvc -### %s 2>&1 | FileCheck --check-prefix=BASIC %s +// RUN: %clang -target i686-pc-windows-msvc -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=BASIC %s // BASIC: link.exe" // BASIC: "-out:a.exe" // BASIC: "-defaultlib:libcmt" @@ -6,7 +6,7 @@ // BASIC: "-nologo" // BASIC-NOT: "-Brepro" -// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -### %s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll 
-fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=DLL %s // DLL: link.exe" // DLL: "-out:a.dll" // DLL: "-defaultlib:libcmt" @@ -19,13 +19,13 @@ // LIBPATH: "-libpath:/usr/lib" // LIBPATH: "-nologo" -// RUN: %clang_cl /Brepro -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s +// RUN: %clang_cl /Brepro -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s // REPRO: link.exe" // REPRO: "-out:msvc-link.exe" // REPRO: "-nologo" // REPRO: "-Brepro" -// RUN: %clang_cl /Brepro- -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s +// RUN: %clang_cl /Brepro- -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s // NOREPRO: link.exe" // NOREPRO: "-out:msvc-link.exe" // NOREPRO: "-nologo" diff --git a/clang/test/OpenMP/linking.c b/clang/test/OpenMP/linking.c index 802553c1be75..1c4439626470 100644 --- a/clang/test/OpenMP/linking.c +++ b/clang/test/OpenMP/linking.c @@ -81,7 +81,7 @@ // CHECK-LD-OVERRIDE-64: "-lgomp" "-lrt" // CHECK-LD-OVERRIDE-64: "-lpthread" "-lc" // -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: %clang -no-canonical-prefixes -fuse-ld=link %s -### -o %t.o 2>&1 \ // RUN: -fopenmp=libomp -target x86_64-msvc-win32 -rtlib=platform \ // RUN: | FileCheck --check-prefix=CHECK-MSVC-LINK-64 %s // CHECK-MSVC-LINK-64: link.exe @@ -95,7 +95,7 @@ // SIMD-ONLY11-NOT: libomp // SIMD-ONLY11-NOT: libgomp // -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: %clang -no-canonical-prefixes %s -fuse-ld=link -### -o %t.o 2>&1 \ // RUN: -fopenmp=libiomp5 -target x86_64-msvc-win32 -rtlib=platform \ // RUN: | FileCheck --check-prefix=CHECK-MSVC-ILINK-64 %s From f4c01f33f450f654a63363b4eb84bf744c24959c Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Wed, 24 Mar 2021 12:28:00 -0400 Subject: [PATCH 192/318] [Support] Fix 'keeping' temporary files on Windows 7 As reported here: https://bugs.llvm.org/show_bug.cgi?id=48378#c0 and here: https://github.com/rust-lang/rust/issues/81051 since 
79657e2339b58bc01fe1b85a448bb073d57d90bb, some programs such as llvm-ar don't work properly on Windows 7. The issue is shown in the snippet by Oleksandr Prodan: https://pastebin.com/v51m3uBU In essence, once the 'DeleteFile' flag has been set on FILE_DISPOSITION_INFO, the file path can't be queried anymore with GetFinalPathNameByHandleW. This however works on Windows 10, GetFinalPathNameByHandleW would return successfully. To workaround the issue, we simply reset the 'DeleteFile' flag before even checking if we're dealing with a network file. Tested with `llvm-ar r empty.a a.obj` ran on a network mount. At the moment, we cannot specifically add a test coverage for this, since it requires mounting a network drive. (cherry picked from commit 64ab2b6825c5aeae6e4afa7ef0829b89a6828102) --- llvm/lib/Support/Windows/Path.inc | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index dc9bcf868381..adcbd1b5f8f3 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) { } static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { - // First, check if the file is on a network (non-local) drive. If so, don't - // set DeleteFile to true, since it prevents opening the file for writes. + // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a + // network file. On Windows 7 the function realPathFromHandle() below fails + // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by + // a prior call. + FILE_DISPOSITION_INFO Disposition; + Disposition.DeleteFile = false; + if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, + sizeof(Disposition))) + return mapWindowsError(::GetLastError()); + if (!Delete) + return std::error_code(); + + // Check if the file is on a network (non-local) drive.
If so, don't + // continue when DeleteFile is true, since it prevents opening the file for + // writes. Note -- this will leak temporary files on disk, but only when the + // target file is on a network drive. SmallVector FinalPath; if (std::error_code EC = realPathFromHandle(Handle, FinalPath)) return EC; @@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { if (!IsLocal) return std::error_code(); - // The file is on a local drive, set the DeleteFile to true. - FILE_DISPOSITION_INFO Disposition; - Disposition.DeleteFile = Delete; + // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's + // flag. + Disposition.DeleteFile = true; if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, sizeof(Disposition))) return mapWindowsError(::GetLastError()); From e94372d1b395a6461e7d973917b3a3c29563a5e6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 12 Mar 2021 07:56:54 -0500 Subject: [PATCH 193/318] [SimplifyCFG] avoid sinking insts within an infinite-loop The test is reduced from a C source example in: https://llvm.org/PR49541 It's possible that the test could be reduced further or the predicate generalized further, but it seems to require a few ingredients (including the "late" SimplifyCFG options on the RUN line) to fall into the infinite-loop trap. 
(cherry picked from commit bd197ed0a57a82187ed3c6265ca811d412acfaef) --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 ++++--- .../Transforms/SimplifyCFG/sink-inf-loop.ll | 49 +++++++++++++++++++ 2 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7cfe17618cde..de9560df9785 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1628,6 +1628,11 @@ static bool canSinkInstructions( I->getType()->isTokenTy()) return false; + // Do not try to sink an instruction in an infinite loop - it can cause + // this algorithm to infinite loop. + if (I->getParent()->getSingleSuccessor() == I->getParent()) + return false; + // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. @@ -1714,13 +1719,13 @@ static bool canSinkInstructions( return true; } -// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static bool sinkLastInstruction(ArrayRef Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); - // canSinkLastInstruction returning true guarantees that every block has at + // canSinkInstructions returning true guarantees that every block has at // least one non-terminator instruction. SmallVector Insts; for (auto *BB : Blocks) { @@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef Blocks) { } // The only checking we need to do now is that all users of all instructions - // are the same PHI node. 
canSinkLastInstruction should have checked this but - // it is slightly over-aggressive - it gets confused by commutative instructions - // so double-check it here. + // are the same PHI node. canSinkInstructions should have checked this but + // it is slightly over-aggressive - it gets confused by commutative + // instructions so double-check it here. Instruction *I0 = Insts.front(); if (!I0->user_empty()) { auto *PNUse = dyn_cast(*I0->user_begin()); @@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef Blocks) { return false; } - // We don't need to do any more checking here; canSinkLastInstruction should + // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - // This check is different to that in canSinkLastInstruction. There, we + // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have // completed - it takes into account PHIs that become trivially // simplifiable. However here we need a more local view; if an operand diff --git a/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll new file mode 100644 index 000000000000..37399367efce --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -sink-common-insts=true -S | FileCheck %s + +; This would infinite-loop because we allowed code sinking to examine an infinite-loop block (%j). 
+ +define void @PR49541(i32* %t1, i32 %a, i1 %bool) { +; CHECK-LABEL: @PR49541( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[I:%.*]] +; CHECK: j: +; CHECK-NEXT: [[T3:%.*]] = phi i32 [ [[B:%.*]], [[J:%.*]] ], [ [[A:%.*]], [[COND_TRUE:%.*]] ], [ [[A]], [[COND_FALSE:%.*]] ] +; CHECK-NEXT: [[T2:%.*]] = phi i32 [ [[T2]], [[J]] ], [ [[PRE2:%.*]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +; CHECK-NEXT: [[B]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: br label [[J]] +; CHECK: i: +; CHECK-NEXT: [[G_1:%.*]] = phi i16 [ undef, [[ENTRY:%.*]] ], [ [[G_1]], [[COND_FALSE]] ] +; CHECK-NEXT: br i1 [[BOOL:%.*]], label [[COND_FALSE]], label [[COND_TRUE]] +; CHECK: cond.true: +; CHECK-NEXT: [[TOBOOL9_NOT:%.*]] = icmp eq i16 [[G_1]], 0 +; CHECK-NEXT: [[PRE2]] = load i32, i32* [[T1]], align 4 +; CHECK-NEXT: br label [[J]] +; CHECK: cond.false: +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T1]], align 4 +; CHECK-NEXT: [[B2:%.*]] = icmp eq i32 [[T5]], 0 +; CHECK-NEXT: br i1 [[B2]], label [[J]], label [[I]] +; +entry: + br label %i + +j: + %t3 = phi i32 [ %b, %j ], [ %a, %cond.true ], [ %a, %cond.false ] + %t2 = phi i32 [ %t2, %j ], [ %pre2, %cond.true ], [ 0, %cond.false ] + %b = load i32, i32* %t1, align 4 + br label %j + +i: + %g.1 = phi i16 [ undef, %entry ], [ %g.1, %cond.false ] + br i1 %bool, label %cond.false, label %cond.true + +cond.true: + %tobool9.not = icmp eq i16 %g.1, 0 + %pre2 = load i32, i32* %t1, align 4 + br label %j + +cond.false: + %t5 = load i32, i32* %t1, align 4 + %b2 = icmp eq i32 %t5, 0 + br i1 %b2, label %j, label %i +} From f43958b7c497c526b238607624ee0069888f4c98 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 18 Mar 2021 18:25:21 -0400 Subject: [PATCH 194/318] [OpenMP] Fixed a crash in hidden helper thread It is reported that after enabling hidden helper thread, the program can hit the assertion `new_gtid < __kmp_threads_capacity` sometimes. The root cause is explained as follows. Let's say the default `__kmp_threads_capacity` is `N`. 
If hidden helper thread is enabled, `__kmp_threads_capacity` will be offset to `N+8` by default. If the number of threads we need exceeds `N+8`, e.g. via `num_threads` clause, we need to expand `__kmp_threads`. In `__kmp_expand_threads`, the expansion starts from `__kmp_threads_capacity`, and repeatedly doubles it until the new capacity meets the requirement. Let's assume the new requirement is `Y`. If `Y` happens to meet the constraint `(N+8)*2^X=Y` where `X` is the number of iterations, the new capacity is not enough because we have 8 slots for hidden helper threads. Here is an example. ``` #include int main(int argc, char *argv[]) { constexpr const size_t N = 1344; std::vector data(N); #pragma omp parallel for for (unsigned i = 0; i < N; ++i) { data[i] = i; } #pragma omp parallel for num_threads(N) for (unsigned i = 0; i < N; ++i) { data[i] += i; } return 0; } ``` My CPU is 20C40T, so `__kmp_threads_capacity` is 160. After offset, `__kmp_threads_capacity` becomes 168. `1344 = (160+8)*2^3`, so the assertion is hit.
Reviewed By: protze.joachim Differential Revision: https://reviews.llvm.org/D98838 (cherry picked from commit 2df65f87c1ea81008768e14522e5d9277234ba70) --- openmp/runtime/src/kmp_runtime.cpp | 15 ++++++- openmp/runtime/src/kmp_settings.cpp | 7 +-- .../capacity_mix_threads.cpp | 45 +++++++++++++++++++ .../hidden_helper_task/capacity_nthreads.cpp | 31 +++++++++++++ 4 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp create mode 100644 openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a6e32bd008e1..b981f8740dbe 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, if (TCR_PTR(__kmp_threads[0]) == NULL) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. + if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } if (__kmp_nth + new_nthreads - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > capacity) { @@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. 
+ if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } + /* see if there are too many threads */ if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { if (__kmp_tp_cached) { @@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) { /* find an available thread slot */ // Don't reassign the zero slot since we need that to only be used by // initial thread. Slots for hidden helper threads should also be skipped. - if (initial_thread && __kmp_threads[0] == NULL) { + if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { gtid = 0; } else { for (gtid = __kmp_hidden_helper_threads_num + 1; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index b477edbbfb42..50f6a05faaf5 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) { nth = (4 * __kmp_xproc); // If hidden helper task is enabled, we initialize the thread capacity with - // extra - // __kmp_hidden_helper_threads_num. - nth += __kmp_hidden_helper_threads_num; + // extra __kmp_hidden_helper_threads_num. 
+ if (__kmp_enable_hidden_helper) { + nth += __kmp_hidden_helper_threads_num; + } if (nth > __kmp_max_nth) nth = __kmp_max_nth; diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp new file mode 100644 index 000000000000..776aee9d8e2c --- /dev/null +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run + +#include + +#include +#include +#include +#include +#include + +void dummy_root() { + // omp_get_max_threads() will do middle initialization + int nthreads = omp_get_max_threads(); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); +} + +int main(int argc, char *argv[]) { + const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits::max()); + + std::vector data(N); + + // Create a new thread to initialize the OpenMP RTL. The new thread will not + // be taken as the "initial thread". 
+ std::thread root(dummy_root); + +#pragma omp parallel for num_threads(N) + for (unsigned i = 0; i < N; ++i) { + data[i] = i; + } + +#pragma omp parallel for num_threads(N + 1) + for (unsigned i = 0; i < N; ++i) { + data[i] += i; + } + + for (unsigned i = 0; i < N; ++i) { + assert(data[i] == 2 * i); + } + + root.join(); + + return 0; +} diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp new file mode 100644 index 000000000000..a9d394f729e9 --- /dev/null +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp @@ -0,0 +1,31 @@ +// RUN: %libomp-cxx-compile-and-run + +#include + +#include +#include +#include + +int main(int argc, char *argv[]) { + const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits::max()); + + std::vector data(N); + +#pragma omp parallel for num_threads(N) + for (unsigned i = 0; i < N; ++i) { + data[i] = i; + } + +#pragma omp parallel for num_threads(N + 1) + for (unsigned i = 0; i < N; ++i) { + data[i] += i; + } + + for (unsigned i = 0; i < N; ++i) { + assert(data[i] == 2 * i); + } + + return 0; +} From 8ca56905dd9bdade269b5bc91528495884b62bf5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 10 Mar 2021 14:37:09 +0100 Subject: [PATCH 195/318] [PowerPC] Fix infinite loop in peephole CR optimization (PR49509) If we encounter a degenerate select node where both operands are the same, then we can continue negating the condition while swapping operands, resulting in an infinite loop. Avoid this by bailing out if both operands are the same. Fixes https://bugs.llvm.org/show_bug.cgi?id=49509. 
Differential Revision: https://reviews.llvm.org/D98340 (cherry picked from commit 2489cbaa8057c736475fd88990f4f6dbf022873d) --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 6 ++ llvm/test/CodeGen/PowerPC/pr49509.ll | 81 +++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pr49509.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 693b0adaede4..2604218da160 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { User->getMachineOpcode() != PPC::SELECT_I8) return false; + SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); + // If we have a degenerate select with two equal operands, swapping will + // not do anything, and we may run into an infinite loop. + if (Op1 == Op2) + return false; + if (!Op2->isMachineOpcode()) return false; diff --git a/llvm/test/CodeGen/PowerPC/pr49509.ll b/llvm/test/CodeGen/PowerPC/pr49509.ll new file mode 100644 index 000000000000..f13733c18047 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr49509.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s + +target datalayout = "E-m:e-p:32:32-i64:64-n32" + +define void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb2 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: lis 3, 256 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %bb1 +; CHECK-NEXT: bclr 4, 20, 0 +; CHECK-NEXT: # %bb.3: # %bb66 +; CHECK-NEXT: lwz 4, 12(0) +; CHECK-NEXT: lwz 5, 8(0) +; CHECK-NEXT: lwz 6, 0(0) +; CHECK-NEXT: lwz 7, 4(0) +; CHECK-NEXT: lbz 3, 0(3) +; CHECK-NEXT: and 5, 5, 6 +; CHECK-NEXT: and 4, 4, 7 +; CHECK-NEXT: and 4, 4, 5 
+; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: lis 3, 256 +; CHECK-NEXT: lis 7, 512 +; CHECK-NEXT: bc 12, 2, .LBB0_4 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %bb66 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB0_5: # %bb66 +; CHECK-NEXT: cmpwi 1, 4, -1 +; CHECK-NEXT: cmpwi 5, 4, -1 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: bc 12, 6, .LBB0_6 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_6: # %bb66 +; CHECK-NEXT: addi 3, 7, 0 +; CHECK-NEXT: .LBB0_7: # %bb66 +; CHECK-NEXT: cror 20, 22, 2 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: bc 12, 20, .LBB0_9 +; CHECK-NEXT: # %bb.8: # %bb66 +; CHECK-NEXT: ori 3, 6, 0 +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %bb66 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB0_10: # %bb66 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: blr +bb: + br i1 undef, label %bb2, label %bb1 + +bb2: ; preds = %bb + %i = select i1 undef, i64 0, i64 72057594037927936 + store i64 %i, i64* undef, align 8 + ret void + +bb1: ; preds = %bb + %i50 = load i8, i8* undef, align 8 + %i52 = load i128, i128* null, align 8 + %i62 = icmp eq i8 %i50, 0 + br i1 undef, label %bb66, label %bb64 + +bb64: ; preds = %bb63 + ret void + +bb66: ; preds = %bb63 + %i67 = lshr i128 -1, 0 + %i68 = xor i128 %i52, -1 + %i69 = add i128 0, %i68 + %i70 = and i128 %i67, %i69 + %i71 = icmp eq i128 %i70, 0 + %i74 = select i1 %i62, i64 0, i64 72057594037927936 + %i75 = select i1 %i71, i64 144115188075855872, i64 %i74 + store i64 %i75, i64* undef, align 8 + ret void +} From e89cdf8937bb6017cc99b05823428dd2fd673368 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Mar 2021 13:25:33 -0500 Subject: [PATCH 196/318] [OpenMP] Restore backwards compatibility for libomptarget Summary: The changes introduced in D87946 changed the API for libomptarget functions. `__kmpc_push_target_tripcount` was a function in Clang 11.x but was not given a backward-compatible interface. This change will require people using Clang 13.x or 12.x to recompile their offloading programs. 
Reviewed By: jdoerfert cchen Differential Revision: https://reviews.llvm.org/D98358 (cherry picked from commit 807466ef28125cf7268c860b09d5563c9c93602a) --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- .../target_teams_distribute_parallel_for_codegen.cpp | 4 ++-- ...target_teams_distribute_parallel_for_if_codegen.cpp | 10 +++++----- ...get_teams_distribute_parallel_for_order_codegen.cpp | 2 +- ...rget_teams_distribute_parallel_for_simd_codegen.cpp | 2 +- ...t_teams_distribute_parallel_for_simd_if_codegen.cpp | 10 +++++----- clang/test/OpenMP/teams_distribute_codegen.cpp | 2 +- .../OpenMP/teams_distribute_parallel_for_codegen.cpp | 2 +- .../teams_distribute_parallel_for_simd_codegen.cpp | 2 +- clang/test/OpenMP/teams_distribute_simd_codegen.cpp | 2 +- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 4 ++-- llvm/test/Transforms/OpenMP/add_attributes.ll | 6 +++--- openmp/libomptarget/include/omptarget.h | 6 ++++-- openmp/libomptarget/src/exports | 3 ++- openmp/libomptarget/src/interface.cpp | 7 ++++++- openmp/libomptarget/src/omptarget.cpp | 4 ++-- 16 files changed, 38 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 57cc2d60e2af..83dfa0780547 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), + CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), Args); } }; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 0229ace911f8..c0f53239aa13 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -39,7 
+39,7 @@ #ifdef CK1 -// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount +// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount_mapper // HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}( int target_teams_fun(int *g){ @@ -60,7 +60,7 @@ int target_teams_fun(int *g){ // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 6650e0557511..efe7df819fb6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -49,10 +49,10 @@ int Arg; // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( #pragma omp target teams 
distribute parallel for @@ -107,12 +107,12 @@ int tmain(T Arg) { // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() int main() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp index b2ab37f22ec3..b99ba9d38a43 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -14,7 +14,7 @@ // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: %0 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK: call void [[TARGET_OUTLINE:@.+]]() // CHECK: ret void diff --git 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index e6049145702b..39ccb87462c0 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -60,7 +60,7 @@ int target_teams_fun(int *g){ // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], -// HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) +// HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index 8b0eaba07f1c..19dc15b94f64 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -43,10 +43,10 @@ int Arg; // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, 
i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( #ifdef OMP5 @@ -110,12 +110,12 @@ int tmain(T Arg) { // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() int main() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index 5bbb100e669e..aab5cced4c70 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -33,7 +33,7 @@ int teams_argument_global(int n) { // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), 
i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index b63e5aeddb7a..8fa73e76009b 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -32,7 +32,7 @@ int teams_argument_global(int n){ // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index 3d479c4cd29d..9b3855c61759 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -33,7 +33,7 @@ int teams_argument_global(int n){ // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* 
@{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index fd1214d22ce9..6e5d06b0c568 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -35,7 +35,7 @@ int teams_argument_global(int n) { // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 1) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 844046167975..75d360bf4237 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32, /* omp_allocator_handle_t */ VoidPtr) -__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64) +__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64) __OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) 
__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, @@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) __OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index b294542667bd..8476f42dd529 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -627,7 +627,7 @@ declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) declare void @__kmpc_destroy_allocator(i32, i8*) -declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) declare i32 @__kmpc_warp_active_thread_mask() @@ -1144,7 +1144,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; CHECK: ; Function Attrs: convergent nounwind ; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask() @@ -1669,7 +1669,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly -; OPTIMISTIC-NEXT: declare void 
@__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask() diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h index 46bb8206efa1..36c25c33798a 100644 --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -283,8 +283,10 @@ int __tgt_target_teams_nowait_mapper( int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, - uint64_t loop_tripcount); +void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); + +void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount); #ifdef __cplusplus } diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports index 5e09a088533d..b7fc1c8c3c86 100644 --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -25,6 +25,8 @@ VERS1.0 { __tgt_target_teams_nowait_mapper; __tgt_mapper_num_components; __tgt_push_mapper_component; + __kmpc_push_target_tripcount; + __kmpc_push_target_tripcount_mapper; omp_get_num_devices; omp_get_initial_device; omp_target_alloc; @@ -34,7 +36,6 @@ VERS1.0 { omp_target_memcpy_rect; omp_target_associate_ptr; omp_target_disassociate_ptr; - __kmpc_push_target_tripcount; local: *; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 01f3715d6bcc..b97676a6981b 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -514,8 +514,13 @@ EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, MapComponentInfoTy(base, begin, size, type, name)); } -EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t 
device_id, +EXTERN void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount) { + __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); +} + +EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount) { TIMESCOPE_WITH_IDENT(loc); if (IsOffloadDisabled()) return; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 37150aae2fe6..af6f7d09a4a2 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -900,8 +900,8 @@ TableMap *getTableMap(void *HostPtr) { /// Get loop trip count /// FIXME: This function will not work right if calling -/// __kmpc_push_target_tripcount in one thread but doing offloading in another -/// thread, which might occur when we call task yield. +/// __kmpc_push_target_tripcount_mapper in one thread but doing offloading in +/// another thread, which might occur when we call task yield. uint64_t getLoopTripCount(int64_t DeviceId) { DeviceTy &Device = PM->Devices[DeviceId]; uint64_t LoopTripCount = 0; From f05b649610564b11c481a20598dbb3f532c4602a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 7 Mar 2021 17:27:22 +0100 Subject: [PATCH 197/318] [FastISel] Don't trivially kill extractvalues (PR49467) All extractvalues of the same value at the same index will map to the same register, so even if one specific extractvalue only has one use, we should not mark it as a trivial kill, as there may be more extractvalues later. Fixes https://bugs.llvm.org/show_bug.cgi?id=49467. 
Differential Revision: https://reviews.llvm.org/D98145 (cherry picked from commit 55ae279ba7a5905f39ce3ae79eac7834a4a134cc) --- llvm/include/llvm/CodeGen/FastISel.h | 5 +++- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 10 +++++--- llvm/test/CodeGen/X86/pr49467.ll | 27 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr49467.ll diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 81c1d6aad49a..26bf4ab2618c 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -490,7 +490,10 @@ class FastISel { /// - \c Add has a constant operand. bool canFoldAddIntoGEP(const User *GEP, const Value *Add); - /// Test whether the given value has exactly one use. + /// Test whether the register associated with this value has exactly one use, + /// in which case that single use is killing. Note that multiple IR values + /// may map onto the same register, in which case this is not the same as + /// checking that an IR value has one use. bool hasTrivialKill(const Value *V); /// Create a machine mem operand from the given instruction. diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 62f7f3d98ba6..0ff77d4ba1ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) { if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; + // Casts and extractvalues may be trivially coalesced by fast-isel. + if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr || + I->getOpcode() == Instruction::ExtractValue) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. 
return I->hasOneUse() && - !(I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt || - I->getOpcode() == Instruction::IntToPtr) && cast(*I->user_begin())->getParent() == I->getParent(); } diff --git a/llvm/test/CodeGen/X86/pr49467.ll b/llvm/test/CodeGen/X86/pr49467.ll new file mode 100644 index 000000000000..9b3502552066 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr49467.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s + +declare { i8*, i64 } @get() + +declare void @use(i8*, i64) + +define void @test(i64* %p) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; CHECK-NEXT: callq get@PLT +; CHECK-NEXT: movq (%rsp), %rdi # 8-byte Reload +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rsi, (%rdi) +; CHECK-NEXT: # implicit-def: $rdi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %struct = call { i8*, i64 } @get() + %struct.1 = extractvalue { i8*, i64 } %struct, 1 + store i64 %struct.1, i64* %p, align 8 + %struct.2 = extractvalue { i8*, i64 } %struct, 1 + call void @use(i8* undef, i64 %struct.2) + ret void +} From 79a79d1d01c4c206d8de3569c72747587d929770 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 14 Mar 2021 16:39:03 +0100 Subject: [PATCH 198/318] [X86] Add test for PR49587 (NFC) Shows a miscompile with FastISel. 
(cherry picked from commit 0d814ca0f02733d6581bf209fadbebf3035380e0) --- llvm/test/CodeGen/X86/pr49587.ll | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr49587.ll diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll new file mode 100644 index 000000000000..343f1a0149c0 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr49587.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s + +define i32 @test(i64 %arg) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $1, %rdi +; CHECK-NEXT: setb %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: # %bb.1: # %no_overflow +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: jmp .LBB0_2 +; CHECK-NEXT: .LBB0_2: # %merge +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: retq +entry: + %usubo = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %arg, i64 1) + %overflow = extractvalue { i64, i1 } %usubo, 1 + br i1 %overflow, label %merge, label %no_overflow + +no_overflow: + br label %merge + +merge: + %phi = phi i32 [ 1, %no_overflow ], [ 0, %entry ] + ret i32 %phi +} + +declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) From 38dd45b00431e2c065e172751492e0ded59e49e6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 14 Mar 2021 16:47:41 +0100 Subject: [PATCH 199/318] [X86][FastISel] Fix with.overflow eflags clobber (PR49587) If the successor block has a phi node, then additional moves may be inserted into predecessors, which may clobber eflags. Don't try to fold the with.overflow result into the branch in that case. 
This is done by explicitly checking for any phis in successor blocks, not sure if there's some more principled way to address this. Other fused compare and branch patterns avoid the issue by emitting the comparison when handling the branch, so that no instructions may be inserted in between. In this case, the with.overflow call is emitted separately (and I don't think this is avoidable, as it will generally have at least two users). Fixes https://bugs.llvm.org/show_bug.cgi?id=49587. Differential Revision: https://reviews.llvm.org/D98600 (cherry picked from commit 7669455df49e6fc8ae7d9f4bd4ee95bb20e7eb6e) --- llvm/lib/Target/X86/X86FastISel.cpp | 8 ++++++++ llvm/test/CodeGen/X86/pr49587.ll | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index caf158102230..a1a16a19f5e5 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, return false; } + // Make sure no potentially eflags clobbering phi moves can be inserted in + // between. 
+ auto HasPhis = [](const BasicBlock *Succ) { + return !llvm::empty(Succ->phis()); + }; + if (I->isTerminator() && llvm::any_of(successors(I), HasPhis)) + return false; + CC = TmpCC; return true; } diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll index 343f1a0149c0..7dc54a526608 100644 --- a/llvm/test/CodeGen/X86/pr49587.ll +++ b/llvm/test/CodeGen/X86/pr49587.ll @@ -5,10 +5,11 @@ define i32 @test(i64 %arg) nounwind { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $1, %rdi -; CHECK-NEXT: setb %al +; CHECK-NEXT: setb %cl ; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: jb .LBB0_2 +; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %no_overflow ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill From 5b3480610383ba281ef0c7918a6c097058a408d4 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 12 Mar 2021 14:15:27 -0500 Subject: [PATCH 200/318] [InstCombine] add test for zext-of-icmps; NFC PR49475 shows an infinite loop outcome, but this tries to show the root cause with a minimal test. (cherry picked from commit 579b8fc2e97c489308f97b01d13d894c03c0a16c) --- .../Transforms/InstCombine/zext-or-icmp.ll | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index a77aa7ac7ebd..54ae0858aa67 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -106,3 +106,23 @@ block2: %conv2 = zext i1 %cmp1 to i32 ret i32 %conv2 } + +; FIXME: This should not end with more instructions than it started from. 
+ +define i32 @PR49475(i32 %x, i16 %y) { +; CHECK-LABEL: @PR49475( +; CHECK-NEXT: [[M:%.*]] = and i16 [[Y:%.*]], 1 +; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[B11:%.*]] = zext i1 [[B1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[M]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[Z3:%.*]] = or i32 [[B11]], [[TMP2]] +; CHECK-NEXT: ret i32 [[Z3]] +; + %m = and i16 %y, 1 + %b1 = icmp eq i32 %x, 0 + %b2 = icmp eq i16 %m, 0 + %t1 = or i1 %b1, %b2 + %z = zext i1 %t1 to i32 + ret i32 %z +} From ff2cf8fafa5ad9a76e59fa086d969d4e2ecc3a39 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 13 Mar 2021 08:26:27 -0500 Subject: [PATCH 201/318] [InstCombine] avoid creating an extra instruction in zext fold and possible inf-loop The structure of this fold is suspect vs. most of instcombine because it creates instructions and tries to delete them immediately after. If we don't have the operand types for the icmps, then we are not behaving as assumed. And as shown in PR49475, we can inf-loop. 
(cherry picked from commit 4224a36957420744756d6a6450eb6502a1bfadc3) --- .../InstCombine/InstCombineCasts.cpp | 1 + .../Transforms/InstCombine/zext-or-icmp.ll | 58 +++++++++++++++++-- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0b53007bb6dc..07e68c44416d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { ICmpInst *LHS = dyn_cast(SrcI->getOperand(0)); ICmpInst *RHS = dyn_cast(SrcI->getOperand(1)); if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index 54ae0858aa67..5ae3d8ea0dba 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -107,17 +107,16 @@ block2: ret i32 %conv2 } -; FIXME: This should not end with more instructions than it started from. +; This should not end with more instructions than it started from. 
define i32 @PR49475(i32 %x, i16 %y) { ; CHECK-LABEL: @PR49475( ; CHECK-NEXT: [[M:%.*]] = and i16 [[Y:%.*]], 1 ; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[B11:%.*]] = zext i1 [[B1]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[M]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 -; CHECK-NEXT: [[Z3:%.*]] = or i32 [[B11]], [[TMP2]] -; CHECK-NEXT: ret i32 [[Z3]] +; CHECK-NEXT: [[B2:%.*]] = icmp eq i16 [[M]], 0 +; CHECK-NEXT: [[T1:%.*]] = or i1 [[B1]], [[B2]] +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[T1]] to i32 +; CHECK-NEXT: ret i32 [[Z]] ; %m = and i16 %y, 1 %b1 = icmp eq i32 %x, 0 @@ -126,3 +125,50 @@ define i32 @PR49475(i32 %x, i16 %y) { %z = zext i1 %t1 to i32 ret i32 %z } + +; This would infinite-loop. + +define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) { +; CHECK-LABEL: @PR49475_infloop( +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[T0:%.*]], 0 +; CHECK-NEXT: [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0 +; CHECK-NEXT: [[T1:%.*]] = or i1 [[B]], [[B2]] +; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[T1]] to i32 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[EXT]], [[T0]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], 140 +; CHECK-NEXT: [[XOR1:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[CONV16:%.*]] = sext i8 [[I162:%.*]] to i64 +; CHECK-NEXT: [[SUB17:%.*]] = sub i64 [[CONV16]], [[E:%.*]] +; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[SUB17]], 32 +; CHECK-NEXT: [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[CONV18]], [[XOR1]] +; CHECK-NEXT: [[CONV19:%.*]] = zext i1 [[CMP]] to i16 +; CHECK-NEXT: [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]] +; CHECK-NEXT: [[TRUNC44:%.*]] = trunc i16 [[OR21]] to i8 +; CHECK-NEXT: [[INC:%.*]] = or i8 [[TRUNC44]], [[I162]] +; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TOBOOL23_NOT]]) +; CHECK-NEXT: ret i8 [[INC]] +; + %b = icmp eq i32 %t0, 0 + %b2 = icmp eq i16 %insert, 0 + %t1 = or i1 %b, %b2 + %ext = zext i1 %t1 
to i32 + %and = and i32 %t0, %ext + %conv13 = zext i32 %and to i64 + %xor = xor i64 %conv13, 140 + %conv16 = sext i8 %i162 to i64 + %sub17 = sub i64 %conv16, %e + %sext = shl i64 %sub17, 32 + %conv18 = ashr exact i64 %sext, 32 + %cmp = icmp sge i64 %xor, %conv18 + %conv19 = zext i1 %cmp to i16 + %or21 = or i16 %insert, %conv19 + %trunc44 = trunc i16 %or21 to i8 + %inc = add i8 %i162, %trunc44 + %tobool23.not = icmp eq i16 %or21, 0 + call void @llvm.assume(i1 %tobool23.not) + ret i8 %inc +} + +declare void @llvm.assume(i1 noundef) From 9ae9ab1ca34384e07b751c16645e22a0b953b08b Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Tue, 30 Mar 2021 14:30:15 -0700 Subject: [PATCH 202/318] [RISCV][MC] Fix nf encoding for vector ld/st whole register The three bit nf is one less than the number of NFIELDS, so we manually decrement 1 for VS1/2/4/8R & VL1/2/4/8R. Differential revision: https://reviews.llvm.org/D98185 (cherry picked from commit rG5cdb2e98608bf57c216ee7067e8a12d070c9e2bd) --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 16 +++--- llvm/test/MC/RISCV/rvv/aliases.s | 16 +++--- llvm/test/MC/RISCV/rvv/load.s | 64 ++++++++++++------------ llvm/test/MC/RISCV/rvv/store.s | 16 +++--- 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 86fbc73d81d5..b3fc76aee161 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore; def VSOXEI32_V : VIndexedStore; def VSOXEI64_V : VIndexedStore; -defm VL1R : VWholeLoad<1, "vl1r">; -defm VL2R : VWholeLoad<2, "vl2r">; -defm VL4R : VWholeLoad<4, "vl4r">; -defm VL8R : VWholeLoad<8, "vl8r">; +defm VL1R : VWholeLoad<0, "vl1r">; +defm VL2R : VWholeLoad<1, "vl2r">; +defm VL4R : VWholeLoad<3, "vl4r">; +defm VL8R : VWholeLoad<7, "vl8r">; def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V 
VR:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<1, "vs1r.v">; -def VS2R_V : VWholeStore<2, "vs2r.v">; -def VS4R_V : VWholeStore<4, "vs4r.v">; -def VS8R_V : VWholeStore<8, "vs8r.v">; +def VS1R_V : VWholeStore<0, "vs1r.v">; +def VS2R_V : VWholeStore<1, "vs2r.v">; +def VS4R_V : VWholeStore<3, "vs4r.v">; +def VS8R_V : VWholeStore<7, "vs8r.v">; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s index 2e5120c91e45..ebe9e79399a6 100644 --- a/llvm/test/MC/RISCV/rvv/aliases.s +++ b/llvm/test/MC/RISCV/rvv/aliases.s @@ -54,17 +54,17 @@ vmset.m v0 # ALIAS: vmnot.m v0, v1 # encoding: [0x57,0xa0,0x10,0x76] # NO-ALIAS: vmnand.mm v0, v1, v1 # encoding: [0x57,0xa0,0x10,0x76] vmnot.m v0, v1 -# ALIAS: vl1r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] -# NO-ALIAS: vl1re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] +# ALIAS: vl1r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] +# NO-ALIAS: vl1re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] vl1r.v v0, (a0) -# ALIAS: vl2r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x42] -# NO-ALIAS: vl2re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x42] +# ALIAS: vl2r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] +# NO-ALIAS: vl2re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] vl2r.v v0, (a0) -# ALIAS: vl4r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x82] -# NO-ALIAS: vl4re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x82] +# ALIAS: vl4r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x62] +# NO-ALIAS: vl4re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x62] vl4r.v v0, (a0) -# ALIAS: vl8r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] -# NO-ALIAS: vl8re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] +# ALIAS: vl8r.v v0, (a0) # encoding: [0x07,0x00,0x85,0xe2] +# NO-ALIAS: vl8re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0xe2] vl8r.v 
v0, (a0) # ALIAS: vneg.v v2, v1, v0.t # encoding: [0x57,0x41,0x10,0x0c] # NO-ALIAS: vrsub.vx v2, v1, zero, v0.t # encoding: [0x57,0x41,0x10,0x0c] diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s index 3d0dbb15c36e..45a3881cb60d 100644 --- a/llvm/test/MC/RISCV/rvv/load.s +++ b/llvm/test/MC/RISCV/rvv/load.s @@ -256,96 +256,96 @@ vloxei64.v v8, (a0), v4 vl1re8.v v8, (a0) # CHECK-INST: vl1re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x22] +# CHECK-ENCODING: [0x07,0x04,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 22 +# CHECK-UNKNOWN: 07 04 85 02 vl1re16.v v8, (a0) # CHECK-INST: vl1re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x22] +# CHECK-ENCODING: [0x07,0x54,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 22 +# CHECK-UNKNOWN: 07 54 85 02 vl1re32.v v8, (a0) # CHECK-INST: vl1re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x22] +# CHECK-ENCODING: [0x07,0x64,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 22 +# CHECK-UNKNOWN: 07 64 85 02 vl1re64.v v8, (a0) # CHECK-INST: vl1re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x22] +# CHECK-ENCODING: [0x07,0x74,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 22 +# CHECK-UNKNOWN: 07 74 85 02 vl2re8.v v8, (a0) # CHECK-INST: vl2re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x42] +# CHECK-ENCODING: [0x07,0x04,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 42 +# CHECK-UNKNOWN: 07 04 85 22 vl2re16.v v8, (a0) # CHECK-INST: vl2re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x42] +# CHECK-ENCODING: [0x07,0x54,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 42 +# CHECK-UNKNOWN: 07 54 85 
22 vl2re32.v v8, (a0) # CHECK-INST: vl2re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x42] +# CHECK-ENCODING: [0x07,0x64,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 42 +# CHECK-UNKNOWN: 07 64 85 22 vl2re64.v v8, (a0) # CHECK-INST: vl2re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x42] +# CHECK-ENCODING: [0x07,0x74,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 42 +# CHECK-UNKNOWN: 07 74 85 22 vl4re8.v v8, (a0) # CHECK-INST: vl4re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x82] +# CHECK-ENCODING: [0x07,0x04,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 82 +# CHECK-UNKNOWN: 07 04 85 62 vl4re16.v v8, (a0) # CHECK-INST: vl4re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x82] +# CHECK-ENCODING: [0x07,0x54,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 82 +# CHECK-UNKNOWN: 07 54 85 62 vl4re32.v v8, (a0) # CHECK-INST: vl4re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x82] +# CHECK-ENCODING: [0x07,0x64,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 82 +# CHECK-UNKNOWN: 07 64 85 62 vl4re64.v v8, (a0) # CHECK-INST: vl4re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x82] +# CHECK-ENCODING: [0x07,0x74,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 82 +# CHECK-UNKNOWN: 07 74 85 62 vl8re8.v v8, (a0) # CHECK-INST: vl8re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x02] +# CHECK-ENCODING: [0x07,0x04,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 02 +# CHECK-UNKNOWN: 07 04 85 e2 vl8re16.v v8, (a0) # CHECK-INST: vl8re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x02] +# 
CHECK-ENCODING: [0x07,0x54,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 02 +# CHECK-UNKNOWN: 07 54 85 e2 vl8re32.v v8, (a0) # CHECK-INST: vl8re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x02] +# CHECK-ENCODING: [0x07,0x64,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 02 +# CHECK-UNKNOWN: 07 64 85 e2 vl8re64.v v8, (a0) # CHECK-INST: vl8re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x02] +# CHECK-ENCODING: [0x07,0x74,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 02 +# CHECK-UNKNOWN: 07 74 85 e2 diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s index e4795aa1c2c9..b5a75ac2d008 100644 --- a/llvm/test/MC/RISCV/rvv/store.s +++ b/llvm/test/MC/RISCV/rvv/store.s @@ -208,24 +208,24 @@ vsoxei64.v v24, (a0), v4 vs1r.v v24, (a0) # CHECK-INST: vs1r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x22] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 22 +# CHECK-UNKNOWN: 27 0c 85 02 vs2r.v v24, (a0) # CHECK-INST: vs2r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x42] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 42 +# CHECK-UNKNOWN: 27 0c 85 22 vs4r.v v24, (a0) # CHECK-INST: vs4r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x82] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 82 +# CHECK-UNKNOWN: 27 0c 85 62 vs8r.v v24, (a0) # CHECK-INST: vs8r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x02] +# CHECK-ENCODING: [0x27,0x0c,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 02 +# 
CHECK-UNKNOWN: 27 0c 85 e2 From 31001be371e8f2c74470e727e54503fb2aabec8b Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Wed, 17 Mar 2021 16:59:55 +0100 Subject: [PATCH 203/318] [LoopVectorize] Refine hasIrregularType predicate The `hasIrregularType` predicate checks whether an array of N values of type Ty is "bitcast-compatible" with a vector. The previous check returned invalid results in some cases where there's some padding between the array elements: eg. a 4-element array of u7 values is considered as compatible with <4 x u7>, even though the vector is only loading/storing 28 bits instead of 32. The problem causes LLVM to generate incorrect code for some targets: for AArch64 the vector loads/stores are lowered in terms of ubfx/bfi, effectively losing the top (N * padding bits). Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D97465 (cherry picked from commit 4f024938e4c932feba4d28573ec4522106f8d879) --- .../Transforms/Vectorize/LoopVectorize.cpp | 22 +++++---------- .../LoopVectorize/irregular_type.ll | 27 +++++++++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/irregular_type.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d36e078444bc..b456a97aa4ec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) { /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an -/// element of the corresponding vector type at the given vectorization factor. -static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" - // with a vector. 
- if (VF.isVector()) { - auto *VectorTy = VectorType::get(Ty, VF); - return TypeSize::get(VF.getKnownMinValue() * - DL.getTypeAllocSize(Ty).getFixedValue(), - VF.isScalable()) != DL.getTypeStoreSize(VectorTy); - } - - // If the vectorization factor is one, we just check if an array of type Ty - // requires padding between elements. +/// element of the corresponding vector type. +static bool hasIrregularType(Type *Ty, const DataLayout &DL) { + // Determine if an array of N elements of type Ty is "bitcast compatible" + // with a vector. + // This is only true if there is no padding between the array elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } @@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = getMemInstValueType(I); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; // Check if masking is required. @@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; return true; diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll new file mode 100644 index 000000000000..167a1a101e6f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +; Ensure the array loads/stores are not optimized into vector operations when +; the element type has padding bits. 
+ +; CHECK: foo +; CHECK: vector.body +; CHECK-NOT: load <4 x i7> +; CHECK-NOT: store <4 x i7> +; CHECK: for.body +define void @foo(i7* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv + %0 = load i7, i7* %arrayidx, align 1 + %sub = add nuw nsw i7 %0, 0 + store i7 %sub, i7* %arrayidx, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp = icmp eq i64 %indvars.iv.next, %n + br i1 %cmp, label %for.exit, label %for.body + +for.exit: + ret void +} From 04ba60cfe598e41084fb848daae47e0ed910fa7d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 28 Mar 2021 16:30:47 -0700 Subject: [PATCH 204/318] [ORC][C-bindings] Fix some ORC C bindings function names and signatures. LLVMOrcDisposeObjectLayer and LLVMOrcExecutionSessionGetJITDylibByName did not have matching signatures between the C-API header and binding implementations. Fixes http://llvm.org/PR49745. Patch by Mats Larsen. Thanks Mats! Reviewed by: lhames Differential Revision: https://reviews.llvm.org/D99478 (cherry picked from commit 666df2e2cbe9fc252d3b2d6cbb214c2c2f6afc65) --- llvm/include/llvm-c/Orc.h | 7 ++++--- llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 183107c148a6..9beef44c89dd 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT); * ownership has not been passed to a JITDylib (e.g. because some error * prevented the client from calling LLVMOrcJITDylibAddGenerator). */ -void LLVMOrcDisposeDefinitionGenerator( - LLVMOrcDefinitionGeneratorRef DG); +void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG); /** * Dispose of a MaterializationUnit. 
@@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES, * Returns the JITDylib with the given name, or NULL if no such JITDylib * exists. */ -LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name); +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES, + const char *Name); /** * Return a reference to a newly created resource tracker associated with JD. diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index dfdd2c6c669f..834d4cc8f514 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder( delete unwrap(JTMB); } -void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { +void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { delete unwrap(ObjLayer); } From d28af7c654d8db0b68c175db5ce212d74fb5e9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Tue, 6 Apr 2021 18:38:18 +0200 Subject: [PATCH 205/318] remove -fpch-codegen and -fpch-debuginfo from Clang 12.0 release notes These were new in 11.0. The commit adding the options landed after 11.x branch had already been branched off from master, and only then backported to 11.x, so the release notes change stayed for 12.0. --- clang/docs/ReleaseNotes.rst | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f3499d167361..4cc1b0b9d2cf 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -82,31 +82,6 @@ New Compiler Flags in that case. The option's behaviour mirrors GCC, the helpers are implemented both in compiler-rt and libgcc. -- -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo - for contents of a precompiled header in a separate object file. 
This object - file needs to be linked in, but its contents do not need to be generated - for other objects using the precompiled header. This should usually save - compile time. If not using clang-cl, the separate object file needs to - be created explicitly from the precompiled header. - Example of use: - - .. code-block:: console - - $ clang++ -x c++-header header.h -o header.pch -fpch-codegen -fpch-debuginfo - $ clang++ -c header.pch -o shared.o - $ clang++ -c source.cpp -o source.o -include-pch header.pch - $ clang++ -o binary source.o shared.o - - - Using -fpch-instantiate-templates when generating the precompiled header - usually increases the amount of code/debuginfo that can be shared. - - In some cases, especially when building with optimizations enabled, using - -fpch-codegen may generate so much code in the shared object that compiling - it may be a net loss in build time. - - Since headers may bring in private symbols of other libraries, it may be - sometimes necessary to discard unused symbols (such as by adding - -Wl,--gc-sections on ELF platforms to the linking command, and possibly - adding -fdata-sections -ffunction-sections to the command generating - the shared object). - New option ``-fbinutils-version=`` specifies the targeted binutils version. For example, ``-fbinutils-version=2.35`` means compatibility with GNU as/ld before 2.35 is not needed: new features can be used and there is no need to From fa0971b87fb2c9d14d1bba2551e61f02f18f329b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 1 Feb 2021 12:43:33 +0000 Subject: [PATCH 206/318] GlobalISel: check type size before getZExtValue()ing it. Otherwise getZExtValue() asserts. 
(cherry picked from commit c2b322fc19e829162ed4c7dcd04d9e9b2cd4e66c) --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 5 ++--- .../CodeGen/AArch64/GlobalISel/huge-switch.ll | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b97c369b832d..b7883cbc3120 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. - if (CI && CI->getZExtValue() == 1 && - MRI->getType(CondLHS).getSizeInBits() == 1 && - CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll new file mode 100644 index 000000000000..8742a848c4af --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s +define void @foo(i512 %in) { +; CHECK-LABEL: foo: +; CHECK: cbz + switch i512 %in, label %default [ + i512 3923188584616675477397368389504791510063972152790021570560, label %l1 + i512 3923188584616675477397368389504791510063972152790021570561, label %l2 + i512 3923188584616675477397368389504791510063972152790021570562, label %l3 + ] + +default: + ret void + +l1: + ret void + +l2: + ret void + +l3: + ret void +} From 757752f568db698e3c0c35065c008489f2319a7b Mon Sep 17 00:00:00 2001 
From: Tom Stellard Date: Thu, 22 Apr 2021 15:03:36 -0700 Subject: [PATCH 207/318] Bump version to 12.0.1 --- libcxx/CMakeLists.txt | 2 +- libcxxabi/CMakeLists.txt | 2 +- libunwind/CMakeLists.txt | 2 +- llvm/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- llvm/utils/release/build_llvm_package.bat | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 9bf1a02f0908..cdd5495e36ab 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -29,7 +29,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL project(libcxx CXX C) set(PACKAGE_NAME libcxx) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 426c855288fc..6de2b5a2ed10 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -28,7 +28,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B project(libcxxabi CXX C) set(PACKAGE_NAME libcxxabi) - set(PACKAGE_VERSION 11.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 48cb8e004e08..570b8db90653 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -24,7 +24,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B project(libunwind LANGUAGES C CXX ASM) set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.0) + set(PACKAGE_VERSION 12.0.1) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 277d0fe54d7b..28ccef34d8fc 100644 --- a/llvm/CMakeLists.txt +++ 
b/llvm/CMakeLists.txt @@ -11,7 +11,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index a66a92550a00..942974f7cf91 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 12 llvm_version_minor = 0 -llvm_version_patch = 0 +llvm_version_patch = 1 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 9c2aa512e179..c281391466b8 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Daniel Dunbar' __email__ = 'daniel@minormatter.com' -__versioninfo__ = (12, 0, 0) +__versioninfo__ = (12, 0, 1) __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' __all__ = [] diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat index 35dcd9f613c4..4747c70ca392 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_package.bat @@ -27,8 +27,8 @@ set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python36 for /f "usebackq" %%i in (`PowerShell ^(Get-Date^).ToString^('yyyyMMdd'^)`) do set datestamp=%%i set revision=%1 -set package_version=12.0.0-%revision:~0,8% -set clang_format_vs_version=12.0.0.%datestamp% +set package_version=12.0.1-%revision:~0,8% +set clang_format_vs_version=12.0.1.%datestamp% set build_dir=llvm_package_%revision:~0,8% echo Revision: %revision% From eae7f3e3d45077a509a37bb2f2ff36b8196a855e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 18 Mar 2021 14:08:10 +0200 Subject: [PATCH 208/318] [lit] Pass the USERPROFILE 
variable through on Windows When running in a Windows Container, the Git for Windows Unix tools (C:\Program Files\Git\usr\bin) just hang if this variable isn't passed through. Currently, running the LLVM/clang tests in a Windows Container fails if that directory is added to the path, but succeeds after this change. (After this change, the previously used GnuWin tools can be left out entirely, too, as lit automatically picks up the Git for Windows tools if necessary.) Differential Revision: https://reviews.llvm.org/D98858 (cherry picked from commit 9de63b2e051cb3e79645cc20b83b4d33d132cba0) --- llvm/utils/lit/lit/TestingConfig.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index 38d05066a2b0..e6c1b937c27a 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -33,6 +33,7 @@ def fromdefaults(litConfig): pass_vars.append('INCLUDE') pass_vars.append('LIB') pass_vars.append('PATHEXT') + pass_vars.append('USERPROFILE') environment['PYTHONBUFFERED'] = '1' for var in pass_vars: From 072c90a863aac1334a4950b3da262a025516dea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 24 Mar 2021 23:58:54 +0200 Subject: [PATCH 209/318] [LLD] Fix probing a MSYS based 'tar' in a Windows Container Don't run the 'tar' tool in a cleared environment with only the LANG variable set, just set LANG on top of the existing environment. If the 'tar' tool is an MSYS based tool, running it in a Windows Container hangs if all environment variables are cleared - in particular, the USERPROFILE variable needs to be kept intact. This is the same issue as was fixed in other places in 9de63b2e051cb3e79645cc20b83b4d33d132cba0, but contrary to running the actual tests, running with an as-cleared-as-possible environment here is less important.
Differential Revision: https://reviews.llvm.org/D99304 (cherry picked from commit a88556733a4dced22416bd3f45255128b9eb4f49) --- lld/test/lit.cfg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 8e31fd3977f9..670f41f0b631 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -101,11 +101,13 @@ tar_executable = lit.util.which('tar', config.environment['PATH']) if tar_executable: + env = os.environ + env['LANG'] = 'C' tar_version = subprocess.Popen( [tar_executable, '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env={'LANG': 'C'}) + env=env) sout, _ = tar_version.communicate() if 'GNU tar' in sout.decode(): config.available_features.add('gnutar') From 25dd67ef882c327f9b6a3082cab6c33c9ff52d42 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 29 Apr 2021 23:26:25 -0700 Subject: [PATCH 210/318] [X86][AVX] foldShuffleOfHorizOp - don't attempt to handle 256-bit X86ISD::VBROADCAST (PR49971) NOTE: This is for the 12.x release branch ONLY Minimal patch to avoid the issue encountered in PR49971 (it's already been dealt with in trunk through a larger refactor that can't be easily merged). Bail for non-128-bit vector broadcasts of (F)HADD/SUB ops - the existing logic doesn't correctly deal with the fact that the broadcast will splat across the 128-bit lanes. 
Reviewed By: spatel, wristow Differential Revision: https://reviews.llvm.org/D101104 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++ llvm/test/CodeGen/X86/horizontal-shuffle-3.ll | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b816c710f98..1e2407c7e7f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37889,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) { // replicating low and high halves (and without changing the type/length of // the vector), we don't need the shuffle. if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { + if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector()) + return SDValue(); if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X diff --git a/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll b/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll index 424ecf352e97..297070ad2bb6 100644 --- a/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll +++ b/llvm/test/CodeGen/X86/horizontal-shuffle-3.ll @@ -98,6 +98,17 @@ define <8 x i32> @test_unpackhi_hsub_v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> ret <8 x i32> %7 } +define <4 x double> @PR49971(<4 x double> %0) { +; CHECK-LABEL: PR49971: +; CHECK: ## %bb.0: +; CHECK-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %2 = tail call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %0, <4 x double> %0) + %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + ret <4 x double> %3 +} + declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) From
0cbbf06b625605fff83d89b17c2187c7ccfcecd5 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 8 Apr 2021 15:37:32 +0200 Subject: [PATCH 211/318] [clangd] Log a message when gRPC support is off, but remote-index is configured Before this change clangd would emit a diagnostic whenever remote-index was configured but the binary didn't have gRPC support. This can be annoying when projects are configuring remote-index through their configs but developers have a clangd binary without the support. Differential Revision: https://reviews.llvm.org/D100103 (cherry picked from commit b9b708eef8cb7bcb073361283cd573beb04992a9) --- clang-tools-extra/clangd/ConfigCompile.cpp | 5 +++-- .../clangd/unittests/ConfigCompileTests.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index dadc578c3a81..b4f0d6186886 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -321,8 +321,9 @@ struct FragmentCompiler { llvm::SMRange BlockRange) { #ifndef CLANGD_ENABLE_REMOTE if (External.Server) { - diag(Error, "Clangd isn't compiled with remote index support, ignoring " - "Server."
External.Server->Range); + elog("Clangd isn't compiled with remote index support, ignoring Server: " + "{0}", + *External.Server); External.Server.reset(); } #endif diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index d9aa171f3102..4961d3474fd9 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -324,15 +324,15 @@ TEST_F(ConfigCompileTests, ExternalBlockWarnOnMultipleSource) { External.Server.emplace(""); Frag.Index.External = std::move(External); compileAndApply(); - llvm::StringLiteral ExpectedDiag = #ifdef CLANGD_ENABLE_REMOTE - "Exactly one of File or Server must be set."; + EXPECT_THAT( + Diags.Diagnostics, + Contains(AllOf(DiagMessage("Exactly one of File or Server must be set."), + DiagKind(llvm::SourceMgr::DK_Error)))); #else - "Clangd isn't compiled with remote index support, ignoring Server."; + ASSERT_TRUE(Conf.Index.External.hasValue()); + EXPECT_EQ(Conf.Index.External->Kind, Config::ExternalIndexSpec::File); #endif - EXPECT_THAT(Diags.Diagnostics, - Contains(AllOf(DiagMessage(ExpectedDiag), - DiagKind(llvm::SourceMgr::DK_Error)))); } TEST_F(ConfigCompileTests, ExternalBlockErrOnNoSource) { From 907a751a38fff8d05b288ab52b19ba4e2cc1fc38 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Apr 2021 23:15:29 +0300 Subject: [PATCH 212/318] [NFC][InstCombine] Add test for PR49778 (cherry picked from commit 5352490ce613f1bdedaf478765b089b1f0a8be0d) --- .../redundant-left-shift-input-masking-pr49778.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll new file mode 100644 index 000000000000..4865afa56a03 --- /dev/null 
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; PR49778: this should not be folded to 0. +define i32 @src(i1 %x2) { +; CHECK-LABEL: @src( +; CHECK-NEXT: ret i32 0 +; + %x13 = zext i1 %x2 to i32 + %_7 = shl i32 4294967295, %x13 + %mask = xor i32 %_7, 4294967295 + %_8 = and i32 %mask, %x13 + %_9 = shl i32 %_8, %x13 + ret i32 %_9 +} From 4a4b1c75a1ea3f1ca90ef45470c42debb81ffc90 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Apr 2021 23:23:10 +0300 Subject: [PATCH 213/318] [NFC][InstCombine] Extract canTryToConstantAddTwoShiftAmounts() as helper (cherry picked from commit dceb3e599668496420d41b993100d23eeb7c0ada) --- .../InstCombine/InstCombineShifts.cpp | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 7295369365c4..52f064e17820 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -21,6 +21,30 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" +bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, + Value *ShAmt1) { + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now.. + if (ShAmt0->getType() != ShAmt1->getType()) + return false; + + // As input, we have the following pattern: + // Sh0 (Sh1 X, Q), K + // We want to rewrite that as: + // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts. so it may now overflow in smaller bitwidth. 
+ // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (Sh0->getType()->getScalarSizeInBits() - 1) + + (Sh1->getType()->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); + return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount); +} + // Given pattern: // (x shiftopcode Q) shiftopcode K // we should rewrite it as @@ -57,26 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) return nullptr; - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now.. - if (ShAmt0->getType() != ShAmt1->getType()) - return nullptr; - - // As input, we have the following pattern: - // Sh0 (Sh1 X, Q), K - // We want to rewrite that as: - // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) - // While we know that originally (Q+K) would not overflow - // (because 2 * (N-1) u<= iN -1), we have looked past extensions of - // shift amounts. so it may now overflow in smaller bitwidth. - // To ensure that does not happen, we need to ensure that the total maximal - // shift amount is still representable in that smaller bit width. - unsigned MaximalPossibleTotalShiftAmount = - (Sh0->getType()->getScalarSizeInBits() - 1) + - (Sh1->getType()->getScalarSizeInBits() - 1); - APInt MaximalRepresentableShiftAmount = - APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); - if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1)) return nullptr; // We are only looking for signbit extraction if we have two right shifts. 
From c27ad80507bfea35da07681fd4ec9972ca698015 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 4 Apr 2021 23:25:29 +0300 Subject: [PATCH 214/318] [InstCombine] dropRedundantMaskingOfLeftShiftInput(): check that adding shift amounts doesn't overflow (PR49778) This is identical to 781d077afb0ed9771c513d064c40170c1ccd21c9, but for the other function. For certain shift amount bit widths, we must first ensure that adding shift amounts is safe, that the sum won't have an unsigned overflow. Fixes https://bugs.llvm.org/show_bug.cgi?id=49778 (cherry picked from commit 2760a808b9916a2839513b7fd7314a464f52481e) --- .../lib/Transforms/InstCombine/InstCombineShifts.cpp | 12 ++++++------ .../redundant-left-shift-input-masking-pr49778.ll | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 52f064e17820..127bf8080959 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -226,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now. - if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? @@ -258,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. 
The types of those - // shift amounts may not match. If that's the case let's bailout now. - if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (ShiftShAmt-MaskShAmt) ? diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll index 4865afa56a03..8d70733ed03d 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-pr49778.ll @@ -4,7 +4,12 @@ ; PR49778: this should not be folded to 0. define i32 @src(i1 %x2) { ; CHECK-LABEL: @src( -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: [[X13:%.*]] = zext i1 [[X2:%.*]] to i32 +; CHECK-NEXT: [[_7:%.*]] = shl i32 -1, [[X13]] +; CHECK-NEXT: [[MASK:%.*]] = xor i32 [[_7]], -1 +; CHECK-NEXT: [[_8:%.*]] = and i32 [[MASK]], [[X13]] +; CHECK-NEXT: [[_9:%.*]] = shl i32 [[_8]], [[X13]] +; CHECK-NEXT: ret i32 [[_9]] ; %x13 = zext i1 %x2 to i32 %_7 = shl i32 4294967295, %x13 From 3568d61f11e2eb0017c7b65707bee7bf4111c8ca Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 10 Feb 2021 12:17:37 -0800 Subject: [PATCH 215/318] BPF: Implement TTI.IntImmCost() properly This patch implemented TTI.IntImmCost() properly. Each BPF insn has 32bit immediate space, so for any immediate which can be represented as 32bit signed int, the cost is technically free. If an int cannot be presented as a 32bit signed int, a ld_imm64 instruction is needed and a TCC_Basic is returned. 
This change was motivated by the observation that several bpf selftests failed with the latest llvm trunk, e.g., #10/16 strobemeta.o:FAIL #10/17 strobemeta_nounroll1.o:FAIL #10/18 strobemeta_nounroll2.o:FAIL #10/19 strobemeta_subprogs.o:FAIL #96 snprintf_btf:FAIL The failures occur because SpeculateAroundPHIsPass performed an aggressive transformation that alters control flow in a way the verifier currently cannot handle well. In llvm12, SpeculateAroundPHIsPass is not called. SpeculateAroundPHIsPass relied on TTI.getIntImmCost() and TTI.getIntImmCostInst() for profitability analysis. This patch implemented TTI.getIntImmCost() properly for the BPF backend, which also prevented the transformation that caused the above test failures. Differential Revision: https://reviews.llvm.org/D96448 (cherry picked from commit a260ae716030d5d2644a2af649501277d326bb21) --- llvm/lib/Target/BPF/BPFTargetMachine.cpp | 6 +++ llvm/lib/Target/BPF/BPFTargetMachine.h | 2 + llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 49 ++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 llvm/lib/Target/BPF/BPFTargetTransformInfo.h diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index c0244b9f2c74..a8fef2517b03 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -12,6 +12,7 @@ #include "BPFTargetMachine.h" #include "BPF.h" +#include "BPFTargetTransformInfo.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "TargetInfo/BPFTargetInfo.h" #include "llvm/CodeGen/Passes.h" @@ -145,6 +146,11 @@ void BPFPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +TargetTransformInfo +BPFTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(BPFTTIImpl(this, F)); +} + // Install an instruction selector pass using // the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() { diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 5243a15eb7b0..61c8a44cc402 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -34,6 +34,8 @@ class BPFTargetMachine : public LLVMTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h new file mode 100644 index 000000000000..622da9a0a3f7 --- /dev/null +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -0,0 +1,49 @@ +//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file uses the target's specific information to +// provide more precise answers to certain TTI queries, while letting the +// target independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H + +#include "BPFTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { +class BPFTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const BPFSubtarget *ST; + const BPFTargetLowering *TLI; + + const BPFSubtarget *getST() const { return ST; } + const BPFTargetLowering *getTLI() const { return TLI; } + +public: + explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { + if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + + return TTI::TCC_Basic; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H From f9efff398c1159b15964b166368b232f562e6cfc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 11 Feb 2021 10:00:32 -0800 Subject: [PATCH 216/318] BPF: Add LLVMAnalysis in CMakefile LINK_COMPONENTS buildbot reported a build error like below: BPFTargetMachine.cpp:(.text._ZN4llvm19TargetTransformInfo5ModelINS_10BPFTTIImplEED2Ev [_ZN4llvm19TargetTransformInfo5ModelINS_10BPFTTIImplEED2Ev]+0x14): undefined reference to `llvm::TargetTransformInfo::Concept::~Concept()' lib/Target/BPF/CMakeFiles/LLVMBPFCodeGen.dir/BPFTargetMachine.cpp.o: In function `llvm::TargetTransformInfo::Model::~Model()': Commit a260ae716030 ("BPF: Implement TTI.IntImmCost() properly") added TargetTransformInfo to BPF, which requires LLVMAnalysis dependence. In certain cmake configurations, lacking explicit LLVMAnalysis dependency may cause compilation error. 
Similar to other targets, this patch added LLVMAnalysis in CMakefile LINK_COMPONENTS explicitly. (cherry picked from commit 74975d35b47631da0c7911561f16d3ffd1af142a) --- llvm/lib/Target/BPF/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index 24c6c5e1255e..189a3a84c3df 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_target(BPFCodeGen BTFDebug.cpp LINK_COMPONENTS + Analysis AsmPrinter CodeGen Core From 2460947eefc2176693a4aa4d05cd9733e38c7ffe Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 24 Feb 2021 10:02:48 -0800 Subject: [PATCH 217/318] BPF: Implement TTI.getCmpSelInstrCost() properly The Select insn in BPF is expensive as BPF backend needs to resolve with conditionals. This patch set the getCmpSelInstrCost() to SCEVCheapExpansionBudget for Select insn to prevent some Select insn related optimizations. This change is motivated during bcc code review for https://github.com/iovisor/bcc/pull/3270 where IndVarSimplifyPass eventually caused generating the following asm code: ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 15: bc 51 00 00 00 00 00 00 w1 = w5 16: 04 01 00 00 ff ff ff ff w1 += -1 17: 67 05 00 00 20 00 00 00 r5 <<= 32 18: 77 05 00 00 20 00 00 00 r5 >>= 32 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 20: b7 05 00 00 06 00 00 00 r5 = 6 00000000000000a8 : 21: b7 02 00 00 00 00 00 00 r2 = 0 22: b7 01 00 00 00 00 00 00 r1 = 0 ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5 Note that insn #15 has w1 = w5 and w1 is refined later but r5(w5) is eventually saved on stack at insn #24 for later use. This cause later verifier failures. With this change, IndVarSimplifyPass won't do the above transformation any more. 
Differential Revision: https://reviews.llvm.org/D97479 (cherry picked from commit 1959ead525b8830cc8a345f45e1c3ef9902d3229) --- llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 622da9a0a3f7..62055497e685 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -18,6 +18,7 @@ #include "BPFTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" namespace llvm { class BPFTTIImpl : public BasicTTIImplBase { @@ -42,6 +43,17 @@ class BPFTTIImpl : public BasicTTIImplBase { return TTI::TCC_Basic; } + + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const llvm::Instruction *I = nullptr) { + if (Opcode == Instruction::Select) + return SCEVCheapExpansionBudget; + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); + } }; } // end namespace llvm From 6fe7c3728d1e98e05c67ceb03f429cb04a30e151 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 25 Feb 2021 15:34:48 -0800 Subject: [PATCH 218/318] BPF: Add LLVMTransformUtils in CMakefile LINK_COMPONENTS Commit 1959ead525b8 ("BPF: Implement TTI.getCmpSelInstrCost() properly") introduced a dependency on LLVMTransformUtils library. Let us encode this dependency explicitly in CMakefile to avoid build error. 
(cherry picked from commit 6d102f15a3af0a44cf2e26677e260bee425312f3) --- llvm/lib/Target/BPF/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index 189a3a84c3df..2d804ca8a73e 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(BPFCodeGen SelectionDAG Support Target + TransformUtils ADD_TO_COMPONENT BPF From b8e4d4eafeded48f3c07797a2d8ccc950394085e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 21 Apr 2021 00:09:53 -0500 Subject: [PATCH 219/318] [PollyACC] Fix implicit function definitions. NFC. The isl_id_* functions have been in use without including the corresponding isl/id.h header. According to the rules of C, a function is declared implicitly when first used, with an assumed int return type (32 bits on 64 bit systems). But the implementation returns a pointer (64 bits on 64 bit systems). This usually has no consequence because the return value is stored in a register that is 64 bits wide (RAX) and the optimizer does not truncate its value before using it again as a pointer value. However, LTO optimizers will be rightfully confused. Fix by including isl/id.h. This fixes llvm.org/PR50021 (cherry picked from commit 90e5ce0b0d6b0e72fdc034cbb612f67d67de0fdd) --- polly/lib/External/ppcg/print.c | 1 + 1 file changed, 1 insertion(+) diff --git a/polly/lib/External/ppcg/print.c b/polly/lib/External/ppcg/print.c index 79aaf2b00d23..dd839e48e51b 100644 --- a/polly/lib/External/ppcg/print.c +++ b/polly/lib/External/ppcg/print.c @@ -9,6 +9,7 @@ #include #include +#include "print.h" #include "util.h" From 8b2c019ace3c2e04108b550c5a2b60fc1c63865f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 21 Apr 2021 10:12:23 -0500 Subject: [PATCH 220/318] [PollyACC] Fix declaration/stub definition mismatch. NFC. external.c defines stub functions that are never used because of how Polly uses PPCG.
Unfortunately, they are defined as functions without return values or parameters, which does not match their declarations. Since they are never called, this was usually not a problem, but an LTO build gets confused by differently declared functions, or in the case of pet_options_args, by a global variable declaration that is defined as a function. Resolve by including the declaring headers in external.c, which forces the declaration and definition to match at compile-time. This fixes llvm.org/PR50021 (cherry picked from commit 89b59345ee29d2cc1afa1f60445916ae2e74be6d) --- polly/lib/External/ppcg/external.c | 167 +++++++++++++++-------------- 1 file changed, 89 insertions(+), 78 deletions(-) diff --git a/polly/lib/External/ppcg/external.c b/polly/lib/External/ppcg/external.c index 3a63ffbd97e5..c5ef6320e64f 100644 --- a/polly/lib/External/ppcg/external.c +++ b/polly/lib/External/ppcg/external.c @@ -1,181 +1,192 @@ -#include "assert.h" -#include "stdio.h" -#include "stdlib.h" +#include +#include +#include +#include +#include "cpu.h" +#include "opencl.h" + #define die() { \ fprintf(stderr, "Dummy function %s called\n", __FUNCTION__); \ abort(); \ } -void pet_scop_compute_outer_to_any(){ +__isl_give isl_union_map *pet_scop_compute_outer_to_any( + __isl_keep pet_scop *scop) { die(); } -void pet_scop_compute_outer_to_inner(){ +__isl_give isl_union_map *pet_scop_compute_outer_to_inner( + __isl_keep pet_scop *scop) { die(); } -void pet_tree_get_type(){ +enum pet_tree_type pet_tree_get_type(__isl_keep pet_tree *tree) { die(); } -void pet_tree_foreach_access_expr(){ +int pet_tree_foreach_access_expr(__isl_keep pet_tree *tree, + int (*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_expr_get_ctx(){ +isl_ctx *pet_expr_get_ctx(__isl_keep pet_expr *expr) { die(); } -void pet_expr_access_is_read(){ +isl_bool pet_expr_access_is_read(__isl_keep pet_expr *expr) { die(); } -void pet_expr_access_is_write(){ +isl_bool pet_expr_access_is_write(__isl_keep pet_expr
*expr) { die(); } -void pet_expr_access_get_tagged_may_read(){ +__isl_give isl_union_map *pet_expr_access_get_tagged_may_read( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_tagged_may_write(){ +__isl_give isl_union_map *pet_expr_access_get_tagged_may_write( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_must_write(){ +__isl_give isl_union_map *pet_expr_access_get_must_write( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_index(){ +__isl_give isl_multi_pw_aff *pet_expr_access_get_index( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_ref_id(){ +__isl_give isl_id *pet_expr_access_get_ref_id(__isl_keep pet_expr *expr) { die(); } -void print_cpu(){ +__isl_give isl_printer *print_cpu(__isl_take isl_printer *p, + struct ppcg_scop *ps, struct ppcg_options *options) { die(); } -void pet_stmt_print_body(){ - die(); -} -void pet_loc_get_start(){ - die(); -} -void pet_loc_get_end(){ - die(); -} -void pet_scop_collect_tagged_may_reads(){ - die(); -} -void pet_scop_collect_may_reads(){ +__isl_give isl_printer *pet_stmt_print_body(struct pet_stmt *stmt, + __isl_take isl_printer *p, __isl_keep isl_id_to_ast_expr *ref2expr) { die(); } -void pet_scop_collect_tagged_may_writes(){ +unsigned pet_loc_get_start(__isl_keep pet_loc *loc) { die(); } -void pet_scop_collect_may_writes(){ +unsigned pet_loc_get_end(__isl_keep pet_loc *loc) { die(); } -void pet_scop_collect_tagged_must_writes(){ +int pet_transform_C_source(isl_ctx *ctx, const char *input, FILE *output, + __isl_give isl_printer *(*transform)(__isl_take isl_printer *p, + __isl_take pet_scop *scop, void *user), void *user) { die(); } -void pet_scop_collect_must_writes(){ +__isl_give isl_printer *pet_scop_print_original(__isl_keep pet_scop *scop, + __isl_take isl_printer *p) { die(); } -void pet_scop_collect_tagged_must_kills(){ +__isl_null pet_scop *pet_scop_free(__isl_take pet_scop *scop) { die(); } -void pet_transform_C_source(){ +__isl_give pet_scop 
*pet_scop_align_params(__isl_take pet_scop *scop) { die(); } -void pet_scop_print_original(){ +int pet_scop_can_build_ast_exprs(__isl_keep pet_scop *scop) { die(); } -void pet_scop_free(){ +int pet_scop_has_data_dependent_conditions(__isl_keep pet_scop *scop) { die(); } -void pet_scop_align_params(){ +int pet_tree_foreach_expr(__isl_keep pet_tree *tree, + int (*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_scop_can_build_ast_exprs(){ +int pet_expr_foreach_call_expr(__isl_keep pet_expr *expr, + int (*fn)(__isl_keep pet_expr *expr, void *user), void *user) { die(); } -void pet_scop_has_data_dependent_conditions(){ +int pet_stmt_is_kill(struct pet_stmt *stmt) { die(); } -void pet_tree_foreach_expr(){ +struct isl_args pet_options_args; +const char *ppcg_version(void) { die(); } -void pet_expr_foreach_call_expr(){ +int pet_options_set_encapsulate_dynamic_control(isl_ctx *ctx, int val) { die(); } -void pet_stmt_is_kill(){ +int generate_opencl(isl_ctx *ctx, struct ppcg_options *options, + const char *input, const char *output) { die(); } -void pet_options_args() { +int generate_cpu(isl_ctx *ctx, struct ppcg_options *options, + const char *input, const char *output) { die(); } -void ppcg_print_guarded() { +__isl_give isl_id_to_ast_expr *pet_stmt_build_ast_exprs(struct pet_stmt *stmt, + __isl_keep isl_ast_build *build, + __isl_give isl_multi_pw_aff *(*fn_index)( + __isl_take isl_multi_pw_aff *mpa, __isl_keep isl_id *id, + void *user), void *user_index, + __isl_give isl_ast_expr *(*fn_expr)(__isl_take isl_ast_expr *expr, + __isl_keep isl_id *id, void *user), void *user_expr) { die(); } -void ppcg_version() { +__isl_give isl_union_map *pet_scop_get_tagged_may_reads( + __isl_keep pet_scop *scop) { die(); } -void pet_options_set_encapsulate_dynamic_control() { +__isl_give isl_union_map *pet_scop_get_may_reads(__isl_keep pet_scop *scop) { die(); } -void generate_opencl() { +__isl_give isl_union_map *pet_scop_get_may_writes(__isl_keep pet_scop 
*scop) { die(); } -void generate_cpu() { +__isl_give isl_union_map *pet_scop_get_must_writes(__isl_keep pet_scop *scop) { die(); } -void pet_stmt_build_ast_exprs() { +__isl_give isl_union_map *pet_scop_get_tagged_may_writes( + __isl_keep pet_scop *scop) { die(); } - void pet_scop_get_tagged_may_reads() { +__isl_give isl_union_map *pet_scop_get_tagged_must_writes( + __isl_keep pet_scop *scop) { die(); } - void pet_scop_get_may_reads() { - die(); -} -void pet_scop_get_may_writes() { - die(); -} -void pet_scop_get_must_writes() { - die(); -} -void pet_scop_get_tagged_may_writes() { - die(); -} -void pet_scop_get_tagged_must_writes() { -die(); -} -void pet_scop_get_must_kills() { +__isl_give isl_union_map *pet_scop_get_must_kills(__isl_keep pet_scop *scop) { die(); } -void pet_scop_get_tagged_must_kills() { +__isl_give isl_union_map *pet_scop_get_tagged_must_kills( + __isl_keep pet_scop *scop) { die(); } -void pet_expr_call_get_name() { +__isl_keep const char *pet_expr_call_get_name(__isl_keep pet_expr *expr) { die(); } -void pet_expr_call_set_name() { +__isl_give pet_expr *pet_expr_call_set_name(__isl_take pet_expr *expr, + __isl_keep const char *name) { die(); } -void pet_expr_get_arg() { +__isl_give pet_expr *pet_expr_get_arg(__isl_keep pet_expr *expr, int pos) { die(); } -void pet_expr_new_cast() { +__isl_give pet_expr *pet_expr_new_cast(const char *type_name, + __isl_take pet_expr *arg) { die(); } -void pet_expr_set_arg() { +__isl_give pet_expr *pet_expr_set_arg(__isl_take pet_expr *expr, int pos, + __isl_take pet_expr *arg) { die(); } -void pet_tree_copy() { +__isl_give pet_tree *pet_tree_copy(__isl_keep pet_tree *tree) { die(); } -void pet_tree_free() { +__isl_null pet_tree *pet_tree_free(__isl_take pet_tree *tree) { die(); } -void pet_tree_map_call_expr() { +__isl_give pet_tree *pet_tree_map_call_expr(__isl_take pet_tree *tree, + __isl_give pet_expr *(*fn)(__isl_take pet_expr *expr, void *user), + void *user) { die(); } -void pet_expr_access_get_may_read() { 
+__isl_give isl_union_map *pet_expr_access_get_may_read( + __isl_keep pet_expr *expr) { die(); } -void pet_expr_access_get_may_write() { +__isl_give isl_union_map *pet_expr_access_get_may_write( + __isl_keep pet_expr *expr) { die(); } From 3263c81589eca689341ab5084723bdb7fe4a1286 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 11 Feb 2021 22:28:19 +0000 Subject: [PATCH 221/318] Partially Revert "scan-view: Remove Reporter.py and associated AppleScript files" This reverts some of commit dbb01536f6f49fa428f170e34466072ef439b3e9. The Reporter module was still being used by the ScanView.py module and deleting it caused scan-view to fail. This commit adds back Reporter.py but removes the code the references the AppleScript files which were removed in dbb01536f6f49fa428f170e34466072ef439b3e9. Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D96367 (cherry picked from commit e3cd3a3c91524c957e06bb0170343548f02b6842) --- clang/tools/scan-view/CMakeLists.txt | 1 + clang/tools/scan-view/share/Reporter.py | 183 ++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 clang/tools/scan-view/share/Reporter.py diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt index dd3d33439299..eccc6b83195b 100644 --- a/clang/tools/scan-view/CMakeLists.txt +++ b/clang/tools/scan-view/CMakeLists.txt @@ -5,6 +5,7 @@ set(BinFiles set(ShareFiles ScanView.py + Reporter.py startfile.py bugcatcher.ico) diff --git a/clang/tools/scan-view/share/Reporter.py b/clang/tools/scan-view/share/Reporter.py new file mode 100644 index 000000000000..31a14fb0cf74 --- /dev/null +++ b/clang/tools/scan-view/share/Reporter.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Methods for reporting bugs.""" + +import subprocess, sys, os + +__all__ = ['ReportFailure', 'BugReport', 'getReporters'] + +# + +class ReportFailure(Exception): + """Generic exception for failures in bug reporting.""" + def __init__(self, value): 
+ self.value = value + +# Collect information about a bug. + +class BugReport(object): + def __init__(self, title, description, files): + self.title = title + self.description = description + self.files = files + +# Reporter interfaces. + +import os + +import email, mimetypes, smtplib +from email import encoders +from email.message import Message +from email.mime.base import MIMEBase +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +#===------------------------------------------------------------------------===# +# ReporterParameter +#===------------------------------------------------------------------------===# + +class ReporterParameter(object): + def __init__(self, n): + self.name = n + def getName(self): + return self.name + def getValue(self,r,bugtype,getConfigOption): + return getConfigOption(r.getName(),self.getName()) + def saveConfigValue(self): + return True + +class TextParameter (ReporterParameter): + def getHTML(self,r,bugtype,getConfigOption): + return """\ + +%s: + +"""%(self.getName(),r.getName(),self.getName(),self.getValue(r,bugtype,getConfigOption)) + +class SelectionParameter (ReporterParameter): + def __init__(self, n, values): + ReporterParameter.__init__(self,n) + self.values = values + + def getHTML(self,r,bugtype,getConfigOption): + default = self.getValue(r,bugtype,getConfigOption) + return """\ + +%s:"""%(self.getName(),r.getName(),self.getName(),'\n'.join(["""\ +"""%(o[0], + o[0] == default and ' selected="selected"' or '', + o[1]) for o in self.values])) + +#===------------------------------------------------------------------------===# +# Reporters +#===------------------------------------------------------------------------===# + +class EmailReporter(object): + def getName(self): + return 'Email' + + def getParameters(self): + return [TextParameter(x) for x in ['To', 'From', 'SMTP Server', 'SMTP Port']] + + # Lifted from python email module examples. 
+ def attachFile(self, outer, path): + # Guess the content type based on the file's extension. Encoding + # will be ignored, although we should check for simple things like + # gzip'd or compressed files. + ctype, encoding = mimetypes.guess_type(path) + if ctype is None or encoding is not None: + # No guess could be made, or the file is encoded (compressed), so + # use a generic bag-of-bits type. + ctype = 'application/octet-stream' + maintype, subtype = ctype.split('/', 1) + if maintype == 'text': + fp = open(path) + # Note: we should handle calculating the charset + msg = MIMEText(fp.read(), _subtype=subtype) + fp.close() + else: + fp = open(path, 'rb') + msg = MIMEBase(maintype, subtype) + msg.set_payload(fp.read()) + fp.close() + # Encode the payload using Base64 + encoders.encode_base64(msg) + # Set the filename parameter + msg.add_header('Content-Disposition', 'attachment', filename=os.path.basename(path)) + outer.attach(msg) + + def fileReport(self, report, parameters): + mainMsg = """\ +BUG REPORT +--- +Title: %s +Description: %s +"""%(report.title, report.description) + + if not parameters.get('To'): + raise ReportFailure('No "To" address specified.') + if not parameters.get('From'): + raise ReportFailure('No "From" address specified.') + + msg = MIMEMultipart() + msg['Subject'] = 'BUG REPORT: %s'%(report.title) + # FIXME: Get config parameters + msg['To'] = parameters.get('To') + msg['From'] = parameters.get('From') + msg.preamble = mainMsg + + msg.attach(MIMEText(mainMsg, _subtype='text/plain')) + for file in report.files: + self.attachFile(msg, file) + + try: + s = smtplib.SMTP(host=parameters.get('SMTP Server'), + port=parameters.get('SMTP Port')) + s.sendmail(msg['From'], msg['To'], msg.as_string()) + s.close() + except: + raise ReportFailure('Unable to send message via SMTP.') + + return "Message sent!" 
+ +class BugzillaReporter(object): + def getName(self): + return 'Bugzilla' + + def getParameters(self): + return [TextParameter(x) for x in ['URL','Product']] + + def fileReport(self, report, parameters): + raise NotImplementedError + + +class RadarClassificationParameter(SelectionParameter): + def __init__(self): + SelectionParameter.__init__(self,"Classification", + [['1', 'Security'], ['2', 'Crash/Hang/Data Loss'], + ['3', 'Performance'], ['4', 'UI/Usability'], + ['6', 'Serious Bug'], ['7', 'Other']]) + + def saveConfigValue(self): + return False + + def getValue(self,r,bugtype,getConfigOption): + if bugtype.find("leak") != -1: + return '3' + elif bugtype.find("dereference") != -1: + return '2' + elif bugtype.find("missing ivar release") != -1: + return '3' + else: + return '7' + +### + +def getReporters(): + reporters = [] + reporters.append(EmailReporter()) + return reporters + From c1831fc655979a0501a792f730d84d68e15a888e Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Wed, 17 Mar 2021 19:08:46 -0700 Subject: [PATCH 222/318] [RISCV] Fix isel pattern of masked vmslt[u] This patch changes the operand order of masked vmslt[u] from (mask, rs1, scalar, maskedoff, vl) to (maskedoff, rs1, scalar, mask, vl). 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D98839 (cherry picked from commit fca5d63aa8d43a21557874d9bc040e944ab0500d) --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 20 +++--- llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll | 60 ++++++++++++---- llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll | 72 ++++++++++++++----- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll | 60 ++++++++++++---- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll | 72 ++++++++++++++----- 5 files changed, 208 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 60bd1b24cab8..5c228820f0cc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3909,10 +3909,10 @@ foreach vti = AllIntegerVectors in { (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3922,17 +3922,17 @@ foreach vti = AllIntegerVectors in { GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3950,11 +3950,11 @@ foreach vti = AllIntegerVectors in { vti.RegClass:$rs1, GPR:$vl, 
vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), - (vti.Mask VR:$merge), - (XLenVT (VLOp GPR:$vl)))), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), (!cast("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll index 894a232a167d..a51949573c97 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll @@ -1504,9 +1504,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( @@ -1537,9 +1539,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i8.i8( @@ -1570,9 +1574,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -11, v0.t +; CHECK-NEXT: 
vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( @@ -1603,9 +1609,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( @@ -1636,9 +1644,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1669,9 +1679,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1702,9 +1714,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call 
@llvm.riscv.vmslt.mask.nxv1i16.i16( @@ -1735,9 +1749,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( @@ -1768,9 +1784,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( @@ -1801,9 +1819,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -1834,9 +1854,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -1867,9 
+1889,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( @@ -1900,9 +1924,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( @@ -1933,9 +1959,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -1966,9 +1994,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll index 7802c2a84ff8..c75f37c440d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll @@ -1801,9 +1801,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i8.i8( @@ -1834,9 +1836,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i8.i8( @@ -1867,9 +1871,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i8.i8( @@ -1900,9 +1906,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -9, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i8.i8( @@ -1933,9 +1941,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1966,9 +1976,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1999,9 +2011,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i16.i16( @@ -2032,9 +2046,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, -1, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) 
entry: %a = call @llvm.riscv.vmslt.mask.nxv2i16.i16( @@ -2065,9 +2081,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i16.i16( @@ -2098,9 +2116,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -2131,9 +2151,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -2164,9 +2186,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i32.i32( @@ 
-2197,9 +2221,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i32.i32( @@ -2230,9 +2256,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -2263,9 +2291,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -2296,9 +2326,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu -; CHECK-NEXT: vmsle.vi v9, v8, 14, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsle.vi v25, v8, 14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv1i64.i64( @@ -2329,9 +2361,11 @@ entry: define 
@intrinsic_vmslt_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu -; CHECK-NEXT: vmsle.vi v10, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v25, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -2362,9 +2396,11 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu -; CHECK-NEXT: vmsle.vi v12, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v25, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmslt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll index 8cd17ead0234..ca78411b24c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll @@ -1504,9 +1504,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( @@ -1537,9 +1539,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 
+; CHECK-NEXT: vmsleu.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( @@ -1570,9 +1574,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( @@ -1603,9 +1609,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( @@ -1636,9 +1644,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1669,9 +1679,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv32i8_i8( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -5, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1702,9 +1714,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( @@ -1735,9 +1749,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsne.vv v9, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsne.vv v25, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( @@ -1768,9 +1784,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( @@ -1801,9 +1819,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; 
CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -1834,9 +1854,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -1867,9 +1889,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( @@ -1900,9 +1924,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i32.i32( @@ -1933,9 +1959,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) 
entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -1966,9 +1994,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll index 83dc531750ee..a145dd684bac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll @@ -1801,9 +1801,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf8,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -15, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i8.i8( @@ -1834,9 +1836,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -13, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i8.i8( @@ -1867,9 +1871,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, 
a0, e8,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -11, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i8.i8( @@ -1900,9 +1906,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, -9, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -9, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i8.i8( @@ -1933,9 +1941,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -7, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1966,9 +1976,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv32i8_i8( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e8,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -5, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1999,9 +2011,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu -; CHECK-NEXT: vmsleu.vi v9, 
v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, -3, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i16.i16( @@ -2032,9 +2046,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu -; CHECK-NEXT: vmsne.vv v9, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsne.vv v25, v8, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i16.i16( @@ -2065,9 +2081,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i16.i16( @@ -2098,9 +2116,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 2, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -2131,9 +2151,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v 
v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -2164,9 +2186,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 6, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i32.i32( @@ -2197,9 +2221,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 8, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i32.i32( @@ -2230,9 +2256,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, 10, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -2263,9 +2291,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, 12, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi 
v25, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -2296,9 +2326,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu -; CHECK-NEXT: vmsleu.vi v9, v8, 14, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmsleu.vi v25, v8, 14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv1i64.i64( @@ -2329,9 +2361,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu -; CHECK-NEXT: vmsleu.vi v10, v8, -16, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v25, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -2362,9 +2396,11 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i64_i64( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu -; CHECK-NEXT: vmsleu.vi v12, v8, -14, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v25, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: jalr zero, 0(ra) entry: %a = call @llvm.riscv.vmsltu.mask.nxv4i64.i64( From e0fe1c58acfa0bde36afde8354cb31fc1e0b75e2 Mon Sep 17 00:00:00 2001 From: Luke Drummond Date: Wed, 10 Mar 2021 18:14:42 +0000 Subject: [PATCH 223/318] [OpenCL] Respect calling convention for builtin `__translate_sampler_initializer` has a calling convention of `spir_func`, but clang generated calls to it using the default CC. 
Instruction Combining was lowering these mismatching calling conventions to `store i1* undef` which itself was subsequently lowered to a trap instruction by simplifyCFG resulting in runtime `SIGILL` There are arguably two bugs here: but whether there's any wisdom in converting an obviously invalid call into a runtime crash over aborting with a sensible error message will require further discussion. So for now it's enough to set the right calling convention on the runtime helper. Reviewed By: svenh, bader Differential Revision: https://reviews.llvm.org/D98411 (cherry picked from commit fcfd3fda71905d7c48f75a531c2265ad3b9876ea) --- clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++----- clang/test/CodeGenOpenCL/sampler.cl | 12 ++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 31afbc6b4262..9c9bd4e374af 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6215,15 +6215,17 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() { return *SanStats; } + llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); - auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); - auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); - return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, - "__translate_sampler_initializer"), - {C}); + auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); + auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); + auto *Call = CGF.Builder.CreateCall( + CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); + Call->setCallingConv(Call->getCalledFunction()->getCallingConv()); + return Call; } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( diff --git a/clang/test/CodeGenOpenCL/sampler.cl 
b/clang/test/CodeGenOpenCL/sampler.cl index e6bda49f51c8..5ad8d0dbbf37 100644 --- a/clang/test/CodeGenOpenCL/sampler.cl +++ b/clang/test/CodeGenOpenCL/sampler.cl @@ -39,7 +39,7 @@ kernel void foo(sampler_t smp_par) { // Case 2b sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_NEAREST; // CHECK: [[smp_ptr:%[A-Za-z0-9_\.]+]] = alloca %opencl.sampler_t addrspace(2)* - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMP]], %opencl.sampler_t addrspace(2)** [[smp_ptr]] // Case 1b @@ -56,12 +56,12 @@ kernel void foo(sampler_t smp_par) { // Case 1a/2a fnc4smp(glb_smp); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1a/2c fnc4smp(glb_smp_const); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1c @@ -70,12 +70,12 @@ kernel void foo(sampler_t smp_par) { // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) fnc4smp(5); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) const sampler_t const_smp = 
CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(const_smp); - // CHECK: [[CONST_SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[CONST_SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]] fnc4smp(const_smp); // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]] @@ -83,7 +83,7 @@ kernel void foo(sampler_t smp_par) { constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(constant_smp); - // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) + // CHECK: [[SAMP:%[0-9]+]] = call spir_func %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // TODO: enable sampler initialization with non-constant integer. From a5a6cfe2f030e81e689ed9af4e95ddf95c4d8675 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 4 Mar 2021 12:58:22 -0800 Subject: [PATCH 224/318] BPF: permit type modifiers for __builtin_btf_type_id() relocation Lorenz Bauer from Cloudflare tried to use "const struct " as the type for __builtin_btf_type_id(*(const struct )0, 1) relocation and hit a llvm BPF fatal error. https://lore.kernel.org/bpf/a3782f71-3f6b-1e75-17a9-1827822c2030@fb.com/ ... fatal error: error in backend: Empty type name for BTF_TYPE_ID_REMOTE reloc Currently, we require the debuginfo type itself must have a name. In this case, the debuginfo type is "const" which points to "struct ". The "const" type does not have a name, hence the above fatal error will be triggered. Let us permit "const" and "volatile" type modifiers. 
We skip modifiers in some other cases as well like structure member type tracing. This can avoid the above fatal error. Differential Revision: https://reviews.llvm.org/D97986 (cherry picked from commit 9c0274cdeae904089806be6faee72b9126d2cf5b) --- llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 9 +++ .../CodeGen/BPF/BTF/builtin-btf-type-id-2.ll | 73 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index 18a4f60c171a..0348e2200acb 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -85,8 +85,17 @@ static bool BPFPreserveDITypeImpl(Function &F) { } else { Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE; DIType *Ty = cast(MD); + while (auto *DTy = dyn_cast(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type) + break; + Ty = DTy->getBaseType(); + } + if (Ty->getName().empty()) report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc"); + MD = Ty; } BasicBlock *BB = Call->getParent(); diff --git a/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll b/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll new file mode 100644 index 000000000000..63c56c4dfec5 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id-2.ll @@ -0,0 +1,73 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux -S -o %t1 %s +; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK %s +; Source code: +; struct s { +; int a; +; }; +; int test(void) { +; return __builtin_btf_type_id(*(const struct s *)0, 1); +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: nounwind +define dso_local i32 @test() #0 !dbg !7 { +entry: + %0 = call i64
@llvm.bpf.btf.type.id(i32 0, i64 1), !dbg !11, !llvm.preserve.access.index !12 + %conv = trunc i64 %0 to i32, !dbg !11 + ret i32 %conv, !dbg !16 +} + +; CHECK: .long 1 # BTF_KIND_INT(id = 2) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 + +; CHECK: .long 16 # BTF_KIND_STRUCT(id = 4) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 18 +; CHECK-NEXT: .long 2 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .byte 115 # string offset=16 +; CHECK: .byte 97 # string offset=18 +; CHECK: .byte 48 # string offset=20 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 20 +; CHECK-NEXT: .long 7 + +; Function Attrs: nounwind readnone +declare i64 @llvm.bpf.btf.type.id(i32, i64) #1 + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9783e2098800b954c55ae598a1ce5c4b93444fc0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/bpf/test") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9783e2098800b954c55ae598a1ce5c4b93444fc0)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{!10} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DILocation(line: 5, column: 10, scope: !7) +!12 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !13) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 1, size: 32, elements: !14) +!14 = !{!15} +!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !1, line: 2, baseType: !10, size: 32) +!16 = !DILocation(line: 5, column: 3, scope: !7) From 6564e0cf7e61518cb15443fca42bc2206a6123e2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 2 Mar 2021 09:35:21 -0800 Subject: [PATCH 225/318] BPF: Fix a bug in peephole TRUNC elimination optimization Andrei Matei reported a llvm11 core dump for his bpf program https://bugs.llvm.org/show_bug.cgi?id=48578 The core dump happens in LiveVariables analysis phase. #4 0x00007fce54356bb0 __restore_rt #5 0x00007fce4d51785e llvm::LiveVariables::HandleVirtRegUse(unsigned int, llvm::MachineBasicBlock*, llvm::MachineInstr&) #6 0x00007fce4d519abe llvm::LiveVariables::runOnInstr(llvm::MachineInstr&, llvm::SmallVectorImpl&) #7 0x00007fce4d519ec6 llvm::LiveVariables::runOnBlock(llvm::MachineBasicBlock*, unsigned int) #8 0x00007fce4d51a4bf llvm::LiveVariables::runOnMachineFunction(llvm::MachineFunction&) The bug can be reproduced with llvm12 and latest trunk as well. Further analysis shows that there is a bug in BPF peephole TRUNC elimination optimization, which tries to remove unnecessary TRUNC operations (a <<= 32; a >>= 32). Specifically, the compiler did wrong transformation for the following patterns: %1 = LDW ... %2 = SLL_ri %1, 32 %3 = SRL_ri %2, 32 ... %3 ... %4 = SRA_ri %2, 32 ... %4 ... The current transformation did not check how many uses of %2 and did transformation like %1 = LDW ... ... %1 ... %4 = SRL_ri %2, 32 ... %4 ...
and pseudo register %2 is used but not defined and caused LiveVariables analysis core dump. To fix the issue, when traversing back from SRL_ri to SLL_ri, check to ensure SLL_ri has only one use. Otherwise, don't do transformation. Differential Revision: https://reviews.llvm.org/D97792 (cherry picked from commit 51cdb780db3b9b46c783efcec672c4da272e9992) --- llvm/lib/Target/BPF/BPFMIPeephole.cpp | 3 ++ llvm/test/CodeGen/BPF/remove_truncate_8.ll | 41 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 llvm/test/CodeGen/BPF/remove_truncate_8.ll diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index df870314fffe..354980e4bf3c 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { if (MI.getOpcode() == BPF::SRL_ri && MI.getOperand(2).getImm() == 32) { SrcReg = MI.getOperand(1).getReg(); + if (!MRI->hasOneNonDBGUse(SrcReg)) + continue; + MI2 = MRI->getVRegDef(SrcReg); DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/BPF/remove_truncate_8.ll b/llvm/test/CodeGen/BPF/remove_truncate_8.ll new file mode 100644 index 000000000000..fb1eabb0f0fd --- /dev/null +++ b/llvm/test/CodeGen/BPF/remove_truncate_8.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=bpf -verify-machineinstrs | FileCheck %s +; Source Code: +; struct loc_prog { +; unsigned int ip; +; int len; +; }; +; int exec_prog(struct loc_prog *prog) { +; if (prog->ip < prog->len) { +; int x = prog->ip; +; if (x < 3) +; prog->ip += 2; +; } +; return 3; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm t.c + +%struct.loc_prog = type { i32, i32 } + +; Function Attrs: nofree norecurse nounwind willreturn +define dso_local i32 @exec_prog(%struct.loc_prog* nocapture %prog) local_unnamed_addr { +entry: + %ip = getelementptr inbounds %struct.loc_prog, %struct.loc_prog* %prog, i64 0, i32 0 + %0 = load i32, i32* %ip, align 4 +
%len = getelementptr inbounds %struct.loc_prog, %struct.loc_prog* %prog, i64 0, i32 1 + %1 = load i32, i32* %len, align 4 + %cmp = icmp ult i32 %0, %1 + %cmp2 = icmp slt i32 %0, 3 + %or.cond = and i1 %cmp2, %cmp +; CHECK: r{{[0-9]+}} <<= 32 +; CHECK: r{{[0-9]+}} s>>= 32 + br i1 %or.cond, label %if.then3, label %if.end5 + +if.then3: ; preds = %entry + %add = add nsw i32 %0, 2 + store i32 %add, i32* %ip, align 4 + br label %if.end5 + +if.end5: ; preds = %if.then3, %entry + ret i32 3 +} From e294ece42d85191875782ed05cb607451f493944 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 28 Feb 2021 22:46:39 -0800 Subject: [PATCH 226/318] BPF: fix enum value 0 issue for __builtin_preserve_enum_value() Lorenz Bauer reported that the following code will have compilation error for bpf target: enum e { TWO }; bpf_core_enum_value_exists(enum e, TWO); The clang emitted the following error message: __builtin_preserve_enum_value argument 1 invalid In SemaChecking, an expression like "*(enum NAME)1" will have cast kind CK_IntegralToPointer, but "*(enum NAME)0" will have cast kind CK_NullToPointer. Current implementation only permits CK_IntegralToPointer, missing enum value 0 case. This patch permits CK_NullToPointer cast kind and the above test case can pass now. 
Differential Revision: https://reviews.llvm.org/D97659 (cherry picked from commit 283db5f0837d55f91242812003adf6e189ba743e) --- clang/lib/Sema/SemaChecking.cpp | 5 ++++- .../CodeGen/builtins-bpf-preserve-field-info-4.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2d3d36f4adad..2b55712d44c2 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2623,7 +2623,10 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) { return false; const auto *CE = dyn_cast(UO->getSubExpr()); - if (!CE || CE->getCastKind() != CK_IntegralToPointer) + if (!CE) + return false; + if (CE->getCastKind() != CK_IntegralToPointer && + CE->getCastKind() != CK_NullToPointer) return false; // The integer must be from an EnumConstantDecl. diff --git a/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c b/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c index e07c680bb370..b167b776e385 100644 --- a/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c +++ b/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c @@ -4,10 +4,11 @@ #define _(x, y) (__builtin_preserve_enum_value((x), (y))) enum AA { + VAL0 = 0, VAL1 = 2, VAL2 = 0xffffffff80000000UL, }; -typedef enum { VAL10 = -2, VAL11 = 0xffff8000, } __BB; +typedef enum { VAL00, VAL10 = -2, VAL11 = 0xffff8000, } __BB; unsigned unit1() { return _(*(enum AA *)VAL1, 0) + _(*(__BB *)VAL10, 1); @@ -17,10 +18,16 @@ unsigned unit2() { return _(*(enum AA *)VAL2, 0) + _(*(__BB *)VAL11, 1); } +unsigned unit3() { + return _(*(enum AA *)VAL0, 0) + _(*(__BB *)VAL00, 1); +} + // CHECK: @0 = private unnamed_addr constant [7 x i8] c"VAL1:2\00", align 1 // CHECK: @1 = private unnamed_addr constant [9 x i8] c"VAL10:-2\00", align 1 // CHECK: @2 = private unnamed_addr constant [17 x i8] c"VAL2:-2147483648\00", align 1 // CHECK: @3 = private unnamed_addr constant [17 x i8] c"VAL11:4294934528\00", align 1 +// CHECK: @4 
= private unnamed_addr constant [7 x i8] c"VAL0:0\00", align 1 +// CHECK: @5 = private unnamed_addr constant [8 x i8] c"VAL00:0\00", align 1 // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 0, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @0, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA:[0-9]+]] // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 1, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @1, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM:[0-9]+]] @@ -28,5 +35,8 @@ unsigned unit2() { // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 2, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @2, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] // CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 3, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @3, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 4, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @4, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 5, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @5, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM]] + // CHECK: ![[ENUM_AA]] = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA" // CHECK: ![[TYPEDEF_ENUM]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__BB" From 6baa5ce2e4b70424981ba5632d10681d41f57cfc Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Wed, 5 May 2021 00:55:36 -0400 Subject: [PATCH 227/318] Fix typo, arvm7 -> armv7 (cherry picked from commit 3a62d4fde88544125ce9ceff990db108ee91148a) --- lldb/docs/man/lldb.rst | 4 ++-- lldb/tools/driver/Driver.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst index 6dca15fa35dc..b75288db380d 100644 --- 
a/lldb/docs/man/lldb.rst +++ b/lldb/docs/man/lldb.rst @@ -256,11 +256,11 @@ executable. To disambiguate between arguments passed to lldb and arguments passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes :program:`lldb` to immediately attach to the given process. diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index e4a60127b65e..210a712f9741 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -751,11 +751,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) { arguments passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes lldb to immediately attach to the given process. From 471a386a3d348e933d200e1cc01413aa655d508e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 17 Mar 2021 10:48:20 +0000 Subject: [PATCH 228/318] StackProtector: ensure protection does not interfere with tail call frame. The IR stack protector pass must insert stack checks before the call instead of between it and the return. Similarly, SDAG one should recognize that ADJCALLFRAME instructions could be part of the terminal sequence of a tail call. 
In this case because such call frames cannot be nested in LLVM the stack protection code must skip over the whole sequence (or risk clobbering argument registers). (cherry picked from commit 5e3d9fcc3a8802cea5b850a3ca40c515d916bf82) --- .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 33 ++++++++-- llvm/lib/CodeGen/StackProtector.cpp | 24 +++++-- .../AArch64/stack-protector-musttail.ll | 66 +++++++++++++++++++ .../ARM/Windows/stack-protector-musttail.ll | 56 ++++++++++++++++ llvm/test/CodeGen/X86/tailcc-ssp.ll | 26 ++++++++ 5 files changed, 197 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/stack-protector-musttail.ll create mode 100644 llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll create mode 100644 llvm/test/CodeGen/X86/tailcc-ssp.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7bae5048fc0e..d17dd1c5eccb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1691,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB) { +FindSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); - // if (SplitPoint == BB->begin()) return SplitPoint; @@ -1701,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) { MachineBasicBlock::iterator Previous = SplitPoint; --Previous; + if (TII.isTailCall(*SplitPoint) && + Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // call itself, then we must insert before the sequence even starts. For + // example: + // + // ADJCALLSTACKDOWN ... + // + // ADJCALLSTACKUP ... 
+ // TAILJMP somewhere + // On the other hand, it could be an unrelated call in which case this tail call + // has to register moves of its own and should be the split point. For example: + // ADJCALLSTACKDOWN + // CALL something_else + // ADJCALLSTACKUP + // + // TAILJMP somewhere + do { + --Previous; + if (Previous->isCall()) + return SplitPoint; + } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + + return Previous; + } + while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) @@ -1740,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() { // Add load and check to the basicblock. FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); @@ -1759,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 8d91afb6e99d..10c6dcbdb049 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() { // instrumentation has already been generated. HasIRCheck = true; + // If we're instrumenting a block with a musttail call, the check has to be + // inserted before the call rather than between it and the return. 
The + // verifier guarantees that a musttail call is either directly before the + // return or with a single correct bitcast of the return value in between so + // we don't need to worry about many situations here. + Instruction *CheckLoc = RI; + Instruction *Prev = RI->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + else if (Prev) { + Prev = Prev->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + } + // Generate epilogue instrumentation. The epilogue intrumentation can be // function-based or inlined depending on which mechanism the target is // providing. if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) { // Generate the function-based epilogue instrumentation. // The target provides a guard check function, generate a call to it. - IRBuilder<> B(RI); + IRBuilder<> B(CheckLoc); LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard"); CallInst *Call = B.CreateCall(GuardCheck, {Guard}); Call->setAttributes(GuardCheck->getAttributes()); Call->setCallingConv(GuardCheck->getCallingConv()); } else { // Generate the epilogue with inline instrumentation. - // If we do not support SelectionDAG based tail calls, generate IR level - // tail calls. + // If we do not support SelectionDAG based calls, generate IR level + // calls. // // For each block with a return instruction, convert this: // @@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); + BasicBlock *NewBB = + BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return"); // Update the dominator tree if we need to. 
if (DT && DT->isReachableFromEntry(BB)) { diff --git a/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll b/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll new file mode 100644 index 000000000000..8a2e095e6a64 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/stack-protector-musttail.ll @@ -0,0 +1,66 @@ +; RUN: llc -mtriple=arm64-apple-macosx -fast-isel %s -o - -start-before=stack-protector -stop-after=stack-protector | FileCheck %s + +@var = global [2 x i64]* null + +declare void @callee() + +define void @caller1() ssp { +; CHECK-LABEL: define void @caller1() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: musttail call void @callee() +; CHECK-NEXT: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + musttail call void @callee() + ret void +} + +define void @justret() ssp { +; CHECK-LABEL: define void @justret() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + br label %retblock + +retblock: + ret void +} + + +declare i64* @callee2() + +define i8* @caller2() ssp { +; CHECK-LABEL: define i8* @caller2() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: [[GUARD:%.*]] = call i8* @llvm.stackguard() +; CHECK: [[TOKEN:%.*]] = load volatile i8*, i8** {{%.*}} +; CHECK: [[TST:%.*]] = icmp eq i8* [[GUARD]], [[TOKEN]] +; CHECK: br i1 [[TST]] + +; CHECK: [[TMP:%.*]] = musttail call i64* @callee2() +; CHECK-NEXT: [[RES:%.*]] = bitcast i64* [[TMP]] to i8* +; CHECK-NEXT: ret i8* [[RES]] + + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + %tmp = musttail call 
i64* @callee2() + %res = bitcast i64* %tmp to i8* + ret i8* %res +} diff --git a/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll b/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll new file mode 100644 index 000000000000..93b601c9369f --- /dev/null +++ b/llvm/test/CodeGen/ARM/Windows/stack-protector-musttail.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=thumbv7-windows-msvc -fast-isel %s -o - -start-before=stack-protector -stop-after=stack-protector | FileCheck %s + +@var = global [2 x i64]* null + +declare void @callee() + +define void @caller1() sspreq { +; CHECK-LABEL: define void @caller1() +; Prologue: + +; CHECK: call void @__security_check_cookie + +; CHECK: musttail call void @callee() +; CHECK-NEXT: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + musttail call void @callee() + ret void +} + +define void @justret() sspreq { +; CHECK-LABEL: define void @justret() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: call void @__security_check_cookie + +; CHECK: ret void + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + br label %retblock + +retblock: + ret void +} + + +declare i64* @callee2() + +define i8* @caller2() sspreq { +; CHECK-LABEL: define i8* @caller2() +; Prologue: +; CHECK: @llvm.stackguard + +; CHECK: call void @__security_check_cookie + +; CHECK: [[TMP:%.*]] = musttail call i64* @callee2() +; CHECK-NEXT: [[RES:%.*]] = bitcast i64* [[TMP]] to i8* +; CHECK-NEXT: ret i8* [[RES]] + + %var = alloca [2 x i64] + store [2 x i64]* %var, [2 x i64]** @var + %tmp = musttail call i64* @callee2() + %res = bitcast i64* %tmp to i8* + ret i8* %res +} diff --git a/llvm/test/CodeGen/X86/tailcc-ssp.ll b/llvm/test/CodeGen/X86/tailcc-ssp.ll new file mode 100644 index 000000000000..b85be6a5e790 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcc-ssp.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=x86_64-windows-msvc %s -o - -verify-machineinstrs | FileCheck %s + +declare void @h(i8*, i64, i8*) + +define tailcc void 
@tailcall_frame(i8* %0, i64 %1) sspreq { +; CHECK-LABEL: tailcall_frame: +; CHECK: callq __security_check_cookie +; CHECK: xorl %ecx, %ecx +; CHECK: jmp h + + tail call tailcc void @h(i8* null, i64 0, i8* null) + ret void +} + +declare void @bar() +define void @tailcall_unrelated_frame() sspreq { +; CHECK-LABEL: tailcall_unrelated_frame: +; CHECK: subq [[STACK:\$.*]], %rsp +; CHECK: callq bar +; CHECK: callq __security_check_cookie +; CHECK: addq [[STACK]], %rsp +; CHECK: jmp bar + call void @bar() + tail call void @bar() + ret void +} From ac593de16cc5282630ce44dd8378ae5b7b91644c Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Thu, 8 Apr 2021 12:33:25 +0900 Subject: [PATCH 229/318] [LoopReroll] Fix rerolling loop with extra instructions Fixes PR47627 This fix suppresses rerolling a loop which has an unrerollable instruction. Sample IR for the explanation below: ``` define void @foo([2 x i32]* nocapture %a) { entry: br label %loop loop: ; base instruction %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ] ; unrerollable instructions %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvar, i64 0 store i32 999, i32* %stptrx, align 4 ; extra simple arithmetic operations, used by root instructions %plus20 = add nuw nsw i64 %indvar, 20 %plus10 = add nuw nsw i64 %indvar, 10 ; root instruction 0 %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 %value0 = load i32, i32* %ldptr0, align 4 %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 store i32 %value0, i32* %stptr0, align 4 ; root instruction 1 %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 %value1 = load i32, i32* %ldptr1, align 4 %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 store i32 %value1, i32* %stptr1, align 4 ; loop-increment and latch %indvar.next = add nuw nsw i64 %indvar, 1 %exitcond = icmp eq i64 %indvar.next, 5 br i1 %exitcond, label %exit, label %loop exit: ret 
void } ``` In the loop rerolling pass, `%indvar` and `%indvar.next` are appended to the `LoopIncs` vector in the `LoopReroll::DAGRootTracker::findRoots` function. Before this fix, two instructions with `unrerollable instructions` comment above are marked as `IL_All` at the end of the `LoopReroll::DAGRootTracker::collectUsedInstructions` function, as well as instructions with `extra simple arithmetic operations` comment and `loop-increment and latch` comment. It is incorrect because `IL_All` means that the instruction should be executed in all iterations of the rerolled loop but the `store` instruction should not. This fix rejects instructions which may have side effects and don't belong to def-use chains of any root instructions and reductions. See https://bugs.llvm.org/show_bug.cgi?id=47627 for more information. (cherry picked from commit d9a9c992d190dd6645ea911b66cf0cadba0dadc3) --- llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 6 + .../test/Transforms/LoopReroll/extra_instr.ll | 268 ++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100644 llvm/test/Transforms/LoopReroll/extra_instr.ll diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index b3bae47e96de..65a6205f0302 100644 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po DenseSet V; collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); for (auto *I : V) { + if (I->mayHaveSideEffects()) { + LLVM_DEBUG(dbgs() << "LRR: Aborting - " + << "An instruction which does not belong to any root " + << "sets must not have side effects: " << *I); + return false; + } Uses[I].set(IL_All); } diff --git a/llvm/test/Transforms/LoopReroll/extra_instr.ll b/llvm/test/Transforms/LoopReroll/extra_instr.ll new file mode 100644 index 000000000000..aae29079ade7 --- /dev/null +++ 
b/llvm/test/Transforms/LoopReroll/extra_instr.ll @@ -0,0 +1,268 @@ +; RUN: opt -S -loop-reroll %s | FileCheck %s +target triple = "aarch64--linux-gnu" + +define void @rerollable1([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv +; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4 +; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; NO unrerollable instructions + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment + %iv.next = add nuw nsw i64 %iv, 1 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @unrerollable1([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 +; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 
+ + ; unrerollable instructions using %iv + %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 + store i32 999, i32* %stptrx, align 4 + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment + %iv.next = add nuw nsw i64 %iv, 1 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @unrerollable2([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1 +; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 +; CHECK-NEXT: store i32 999, i32* %stptrx, align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; loop-increment + %iv.next = add nuw nsw i64 %iv, 1 + + ; unrerollable instructions using %iv.next + %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0 + store i32 999, i32* %stptrx, align 4 + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = 
getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; latch + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define dso_local void @rerollable2() { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} +; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}} + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; NO unrerollable instructions + + ; root set 1 + + ; base instruction + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root instruction 0 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call void @bar(i32 %iv.scaled.add1.div5) + ; root instruction 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base instruction + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root instruction 0 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root instruction 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 
%iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; loop-increment + %iv.next = add nuw nsw i32 %iv, 1 + + ; latch + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +define dso_local void @unrerollable3() { +entry: + br label %loop + +loop: + +; CHECK-LABEL: loop: +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +; CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions using %iv + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base instruction + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root instruction 0 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call void @bar(i32 %iv.scaled.add1.div5) + ; root instruction 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base instruction + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root instruction 0 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root instruction 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; loop-increment + 
%iv.next = add nuw nsw i32 %iv, 1 + + ; latch + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +declare dso_local void @bar(i32) From 225b775620c69fa87c5fd63b29ef8f08c9922fb4 Mon Sep 17 00:00:00 2001 From: Ahsan Saghir Date: Fri, 23 Apr 2021 08:26:45 -0500 Subject: [PATCH 230/318] [PowerPC] Prevent argument promotion of types with size greater than 128 bits This patch prevents argument promotion of types having type size greater than 128 bits. Fixes Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=49952 Reviewed By: #powerpc, nemanjai Differential Revision: https://reviews.llvm.org/D101188 (cherry picked from commit 670736a904746e92dde141266b6d4881b56d51a2) --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 21 ++++ .../Target/PowerPC/PPCTargetTransformInfo.h | 3 + llvm/test/CodeGen/PowerPC/arg_promotion.ll | 108 ++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/arg_promotion.ll diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b3d8100fe016..c90ff8b7d59d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1212,6 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } +bool PPCTTIImpl::areFunctionArgsABICompatible( + const Function *Caller, const Function *Callee, + SmallPtrSetImpl &Args) const { + + // We need to ensure that argument promotion does not + // attempt to promote pointers to MMA types (__vector_pair + // and __vector_quad) since these types explicitly cannot be + // passed as arguments. Both of these types are larger than + // the 128-bit Altivec vectors and have a scalar size of 1 bit. 
+ if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + return false; + + return llvm::none_of(Args, [](Argument *A) { + auto *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + return (EltTy->isIntOrIntVectorTy(1) && + EltTy->getPrimitiveSizeInBits() > 128); + return false; + }); +} + bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index bc946715156f..c38ae90bc7dc 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -129,6 +129,9 @@ class PPCTTIImpl : public BasicTTIImplBase { unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); + bool areFunctionArgsABICompatible(const Function *Caller, + const Function *Callee, + SmallPtrSetImpl &Args) const; /// @} }; diff --git a/llvm/test/CodeGen/PowerPC/arg_promotion.ll b/llvm/test/CodeGen/PowerPC/arg_promotion.ll new file mode 100644 index 000000000000..e52d2e47201f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/arg_promotion.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; Test to check that we do not promote arguments when the +; type size is greater than 128 bits. 
+ +define internal fastcc void @print_acc(<512 x i1>* nocapture readonly %a) nounwind { +; CHECK-LABEL: @print_acc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, <512 x i1>* [[A:%.*]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0 +; CHECK-NEXT: ret void +; +entry: + %0 = load <512 x i1>, <512 x i1>* %a, align 64 + %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) + %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>) nounwind + +define dso_local void @test(<512 x i1>* nocapture %a, <16 x i8> %ac) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[AC:%.*]], <16 x i8> [[AC]]) +; CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[A:%.*]], align 64 +; CHECK-NEXT: tail call fastcc void @print_acc(<512 x i1>* nonnull [[A]]) +; CHECK-NEXT: ret void +; +entry: + %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %ac, <16 x i8> %ac) + store <512 x i1> %0, <512 x i1>* %a, align 64 + tail call fastcc void @print_acc(<512 x i1>* nonnull %a) + ret void +} + +declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) nounwind + +@.str = private unnamed_addr constant [11 x i8] c"Vector: { \00", align 1 +@.str.1 = private unnamed_addr constant [5 x i8] c"%d, \00", align 1 +@.str.2 = private unnamed_addr constant [6 x i8] c"%d }\0A\00", align 1 + +define internal fastcc void @printWideVec(<16 x i32> %ptr.val) nounwind { +; CHECK-LABEL: @printWideVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call signext i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i32> [[PTR_VAL:%.*]], i32 0 +; CHECK-NEXT: [[CALL1:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT]]) +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 1 +; CHECK-NEXT: [[CALL1_1:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_1]]) +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 2 +; CHECK-NEXT: [[CALL1_2:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_2]]) +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 3 +; CHECK-NEXT: [[CALL1_3:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_3]]) +; CHECK-NEXT: [[VECEXT_4:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 4 +; CHECK-NEXT: [[CALL1_4:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_4]]) +; CHECK-NEXT: [[VECEXT_5:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 5 +; CHECK-NEXT: [[CALL1_5:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_5]]) +; CHECK-NEXT: [[VECEXT_6:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 6 +; CHECK-NEXT: [[CALL1_6:%.*]] = tail call signext i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext [[VECEXT_6]]) +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <16 x i32> [[PTR_VAL]], i32 7 +; CHECK-NEXT: [[CALL3:%.*]] = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 signext [[VECEXT2]]) +; CHECK-NEXT: ret void +; +entry: + %call = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) + %vecext = extractelement <16 x i32> %ptr.val, i32 0 + %call1 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext) + %vecext.1 = extractelement <16 x i32> %ptr.val, i32 1 + %call1.1 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.1) + %vecext.2 = extractelement <16 x i32> %ptr.val, i32 2 + %call1.2 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.2) + %vecext.3 = extractelement <16 x i32> %ptr.val, i32 3 + %call1.3 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.3) + %vecext.4 = extractelement <16 x i32> %ptr.val, i32 4 + %call1.4 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.4) + %vecext.5 = extractelement <16 x i32> %ptr.val, i32 5 + %call1.5 = tail call signext i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.5) + %vecext.6 = extractelement <16 x i32> %ptr.val, i32 6 + %call1.6 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i64 0, i64 0), i32 signext %vecext.6) + %vecext2 = extractelement <16 x i32> %ptr.val, i32 7 + %call3 = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str.2, i64 0, i64 0), i32 signext %vecext2) + ret void +} + +declare noundef signext i32 @printf(i8* nocapture noundef readonly, ...) nounwind + +define dso_local void @test1(<4 x i32> %a, <4 x i32> %b) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> +; CHECK-NEXT: [[VECINIT22:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: tail call fastcc void @printWideVec(<16 x i32> [[VECINIT22]]) +; CHECK-NEXT: ret void +; +entry: + %0 = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> + %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <16 x i32> + %2 = shufflevector <16 x i32> %0, <16 x i32> , <16 x i32> + %vecinit22 = shufflevector <16 x i32> %2, <16 x i32> %1, <16 x i32> + tail call fastcc void @printWideVec(<16 x i32> %vecinit22) + ret void +} From c89d50033228953d29e835ea5cb8e7066c0d8583 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 5 Apr 2021 12:03:50 -0400 Subject: [PATCH 231/318] [InstCombine] add test for miscompile from select value equivalence; NFC The new test is reduced from: https://llvm.org/PR49832 ...but we already show a potential miscompile in the existing test too. 
(cherry picked from commit c0b0da4684908b8e8143c0762fc766c1a2a5849f) --- .../Transforms/InstCombine/select-binop-cmp.ll | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index bbf7456ae811..beea2862a8ef 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -551,6 +551,8 @@ define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) { ret i32 %C } +; FIXME: Value equivalence substitution is all-or-nothing, so needs a scalar compare. + define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_bad( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], @@ -564,6 +566,18 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } +; FIXME: Value equivalence substitution is all-or-nothing, so needs a scalar compare. + +define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x) { +; CHECK-LABEL: @vec_select_no_equivalence( +; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; + %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> + %cond = icmp eq <2 x i32> %x, zeroinitializer + %s = select <2 x i1> %cond, <2 x i32> %x10, <2 x i32> %x + ret <2 x i32> %s +} + ; Folding this would only be legal if we sanitized undef to 0. 
define <2 x i8> @select_xor_icmp_vec_undef(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_undef( From 4a12f51ad0090c3bcfea29c8dd021486ac3aa329 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 5 Apr 2021 12:14:49 -0400 Subject: [PATCH 232/318] [InstCombine] fix potential miscompile in select value equivalence As shown in the example based on: https://llvm.org/PR49832 ...and the existing test, we can't substitute a vector value because the equality compare replacement that we are attempting requires that the comparison is true for the entire value. Vector select can be partly true/false. (cherry picked from commit c590a9880d7a660a1c911fce07f3d01ea18be2df) --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 5 ++++- llvm/test/Transforms/InstCombine/select-binop-cmp.ll | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index f26c194d31b9..5f174aae09ec 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1095,7 +1095,10 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// TODO: Wrapping flags could be preserved in some cases with better analysis. Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp) { - if (!Cmp.isEquality()) + // Value equivalence substitution requires an all-or-nothing replacement. + // It does not make sense for a vector compare where each lane is chosen + // independently. + if (!Cmp.isEquality() || Cmp.getType()->isVectorTy()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. 
diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index beea2862a8ef..7c1cc21b4280 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -551,12 +551,12 @@ define i32 @select_xor_icmp_bad_6(i32 %x, i32 %y, i32 %z) { ret i32 %C } -; FIXME: Value equivalence substitution is all-or-nothing, so needs a scalar compare. +; Value equivalence substitution is all-or-nothing, so needs a scalar compare. define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @select_xor_icmp_vec_bad( ; CHECK-NEXT: [[A:%.*]] = icmp eq <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[Z:%.*]], +; CHECK-NEXT: [[B:%.*]] = xor <2 x i8> [[X]], [[Z:%.*]] ; CHECK-NEXT: [[C:%.*]] = select <2 x i1> [[A]], <2 x i8> [[B]], <2 x i8> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[C]] ; @@ -566,11 +566,14 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) ret <2 x i8> %C } -; FIXME: Value equivalence substitution is all-or-nothing, so needs a scalar compare. +; Value equivalence substitution is all-or-nothing, so needs a scalar compare. 
define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x) { ; CHECK-LABEL: @vec_select_no_equivalence( -; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> [[X]] +; CHECK-NEXT: ret <2 x i32> [[S]] ; %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> %cond = icmp eq <2 x i32> %x, zeroinitializer From 266c82f94da232d736f413c8d9e08d066c2d7202 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 5 Apr 2021 16:11:39 -0400 Subject: [PATCH 233/318] [InstSimplify] add test for vector select with operand replacement; NFC We need a sibling fix to c590a9880d7a ( https://llvm.org/PR49832 ) to avoid miscompiling. (cherry picked from commit 78e5cf66fec52c8e6e665c3c9e64d38498d94a5d) --- llvm/test/Transforms/InstSimplify/select.ll | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 6460b42d63c1..68e7411c8310 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -969,6 +969,28 @@ define @ignore_scalable_undef( %cond) { ret %s } +define i32 @select_ctpop_zero(i32 %x) { +; CHECK-LABEL: @select_ctpop_zero( +; CHECK-NEXT: [[T1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[T1]] +; + %t0 = icmp eq i32 %x, 0 + %t1 = call i32 @llvm.ctpop.i32(i32 %x) + %sel = select i1 %t0, i32 0, i32 %t1 + ret i32 %sel +} +declare i32 @llvm.ctpop.i32(i32) + +define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @vec_select_no_equivalence( +; CHECK-NEXT: ret <2 x i32> zeroinitializer +; + %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> + %cond = icmp eq <2 x i32> %x, zeroinitializer + %s = select <2 x i1> %cond, <2 x i32> 
%x10, <2 x i32> zeroinitializer + ret <2 x i32> %s +} + ; TODO: these can be optimized more define i32 @poison(i32 %x, i32 %y) { From 8e2ff387d30d540195ffef299785d392b0ee17dd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 5 Apr 2021 16:47:29 -0400 Subject: [PATCH 234/318] [InstSimplify] fix potential miscompile in select value equivalence This is the sibling fix to c590a9880d7a - as there, we can't substitute a vector value because the equality compare replacement that we are trying requires that the comparison is true for the entire value. Vector select can be partly true/false. (cherry picked from commit e2a0f512eacad0699be9660f668726d7deb2cd75) --- llvm/lib/Analysis/InstructionSimplify.cpp | 8 +++++--- llvm/test/Transforms/InstSimplify/select.ll | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index c40e5c36cdc7..a12816885c40 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4127,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, TrueVal, FalseVal)) return V; - // If we have an equality comparison, then we know the value in one of the - // arms of the select. See if substituting this value into the arm and + // If we have a scalar equality comparison, then we know the value in one of + // the arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. - if (Pred == ICmpInst::ICMP_EQ) { + // Note that the equivalence/replacement opportunity does not hold for vectors + // because each element of a vector select is chosen independently.
+ if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false, MaxRecurse) == TrueVal || diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 68e7411c8310..4291fc0a839e 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -983,7 +983,10 @@ declare i32 @llvm.ctpop.i32(i32) define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @vec_select_no_equivalence( -; CHECK-NEXT: ret <2 x i32> zeroinitializer +; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> +; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x i32> [[S]] ; %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> %cond = icmp eq <2 x i32> %x, zeroinitializer From 372e6fbc8778911d743a1d8bf371bff5f805abea Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 5 Apr 2021 22:22:18 +0000 Subject: [PATCH 235/318] workflows: Use -g1 when compiling libraries for ABI checks This should help reduce memory usage of the abi-dumper tool and avoid running out of memory and disk space. 
--- .github/workflows/libclang-abi-tests.yml | 2 +- .github/workflows/llvm-tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index ed54c4a1e54d..af7c8d7db119 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -104,7 +104,7 @@ jobs: - name: Configure run: | mkdir install - cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build run: ninja -C build/ ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} install-clang-headers - name: Dump ABI diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 9017a014be02..077def0250fb 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -107,7 +107,7 @@ jobs: - name: Configure run: | mkdir install - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build # Need to run install-LLVM twice to ensure the symlink is installed (this is a bug). 
run: | From 452500ebcde0443bc421eda407cedc888bfd8fca Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 6 Apr 2021 17:19:56 +0000 Subject: [PATCH 236/318] workflows: Use uncompressed abi dumps The compressed dumps are no longer readable, but I'm not sure why. --- .github/workflows/libclang-abi-tests.yml | 5 ++--- .github/workflows/llvm-tests.yml | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index af7c8d7db119..09ea5cfbfc82 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -113,13 +113,12 @@ jobs: for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do # Remove symbol versioning from dumps, so we can compare across major versions. sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi - tar -czf $lib-${{ matrix.ref }}.abi.tar.gz $lib-${{ matrix.ref }}.abi done - name: Upload ABI file uses: actions/upload-artifact@v2 with: name: ${{ matrix.name }} - path: "*${{ matrix.ref }}.abi.tar.gz" + path: "*${{ matrix.ref }}.abi" abi-compare: runs-on: ubuntu-latest @@ -141,7 +140,7 @@ jobs: - name: Compare ABI run: | for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do - abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi.tar.gz -new build-latest/$lib*.abi.tar.gz + abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi -new build-latest/$lib*.abi done - name: Upload ABI Comparison if: always() diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 077def0250fb..bb011c0dafbb 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -126,12 +126,11 @@ jobs: abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so # Remove symbol versioning from dumps, so 
we can compare across major versions. sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi - tar -czf ${{ matrix.ref }}.abi.tar.gz ${{ matrix.ref }}.abi - name: Upload ABI file uses: actions/upload-artifact@v1 with: name: ${{ matrix.name }} - path: ${{ matrix.ref }}.abi.tar.gz + path: ${{ matrix.ref }}.abi - name: Upload symbol list file if: matrix.name == 'build-baseline' @@ -167,7 +166,10 @@ jobs: # This option doesn't seem to work with the ABI dumper, so passing it here. export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" fi - abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" + # FIXME: Reading of gzip'd abi files on the GitHub runners stop + # working some time in March of 2021, likely due to a change in the + # runner's environment. + abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" - name: Upload ABI Comparison if: always() uses: actions/upload-artifact@v1 From a1a197b54ec6686963b4e56ee8117dd79679ec4a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 4 May 2021 03:50:40 +0000 Subject: [PATCH 237/318] workflows: Fix tarball download for libclang-abi-tests --- .github/workflows/libclang-abi-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 09ea5cfbfc82..c7d1993ba006 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -41,7 +41,7 @@ jobs: remote_repo='https://github.com/llvm/llvm-project' if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) - 
baseline_ref="$major_version.0.0" + baseline_ref="llvmorg-$major_version.0.0" # If there is a minor release, we want to use that as the base line. minor_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9] | tail -n1 | grep -o 'llvmorg-.\+' || true` @@ -60,7 +60,7 @@ jobs: echo ::set-output name=ABI_LIBS::libclang.so else echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - echo ::set-output name=BASELINE_REF::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 + echo ::set-output name=BASELINE_REF::llvmorg-${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 echo ::set-output name=ABI_HEADERS::. echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so fi From 2db5d42193abefcb41b10bd70b7ab536cb03f1cc Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 10 May 2021 13:20:52 -0700 Subject: [PATCH 238/318] Remove extra test case added in 266c82f94da232d736f413c8d9e08d066c2d7202 This test case was added by accident and is failing on the release/12.x branch. 
--- llvm/test/Transforms/InstSimplify/select.ll | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 4291fc0a839e..93f09d89bf40 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -969,18 +969,6 @@ define @ignore_scalable_undef( %cond) { ret %s } -define i32 @select_ctpop_zero(i32 %x) { -; CHECK-LABEL: @select_ctpop_zero( -; CHECK-NEXT: [[T1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[T1]] -; - %t0 = icmp eq i32 %x, 0 - %t1 = call i32 @llvm.ctpop.i32(i32 %x) - %sel = select i1 %t0, i32 0, i32 %t1 - ret i32 %sel -} -declare i32 @llvm.ctpop.i32(i32) - define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @vec_select_no_equivalence( ; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> From f3e07c841e2f96a73b253d3b1a95e2ac8df5a376 Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Thu, 6 May 2021 22:26:19 +0200 Subject: [PATCH 239/318] [NFC] Correctly assert the indents for printEnumValHelpStr. Only verify that there's no negative indent. Noted by @chapuni in https://reviews.llvm.org/D93494. 
Reviewed By: chapuni Differential Revision: https://reviews.llvm.org/D102021 (cherry picked from commit d9f2960c932c9803e662098e33d899efa3c67f44) --- llvm/lib/Support/CommandLine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index e2f014d1815b..123a23a5242c 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent, void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent, size_t FirstLineIndentedBy) { const StringRef ValHelpPrefix = " "; - assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size()); + assert(BaseIndent >= FirstLineIndentedBy); std::pair Split = HelpStr.split('\n'); outs().indent(BaseIndent - FirstLineIndentedBy) << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n"; From 24535af52ae139f2bb361855fbbaf47cc9e5d580 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 9 Apr 2021 10:39:59 -0700 Subject: [PATCH 240/318] [AArch64][GlobalISel] Fix incorrect codegen for <16 x s8> G_ASHR. Fixes PR49904 (cherry picked from commit 40e75cafc0fef365b5580a9c09595ac475db0c19) --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2 +- llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 5259f4f5a4d0..fc5ef02e8457 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1791,7 +1791,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( NegOpc = AArch64::NEGv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? 
AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv8i16; + NegOpc = AArch64::NEGv16i8; } else if (Ty == LLT::vector(8, 8)) { Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; NegOpc = AArch64::NEGv8i8; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir index 6a5c33ed9c14..1056a449ab21 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir @@ -562,8 +562,8 @@ body: | ; CHECK: liveins: $q0, $q1 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: [[NEGv8i16_:%[0-9]+]]:fpr128 = NEGv8i16 [[COPY1]] - ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv8i16_]] + ; CHECK: [[NEGv16i8_:%[0-9]+]]:fpr128 = NEGv16i8 [[COPY1]] + ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv16i8_]] ; CHECK: $q0 = COPY [[USHLv16i8_]] ; CHECK: RET_ReallyLR implicit $q0 %0:fpr(<16 x s8>) = COPY $q0 From 067c06dc8395a2d79792c0ac4e48c2a79836b46f Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 6 May 2021 15:22:21 +0200 Subject: [PATCH 241/318] [SystemZ] Don't use libcall for 128 bit shifts. Expand 128 bit shifts instead of using a libcall. This patch removes the 128 bit shift libcalls and thereby causes ExpandShiftWithUnknownAmountBit() to be called. 
Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D101993 (cherry picked from commit 1c4cb510b4daccc0f4763958567affc2b442f317) --- .../Target/SystemZ/SystemZISelLowering.cpp | 5 +- llvm/test/CodeGen/SystemZ/shift-12.ll | 94 +++++++++++++++---- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 603446755aaf..9ace36f344a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -285,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // FIXME: Can we support these natively? + // Expand 128 bit shifts without using a libcall. setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); diff --git a/llvm/test/CodeGen/SystemZ/shift-12.ll b/llvm/test/CodeGen/SystemZ/shift-12.ll index 7559602aa256..421928f28698 100644 --- a/llvm/test/CodeGen/SystemZ/shift-12.ll +++ b/llvm/test/CodeGen/SystemZ/shift-12.ll @@ -2,7 +2,7 @@ ; Test removal of AND operations that don't affect last 6 bits of shift amount ; operand. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s ; Test that AND is not removed when some lower 6 bits are not set. 
define i32 @f1(i32 %a, i32 %sh) { @@ -119,31 +119,28 @@ define i32 @f10(i32 %a, i32 %sh) { ret i32 %reuse } -; Test that AND is not removed for i128 (which calls __ashlti3) define i128 @f11(i128 %a, i32 %sh) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r13, %r15, 104(%r15) -; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: aghi %r15, -192 -; CHECK-NEXT: .cfi_def_cfa_offset 352 ; CHECK-NEXT: lg %r0, 8(%r3) -; CHECK-NEXT: # kill: def $r4l killed $r4l def $r4d -; CHECK-NEXT: lgr %r13, %r2 ; CHECK-NEXT: lg %r1, 0(%r3) -; CHECK-NEXT: stg %r0, 168(%r15) -; CHECK-NEXT: risbg %r4, %r4, 57, 191, 0 -; CHECK-NEXT: la %r2, 176(%r15) -; CHECK-NEXT: la %r3, 160(%r15) -; CHECK-NEXT: stg %r1, 160(%r15) -; CHECK-NEXT: brasl %r14, __ashlti3@PLT -; CHECK-NEXT: lg %r0, 184(%r15) -; CHECK-NEXT: lg %r1, 176(%r15) -; CHECK-NEXT: stg %r0, 8(%r13) -; CHECK-NEXT: stg %r1, 0(%r13) -; CHECK-NEXT: lmg %r13, %r15, 296(%r15) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; CHECK-NEXT: sllg %r5, %r1, 0(%r4) +; CHECK-NEXT: srlg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: sllg %r3, %r0, -64(%r3) +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: sllg %r0, %r0, 0(%r4) +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locghinle %r0, 0 +; CHECK-NEXT: stg %r0, 8(%r2) +; CHECK-NEXT: stg %r3, 0(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 %ext = zext i32 %and to i128 @@ -151,3 +148,62 @@ define i128 @f11(i128 %a, i32 %sh) { ret i128 %shift } +define i128 @f12(i128 %a, i32 %sh) { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; 
CHECK-NEXT: srlg %r5, %r1, 0(%r4) +; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: srlg %r3, %r0, -64(%r3) +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: srlg %r0, %r0, 0(%r4) +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locghinle %r0, 0 +; CHECK-NEXT: stg %r0, 0(%r2) +; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: br %r14 + %and = and i32 %sh, 127 + %ext = zext i32 %and to i128 + %shift = lshr i128 %a, %ext + ret i128 %shift +} + +define i128 @f13(i128 %a, i32 %sh) { +; CHECK-LABEL: f13: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 +; CHECK-NEXT: lcr %r14, %r3 +; CHECK-NEXT: srlg %r5, %r1, 0(%r4) +; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: ogr %r5, %r14 +; CHECK-NEXT: srag %r14, %r0, 0(%r4) +; CHECK-NEXT: srag %r3, %r0, -64(%r3) +; CHECK-NEXT: srag %r0, %r0, 63 +; CHECK-NEXT: tmll %r4, 127 +; CHECK-NEXT: locgrle %r3, %r5 +; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r0, %r14 +; CHECK-NEXT: stg %r0, 0(%r2) +; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: lmg %r14, %r15, 112(%r15) +; CHECK-NEXT: br %r14 + %and = and i32 %sh, 127 + %ext = zext i32 %and to i128 + %shift = ashr i128 %a, %ext + ret i128 %shift +} + From b89942c336a4506dabd5e1b2a6f1b5cbaddebe55 Mon Sep 17 00:00:00 2001 From: Alan Phipps Date: Tue, 11 May 2021 11:40:11 -0500 Subject: [PATCH 242/318] [Coverage] Fix branch coverage merging in FunctionCoverageSummary::get() for instantiation Fix branch coverage merging in FunctionCoverageSummary::get() for instantiation groups. This change corrects the implementation for the branch coverage summary to do the same thing for branches that is done for lines and regions. 
That is, across function instantiations in an instantiation group, the maximum branch coverage found in any of those instantiations is returned, with the total number of branches being the same across instantiations. Differential Revision: https://reviews.llvm.org/D102193 (cherry picked from commit eccb925147d5f262a3e74cc050d0665dd4e6d8db) --- llvm/test/tools/llvm-cov/branch-templates.cpp | 16 +++++++++++++++- llvm/tools/llvm-cov/CoverageSummaryInfo.cpp | 6 +----- llvm/tools/llvm-cov/CoverageSummaryInfo.h | 5 +++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/llvm/test/tools/llvm-cov/branch-templates.cpp b/llvm/test/tools/llvm-cov/branch-templates.cpp index 750dc7bd58f2..4797428f8835 100644 --- a/llvm/test/tools/llvm-cov/branch-templates.cpp +++ b/llvm/test/tools/llvm-cov/branch-templates.cpp @@ -1,9 +1,9 @@ // RUN: llvm-profdata merge %S/Inputs/branch-templates.proftext -o %t.profdata // RUN: llvm-cov show --show-expansions --show-branches=count %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s // RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -show-functions -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORT +// RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORTFILE #include - template void unused(T x) { return; @@ -45,3 +45,17 @@ int main() { // REPORT-NEXT: _Z4funcIfEiT_ 5 2 60.00% 7 3 57.14% 2 1 50.00% // REPORT-NEXT: --- // REPORT-NEXT: TOTAL 22 7 68.18% 31 11 64.52% 12 6 50.00% + +// Make sure the covered branch tally for the function instantiation group is +// merged to reflect maximum branch coverage of a single instantiation, just +// like what is done for lines and regions. 
Also, the total branch tally +// summary for an instantiation group should agree with the total number of +// branches in the definition (In this case, 2 and 6 for func<>() and main(), +// respectively). This is returned by: FunctionCoverageSummary::get(const +// InstantiationGroup &Group, ...) + +// REPORTFILE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover Branches Missed Branches Cover +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: branch-templates.cpp 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: TOTAL 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp index 4a0a86168908..10e059adeb7d 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp @@ -100,11 +100,7 @@ FunctionCoverageSummary::get(const InstantiationGroup &Group, for (const auto &FCS : Summaries.drop_front()) { Summary.RegionCoverage.merge(FCS.RegionCoverage); Summary.LineCoverage.merge(FCS.LineCoverage); - - // Sum branch coverage across instantiation groups for the summary rather - // than "merge" the maximum count. This is a clearer view into whether all - // created branches are covered. 
- Summary.BranchCoverage += FCS.BranchCoverage; + Summary.BranchCoverage.merge(FCS.BranchCoverage); } return Summary; } diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/llvm/tools/llvm-cov/CoverageSummaryInfo.h index 4bc1c24a079f..62e7cad1012b 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.h +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.h @@ -123,6 +123,11 @@ class BranchCoverageInfo { return *this; } + void merge(const BranchCoverageInfo &RHS) { + Covered = std::max(Covered, RHS.Covered); + NumBranches = std::max(NumBranches, RHS.NumBranches); + } + size_t getCovered() const { return Covered; } size_t getNumBranches() const { return NumBranches; } From aa97726f6040c68dfdd8076e8efe3ef119f6b037 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 14 Mar 2021 17:41:21 +0100 Subject: [PATCH 243/318] [SCCP] Avoid modifying AdditionalUsers while iterating over it When run under valgrind, or with a malloc that poisons freed memory, this can lead to segfaults or other problems. To avoid modifying the AdditionalUsers DenseMap while still iterating, save the instructions to be notified in a separate SmallPtrSet, and use this to later call OperandChangedState on each instruction. Fixes PR49582. 
Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D98602 (cherry picked from commit 6abb92f2103a58d097620b4410054c5bb18c48ec) --- llvm/lib/Transforms/Scalar/SCCP.cpp | 7 +- .../SCCP/pr49582-iterator-invalidation.ll | 854 ++++++++++++++++++ 2 files changed, 860 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index de6be52adf21..8feed9e9ebfe 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -542,9 +542,14 @@ class SCCPSolver : public InstVisitor { auto Iter = AdditionalUsers.find(I); if (Iter != AdditionalUsers.end()) { + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector ToNotify; for (User *U : Iter->second) if (auto *UI = dyn_cast(U)) - OperandChangedState(UI); + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + OperandChangedState(UI); } } void handleCallOverdefined(CallBase &CB); diff --git a/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll b/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll new file mode 100644 index 000000000000..6d5b2e1841b4 --- /dev/null +++ b/llvm/test/Transforms/SCCP/pr49582-iterator-invalidation.ll @@ -0,0 +1,854 @@ +; RUN: opt < %s -ipsccp -disable-output +; PR49582: This test checks for an iterator invalidation issue, which only gets +; exposed on a large-enough test case. We intentionally do not check the output. 
+ +@c = external dso_local global i32*, align 8 +@d = external dso_local global i32, align 4 + +define void @f(i32 %i) { +entry: + br label %for.cond + +for.cond: ; preds = %if.end628, %entry + %e.0 = phi i32 [ 1, %entry ], [ %e.15, %if.end628 ] + %cmp = icmp slt i32 %e.0, %i + call void @llvm.assume(i1 %cmp) + %0 = load i32*, i32** @c, align 8 + %tobool = icmp ne i32* %0, null + br i1 %tobool, label %if.then, label %if.end628 + +if.then: ; preds = %for.cond + %1 = load i32, i32* %0, align 4 + %tobool1 = icmp ne i32 %1, 0 + br i1 %tobool1, label %if.then2, label %if.else78 + +if.then2: ; preds = %if.then + %add = add nsw i32 %e.0, 1 + %cmp3 = icmp sge i32 %add, %i + br i1 %cmp3, label %if.then4, label %if.end + +if.then4: ; preds = %if.then2 + %idxprom = sext i32 %add to i64 + br label %if.end + +if.end: ; preds = %if.then4, %if.then2 + br i1 %cmp3, label %if.then9, label %if.end13 + +if.then9: ; preds = %if.end + %idxprom11 = sext i32 %add to i64 + br label %if.end13 + +if.end13: ; preds = %if.then9, %if.end + br i1 %cmp3, label %if.then16, label %if.end20 + +if.then16: ; preds = %if.end13 + %idxprom18 = sext i32 %add to i64 + br label %if.end20 + +if.end20: ; preds = %if.then16, %if.end13 + %add21 = add nsw i32 %e.0, 3 + %cmp22 = icmp sge i32 %add21, %i + br i1 %cmp22, label %if.then23, label %if.end25 + +if.then23: ; preds = %if.end20 + br label %if.end25 + +if.end25: ; preds = %if.then23, %if.end20 + %e.1 = phi i32 [ %add21, %if.then23 ], [ %e.0, %if.end20 ] + %cmp26 = icmp sge i32 %e.1, %i + br i1 %cmp26, label %if.then27, label %if.end28 + +if.then27: ; preds = %if.end25 + %inc = add nsw i32 %e.1, 1 + br label %if.end28 + +if.end28: ; preds = %if.then27, %if.end25 + %e.2 = phi i32 [ %inc, %if.then27 ], [ %e.1, %if.end25 ] + %add29 = add nsw i32 %e.2, 2 + %cmp30 = icmp sge i32 %add29, %i + br i1 %cmp30, label %if.then31, label %if.end33 + +if.then31: ; preds = %if.end28 + br label %if.end33 + +if.end33: ; preds = %if.then31, %if.end28 + %e.3 = phi i32 [ 
%add29, %if.then31 ], [ %e.2, %if.end28 ] + %cmp34 = icmp sge i32 %e.3, %i + br i1 %cmp34, label %if.then35, label %if.end38 + +if.then35: ; preds = %if.end33 + %idxprom36 = sext i32 %e.3 to i64 + br label %if.end38 + +if.end38: ; preds = %if.then35, %if.end33 + br i1 %cmp34, label %if.then40, label %if.end43 + +if.then40: ; preds = %if.end38 + %idxprom41 = sext i32 %e.3 to i64 + br label %if.end43 + +if.end43: ; preds = %if.then40, %if.end38 + br i1 %cmp34, label %if.then45, label %if.end47 + +if.then45: ; preds = %if.end43 + %inc46 = add nsw i32 %e.3, 1 + br label %if.end47 + +if.end47: ; preds = %if.then45, %if.end43 + %e.4 = phi i32 [ %inc46, %if.then45 ], [ %e.3, %if.end43 ] + %cmp48 = icmp sge i32 %e.4, %i + br i1 %cmp48, label %if.then49, label %if.end51 + +if.then49: ; preds = %if.end47 + %inc50 = add nsw i32 %e.4, 1 + br label %if.end51 + +if.end51: ; preds = %if.then49, %if.end47 + %e.5 = phi i32 [ %inc50, %if.then49 ], [ %e.4, %if.end47 ] + %2 = load i32*, i32** @c, align 8 + %tobool52 = icmp ne i32* %2, null + br i1 %tobool52, label %if.then53, label %if.else + +if.then53: ; preds = %if.end51 + %cmp54 = icmp sge i32 %e.5, %i + br i1 %cmp54, label %if.then55, label %if.end628 + +if.then55: ; preds = %if.then53 + unreachable + +if.else: ; preds = %if.end51 + %3 = load i32, i32* @d, align 4 + %tobool57 = icmp ne i32 %3, 0 + br i1 %tobool57, label %if.then58, label %if.else68 + +if.then58: ; preds = %if.else + %cmp59 = icmp sge i32 %e.5, %i + br i1 %cmp59, label %if.then60, label %if.end62 + +if.then60: ; preds = %if.then58 + %inc61 = add nsw i32 %e.5, 1 + br label %if.end62 + +if.end62: ; preds = %if.then60, %if.then58 + %e.6 = phi i32 [ %inc61, %if.then60 ], [ %e.5, %if.then58 ] + %add63 = add nsw i32 %e.6, 1 + %cmp64 = icmp sge i32 %add63, %i + br i1 %cmp64, label %if.then65, label %if.end628 + +if.then65: ; preds = %if.end62 + br label %if.end628 + +if.else68: ; preds = %if.else + %add69 = add nsw i32 %e.5, 2 + %cmp70 = icmp sge i32 %add69, %i + br i1 
%cmp70, label %if.then71, label %if.end628 + +if.then71: ; preds = %if.else68 + %idxprom73 = sext i32 %add69 to i64 + br label %if.end628 + +if.else78: ; preds = %if.then + %call = call i32 @g() + %tobool79 = icmp ne i32 %call, 0 + br i1 %tobool79, label %if.then80, label %if.else123 + +if.then80: ; preds = %if.else78 + %add81 = add nsw i32 %e.0, 3 + %cmp82 = icmp sge i32 %add81, %i + br i1 %cmp82, label %if.then83, label %if.end87 + +if.then83: ; preds = %if.then80 + %idxprom85 = sext i32 %add81 to i64 + br label %if.end87 + +if.end87: ; preds = %if.then83, %if.then80 + br i1 %cmp82, label %if.then90, label %if.end94 + +if.then90: ; preds = %if.end87 + %idxprom92 = sext i32 %add81 to i64 + br label %if.end94 + +if.end94: ; preds = %if.then90, %if.end87 + br i1 %cmp82, label %if.then97, label %if.end99 + +if.then97: ; preds = %if.end94 + br label %if.end99 + +if.end99: ; preds = %if.then97, %if.end94 + %e.7 = phi i32 [ %add81, %if.then97 ], [ %e.0, %if.end94 ] + %cmp100 = icmp sge i32 %e.7, %i + br i1 %cmp100, label %if.then101, label %if.end103 + +if.then101: ; preds = %if.end99 + %inc102 = add nsw i32 %e.7, 1 + br label %if.end103 + +if.end103: ; preds = %if.then101, %if.end99 + %e.8 = phi i32 [ %inc102, %if.then101 ], [ %e.7, %if.end99 ] + %add104 = add nsw i32 %e.8, 1 + %cmp105 = icmp sge i32 %add104, %i + br i1 %cmp105, label %if.then106, label %if.end108 + +if.then106: ; preds = %if.end103 + br label %if.end108 + +if.end108: ; preds = %if.then106, %if.end103 + %e.9 = phi i32 [ %add104, %if.then106 ], [ %e.8, %if.end103 ] + %cmp109 = icmp sge i32 %e.9, %i + br i1 %cmp109, label %if.then110, label %if.end113 + +if.then110: ; preds = %if.end108 + %idxprom111 = sext i32 %e.9 to i64 + br label %if.end113 + +if.end113: ; preds = %if.then110, %if.end108 + br i1 %cmp109, label %if.then115, label %if.end118 + +if.then115: ; preds = %if.end113 + %idxprom116 = sext i32 %e.9 to i64 + unreachable + +if.end118: ; preds = %if.end113 + br i1 %cmp109, label %if.then120, label 
%if.end628 + +if.then120: ; preds = %if.end118 + br label %if.end628 + +if.else123: ; preds = %if.else78 + %call124 = call i32 @g() + %tobool125 = icmp ne i32 %call124, 0 + br i1 %tobool125, label %if.then126, label %if.end628 + +if.then126: ; preds = %if.else123 + %call127 = call i32 @g() + %tobool128 = icmp ne i32 %call127, 0 + br i1 %tobool128, label %if.then129, label %if.else164 + +if.then129: ; preds = %if.then126 + %add130 = add nsw i32 %e.0, 1 + %cmp131 = icmp sge i32 %add130, %i + br i1 %cmp131, label %if.then132, label %if.end134 + +if.then132: ; preds = %if.then129 + br label %if.end134 + +if.end134: ; preds = %if.then132, %if.then129 + %e.10 = phi i32 [ %add130, %if.then132 ], [ %e.0, %if.then129 ] + %cmp135 = icmp sge i32 %e.10, %i + br i1 %cmp135, label %if.then136, label %if.end139 + +if.then136: ; preds = %if.end134 + %idxprom137 = sext i32 %e.10 to i64 + br label %if.end139 + +if.end139: ; preds = %if.then136, %if.end134 + br i1 %cmp135, label %if.then141, label %if.end144 + +if.then141: ; preds = %if.end139 + %idxprom142 = sext i32 %e.10 to i64 + br label %if.end144 + +if.end144: ; preds = %if.then141, %if.end139 + br i1 %cmp135, label %if.then146, label %if.end149 + +if.then146: ; preds = %if.end144 + %idxprom147 = sext i32 %e.10 to i64 + br label %if.end149 + +if.end149: ; preds = %if.then146, %if.end144 + br i1 %cmp135, label %if.then151, label %if.else154 + +if.then151: ; preds = %if.end149 + %idxprom152 = sext i32 %e.10 to i64 + br label %if.end160 + +if.else154: ; preds = %if.end149 + %idxprom157 = sext i32 %e.10 to i64 + br label %if.end160 + +if.end160: ; preds = %if.else154, %if.then151 + br i1 %cmp135, label %if.then162, label %if.end628 + +if.then162: ; preds = %if.end160 + unreachable + +if.else164: ; preds = %if.then126 + %4 = load i32*, i32** @c, align 8 + %tobool165 = icmp ne i32* %4, null + br i1 %tobool165, label %if.then166, label %if.else195 + +if.then166: ; preds = %if.else164 + %add167 = add nsw i32 %e.0, 1 + %cmp168 = icmp 
sge i32 %add167, %i + br i1 %cmp168, label %if.then169, label %if.end173 + +if.then169: ; preds = %if.then166 + %idxprom171 = sext i32 %add167 to i64 + br label %if.end173 + +if.end173: ; preds = %if.then169, %if.then166 + br i1 %cmp168, label %if.then176, label %if.end180 + +if.then176: ; preds = %if.end173 + %idxprom178 = sext i32 %add167 to i64 + unreachable + +if.end180: ; preds = %if.end173 + br i1 %cmp168, label %if.then183, label %if.end187 + +if.then183: ; preds = %if.end180 + %idxprom185 = sext i32 %add167 to i64 + unreachable + +if.end187: ; preds = %if.end180 + br i1 %cmp168, label %if.then190, label %if.end628 + +if.then190: ; preds = %if.end187 + br label %if.end628 + +if.else195: ; preds = %if.else164 + %5 = load i32, i32* @d, align 4 + %tobool196 = icmp ne i32 %5, 0 + br i1 %tobool196, label %if.then197, label %if.else205 + +if.then197: ; preds = %if.else195 + %add198 = add nsw i32 %e.0, 1 + %cmp199 = icmp sge i32 %add198, %i + br i1 %cmp199, label %if.then200, label %if.end628 + +if.then200: ; preds = %if.then197 + %idxprom202 = sext i32 %add198 to i64 + br label %if.end628 + +if.else205: ; preds = %if.else195 + %call206 = call i32 @h() + %tobool207 = icmp ne i32 %call206, 0 + br i1 %tobool207, label %if.then208, label %if.else217 + +if.then208: ; preds = %if.else205 + %add209 = add nsw i32 %e.0, 1 + %cmp210 = icmp sge i32 %add209, %i + br i1 %cmp210, label %if.then211, label %if.end215 + +if.then211: ; preds = %if.then208 + %idxprom213 = sext i32 %add209 to i64 + unreachable + +if.end215: ; preds = %if.then208 + %6 = zext i32 %add209 to i64 + br label %if.end628 + +if.else217: ; preds = %if.else205 + %7 = load i32*, i32** @c, align 8 + %tobool218 = icmp ne i32* %7, null + br i1 %tobool218, label %if.then219, label %if.else227 + +if.then219: ; preds = %if.else217 + %add220 = add nsw i32 %e.0, 1 + %cmp221 = icmp sge i32 %add220, %i + br i1 %cmp221, label %if.then222, label %if.end628 + +if.then222: ; preds = %if.then219 + %idxprom224 = sext i32 
%add220 to i64 + br label %if.end628 + +if.else227: ; preds = %if.else217 + %call228 = call i32 @g() + %tobool229 = icmp ne i32 %call228, 0 + br i1 %tobool229, label %if.then230, label %if.else245 + +if.then230: ; preds = %if.else227 + %add231 = add nsw i32 %e.0, 1 + %cmp232 = icmp sge i32 %add231, %i + br i1 %cmp232, label %if.then233, label %if.end237 + +if.then233: ; preds = %if.then230 + %idxprom235 = sext i32 %add231 to i64 + br label %if.end237 + +if.end237: ; preds = %if.then233, %if.then230 + br i1 %cmp232, label %if.then240, label %if.end628 + +if.then240: ; preds = %if.end237 + %idxprom242 = sext i32 %add231 to i64 + br label %if.end628 + +if.else245: ; preds = %if.else227 + %8 = load i32*, i32** @c, align 8 + %tobool246 = icmp ne i32* %8, null + br i1 %tobool246, label %if.then247, label %if.else258 + +if.then247: ; preds = %if.else245 + %add248 = add nsw i32 %e.0, 1 + %cmp249 = icmp sge i32 %add248, %i + br i1 %cmp249, label %if.then250, label %if.end254 + +if.then250: ; preds = %if.then247 + %idxprom252 = sext i32 %add248 to i64 + unreachable + +if.end254: ; preds = %if.then247 + %9 = zext i32 %add248 to i64 + br label %if.end628 + +if.else258: ; preds = %if.else245 + %10 = load i32, i32* @d, align 4 + %tobool259 = icmp ne i32 %10, 0 + br i1 %tobool259, label %if.then260, label %if.else268 + +if.then260: ; preds = %if.else258 + %add261 = add nsw i32 %e.0, 1 + %cmp262 = icmp sge i32 %add261, %i + br i1 %cmp262, label %if.then263, label %if.end628 + +if.then263: ; preds = %if.then260 + %idxprom265 = sext i32 %add261 to i64 + br label %if.end628 + +if.else268: ; preds = %if.else258 + %call269 = call i32 @h() + %tobool270 = icmp ne i32 %call269, 0 + br i1 %tobool270, label %if.then271, label %if.else279 + +if.then271: ; preds = %if.else268 + %add272 = add nsw i32 %e.0, 1 + %cmp273 = icmp sge i32 %add272, %i + br i1 %cmp273, label %if.then274, label %if.end628 + +if.then274: ; preds = %if.then271 + %idxprom276 = sext i32 %add272 to i64 + br label %if.end628 
+ +if.else279: ; preds = %if.else268 + %11 = load i32*, i32** @c, align 8 + %tobool280 = icmp ne i32* %11, null + br i1 %tobool280, label %if.then281, label %if.else287 + +if.then281: ; preds = %if.else279 + %add282 = add nsw i32 %e.0, 2 + %cmp283 = icmp sge i32 %add282, %i + br i1 %cmp283, label %if.then284, label %if.end628 + +if.then284: ; preds = %if.then281 + br label %if.end628 + +if.else287: ; preds = %if.else279 + %call288 = call i32 @g() + %tobool289 = icmp ne i32 %call288, 0 + br i1 %tobool289, label %if.then290, label %if.else307 + +if.then290: ; preds = %if.else287 + %12 = load i32*, i32** @c, align 8 + %tobool291 = icmp ne i32* %12, null + br i1 %tobool291, label %if.then292, label %if.else298 + +if.then292: ; preds = %if.then290 + %add293 = add nsw i32 %e.0, 3 + %cmp294 = icmp sge i32 %add293, %i + br i1 %cmp294, label %if.then295, label %if.end628 + +if.then295: ; preds = %if.then292 + br label %if.end628 + +if.else298: ; preds = %if.then290 + %add299 = add nsw i32 %e.0, 4 + %cmp300 = icmp sge i32 %add299, %i + br i1 %cmp300, label %if.then301, label %if.end628 + +if.then301: ; preds = %if.else298 + %idxprom303 = sext i32 %add299 to i64 + br label %if.end628 + +if.else307: ; preds = %if.else287 + %13 = load i32*, i32** @c, align 8 + %tobool308 = icmp ne i32* %13, null + br i1 %tobool308, label %if.then309, label %if.else324 + +if.then309: ; preds = %if.else307 + %add310 = add nsw i32 %e.0, 1 + %cmp311 = icmp sge i32 %add310, %i + br i1 %cmp311, label %if.then312, label %if.else316 + +if.then312: ; preds = %if.then309 + %idxprom314 = sext i32 %add310 to i64 + br label %if.end628 + +if.else316: ; preds = %if.then309 + br i1 undef, label %if.then318, label %if.end628 + +if.then318: ; preds = %if.else316 + %idxprom320 = sext i32 %add310 to i64 + br label %if.end628 + +if.else324: ; preds = %if.else307 + %call325 = call i32 @g() + %tobool326 = icmp ne i32 %call325, 0 + br i1 %tobool326, label %if.then327, label %if.else475 + +if.then327: ; preds = 
%if.else324 + %add328 = add nsw i32 %e.0, 2 + %cmp329 = icmp sge i32 %add328, %i + br i1 %cmp329, label %if.then330, label %if.end332 + +if.then330: ; preds = %if.then327 + br label %if.end332 + +if.end332: ; preds = %if.then330, %if.then327 + %e.11 = phi i32 [ %add328, %if.then330 ], [ %e.0, %if.then327 ] + %cmp333 = icmp sge i32 %e.11, %i + br i1 %cmp333, label %if.then334, label %if.end336 + +if.then334: ; preds = %if.end332 + %inc335 = add nsw i32 %e.11, 1 + br label %if.end336 + +if.end336: ; preds = %if.then334, %if.end332 + %e.12 = phi i32 [ %inc335, %if.then334 ], [ %e.11, %if.end332 ] + %cmp337 = icmp sge i32 %e.12, %i + br i1 %cmp337, label %if.then338, label %if.end340 + +if.then338: ; preds = %if.end336 + %inc339 = add nsw i32 %e.12, 1 + br label %if.end340 + +if.end340: ; preds = %if.then338, %if.end336 + %e.13 = phi i32 [ %inc339, %if.then338 ], [ %e.12, %if.end336 ] + %cmp341 = icmp sge i32 %e.13, %i + br i1 %cmp341, label %if.then342, label %if.end344 + +if.then342: ; preds = %if.end340 + %inc343 = add nsw i32 %e.13, 1 + br label %if.end344 + +if.end344: ; preds = %if.then342, %if.end340 + %e.14 = phi i32 [ %inc343, %if.then342 ], [ %e.13, %if.end340 ] + %call345 = call i32 @g() + %tobool346 = icmp ne i32 %call345, 0 + br i1 %tobool346, label %if.then347, label %if.else398 + +if.then347: ; preds = %if.end344 + %cmp348 = icmp sge i32 %e.14, %i + br i1 %cmp348, label %if.then349, label %if.end352 + +if.then349: ; preds = %if.then347 + %idxprom350 = sext i32 %e.14 to i64 + br label %if.end352 + +if.end352: ; preds = %if.then349, %if.then347 + br i1 %cmp348, label %if.then354, label %if.else357 + +if.then354: ; preds = %if.end352 + %idxprom355 = sext i32 %e.14 to i64 + br label %if.end361 + +if.else357: ; preds = %if.end352 + %idxprom359 = sext i32 %e.14 to i64 + br label %if.end361 + +if.end361: ; preds = %if.else357, %if.then354 + br i1 %cmp348, label %if.then363, label %if.end366 + +if.then363: ; preds = %if.end361 + %idxprom364 = sext i32 %e.14 to 
i64 + br label %if.end366 + +if.end366: ; preds = %if.then363, %if.end361 + br i1 %cmp348, label %if.then368, label %if.end371 + +if.then368: ; preds = %if.end366 + %idxprom369 = sext i32 %e.14 to i64 + br label %if.end371 + +if.end371: ; preds = %if.then368, %if.end366 + br i1 %cmp348, label %if.then373, label %if.end376 + +if.then373: ; preds = %if.end371 + %idxprom374 = sext i32 %e.14 to i64 + br label %if.end376 + +if.end376: ; preds = %if.then373, %if.end371 + br i1 %cmp348, label %if.then378, label %if.end381 + +if.then378: ; preds = %if.end376 + %idxprom379 = sext i32 %e.14 to i64 + br label %if.end381 + +if.end381: ; preds = %if.then378, %if.end376 + br i1 %cmp348, label %if.then383, label %if.else386 + +if.then383: ; preds = %if.end381 + %idxprom384 = sext i32 %e.14 to i64 + br label %if.end390 + +if.else386: ; preds = %if.end381 + %idxprom388 = sext i32 %e.14 to i64 + br label %if.end390 + +if.end390: ; preds = %if.else386, %if.then383 + %add391 = add nsw i32 %e.14, 1 + %cmp392 = icmp sge i32 %add391, %i + br i1 %cmp392, label %if.then393, label %if.end628 + +if.then393: ; preds = %if.end390 + %idxprom395 = sext i32 %add391 to i64 + br label %if.end628 + +if.else398: ; preds = %if.end344 + %call399 = call i32 @h() + %tobool400 = icmp ne i32 %call399, 0 + br i1 %tobool400, label %if.then401, label %if.else409 + +if.then401: ; preds = %if.else398 + %add402 = add nsw i32 %e.14, 1 + %cmp403 = icmp sge i32 %add402, %i + br i1 %cmp403, label %if.then404, label %if.end628 + +if.then404: ; preds = %if.then401 + %idxprom406 = sext i32 %add402 to i64 + br label %if.end628 + +if.else409: ; preds = %if.else398 + %call410 = call i32 @h() + %tobool411 = icmp ne i32 %call410, 0 + br i1 %tobool411, label %if.then412, label %if.else420 + +if.then412: ; preds = %if.else409 + %add413 = add nsw i32 %e.14, 1 + %cmp414 = icmp sge i32 %add413, %i + br i1 %cmp414, label %if.then415, label %if.end628 + +if.then415: ; preds = %if.then412 + %idxprom417 = sext i32 %add413 to i64 + 
br label %if.end628 + +if.else420: ; preds = %if.else409 + %call421 = call i32 @h() + %tobool422 = icmp ne i32 %call421, 0 + br i1 %tobool422, label %if.then423, label %if.else431 + +if.then423: ; preds = %if.else420 + %add424 = add nsw i32 %e.14, 3 + %cmp425 = icmp sge i32 %add424, %i + br i1 %cmp425, label %if.then426, label %if.end628 + +if.then426: ; preds = %if.then423 + %idxprom428 = sext i32 %add424 to i64 + br label %if.end628 + +if.else431: ; preds = %if.else420 + %call432 = call i32 @h() + %tobool433 = icmp ne i32 %call432, 0 + br i1 %tobool433, label %if.then434, label %if.else440 + +if.then434: ; preds = %if.else431 + %add435 = add nsw i32 %e.14, 1 + %cmp436 = icmp sge i32 %add435, %i + br i1 %cmp436, label %if.then437, label %if.end628 + +if.then437: ; preds = %if.then434 + br label %if.end628 + +if.else440: ; preds = %if.else431 + %call441 = call i32 @h() + %tobool442 = icmp ne i32 %call441, 0 + br i1 %tobool442, label %if.then443, label %if.else451 + +if.then443: ; preds = %if.else440 + %tobool444 = icmp ne i32 %e.14, 0 + br i1 %tobool444, label %if.then445, label %if.end628 + +if.then445: ; preds = %if.then443 + %cmp446 = icmp sge i32 %e.14, %i + br i1 %cmp446, label %if.then447, label %if.end628 + +if.then447: ; preds = %if.then445 + br label %if.end628 + +if.else451: ; preds = %if.else440 + %call452 = call i32 @h() + %tobool453 = icmp ne i32 %call452, 0 + br i1 %tobool453, label %if.then454, label %if.else460 + +if.then454: ; preds = %if.else451 + %add455 = add nsw i32 %e.14, 1 + %cmp456 = icmp sge i32 %add455, %i + br i1 %cmp456, label %if.then457, label %if.end628 + +if.then457: ; preds = %if.then454 + br label %if.end628 + +if.else460: ; preds = %if.else451 + %add461 = add nsw i32 %e.14, 2 + %cmp462 = icmp sge i32 %add461, %i + br i1 %cmp462, label %if.then463, label %if.end628 + +if.then463: ; preds = %if.else460 + %idxprom465 = sext i32 %add461 to i64 + br label %if.end628 + +if.else475: ; preds = %if.else324 + %call476 = call i32 @g() + 
%tobool477 = icmp ne i32 %call476, 0 + br i1 %tobool477, label %if.then478, label %if.else509 + +if.then478: ; preds = %if.else475 + %call479 = call i32 @h() + %tobool480 = icmp ne i32 %call479, 0 + br i1 %tobool480, label %if.then481, label %if.else487 + +if.then481: ; preds = %if.then478 + %add482 = add nsw i32 %e.0, 1 + %cmp483 = icmp sge i32 %add482, %i + br i1 %cmp483, label %if.then484, label %if.end628 + +if.then484: ; preds = %if.then481 + br label %if.end628 + +if.else487: ; preds = %if.then478 + %call488 = call i32 @h() + %tobool489 = icmp ne i32 %call488, 0 + br i1 %tobool489, label %if.then490, label %if.else496 + +if.then490: ; preds = %if.else487 + %add491 = add nsw i32 %e.0, 1 + %cmp492 = icmp sge i32 %add491, %i + br i1 %cmp492, label %if.then493, label %if.end628 + +if.then493: ; preds = %if.then490 + br label %if.end628 + +if.else496: ; preds = %if.else487 + %add497 = add nsw i32 %e.0, 1 + %cmp498 = icmp sge i32 %add497, %i + br i1 %cmp498, label %if.then499, label %if.else501 + +if.then499: ; preds = %if.else496 + br label %if.end628 + +if.else501: ; preds = %if.else496 + br i1 undef, label %if.then503, label %if.end628 + +if.then503: ; preds = %if.else501 + br label %if.end628 + +if.else509: ; preds = %if.else475 + %call510 = call i32 @g() + %tobool511 = icmp ne i32 %call510, 0 + br i1 %tobool511, label %if.then512, label %if.else565 + +if.then512: ; preds = %if.else509 + %add513 = add nsw i32 %e.0, 1 + %cmp514 = icmp sge i32 %add513, %i + br i1 %cmp514, label %if.then515, label %if.end519 + +if.then515: ; preds = %if.then512 + %idxprom517 = sext i32 %add513 to i64 + br label %if.end519 + +if.end519: ; preds = %if.then515, %if.then512 + br i1 %cmp514, label %if.then522, label %if.end526 + +if.then522: ; preds = %if.end519 + %idxprom524 = sext i32 %add513 to i64 + br label %if.end526 + +if.end526: ; preds = %if.then522, %if.end519 + br i1 %cmp514, label %if.then529, label %if.end533 + +if.then529: ; preds = %if.end526 + %idxprom531 = sext i32 
%add513 to i64 + br label %if.end533 + +if.end533: ; preds = %if.then529, %if.end526 + %add534 = add nsw i32 %e.0, 2 + %cmp535 = icmp sge i32 %add534, %i + br i1 %cmp535, label %if.then536, label %if.end540 + +if.then536: ; preds = %if.end533 + %idxprom538 = sext i32 %add534 to i64 + br label %if.end540 + +if.end540: ; preds = %if.then536, %if.end533 + br i1 %cmp535, label %if.then543, label %if.end547 + +if.then543: ; preds = %if.end540 + %idxprom545 = sext i32 %add534 to i64 + unreachable + +if.end547: ; preds = %if.end540 + br i1 %cmp514, label %if.then550, label %if.else554 + +if.then550: ; preds = %if.end547 + %idxprom552 = sext i32 %add513 to i64 + br label %if.end559 + +if.else554: ; preds = %if.end547 + %idxprom557 = sext i32 %add513 to i64 + br label %if.end559 + +if.end559: ; preds = %if.else554, %if.then550 + br i1 %cmp514, label %if.then562, label %if.end628 + +if.then562: ; preds = %if.end559 + br label %if.end628 + +if.else565: ; preds = %if.else509 + %call566 = call i32 @g() + %tobool567 = icmp ne i32 %call566, 0 + br i1 %tobool567, label %if.then568, label %if.else590 + +if.then568: ; preds = %if.else565 + %add569 = add nsw i32 %e.0, 2 + %cmp570 = icmp sge i32 %add569, %i + br i1 %cmp570, label %if.then571, label %if.else575 + +if.then571: ; preds = %if.then568 + %idxprom573 = sext i32 %add569 to i64 + br label %if.end582 + +if.else575: ; preds = %if.then568 + %idxprom579 = sext i32 %add569 to i64 + br label %if.end582 + +if.end582: ; preds = %if.else575, %if.then571 + %add583 = add nsw i32 %e.0, 1 + %cmp584 = icmp sge i32 %add583, %i + br i1 %cmp584, label %if.then585, label %if.end628 + +if.then585: ; preds = %if.end582 + %idxprom587 = sext i32 %add583 to i64 + br label %if.end628 + +if.else590: ; preds = %if.else565 + %call591 = call i32 @g() + %tobool592 = icmp ne i32 %call591, 0 + br i1 %tobool592, label %if.then593, label %if.end628 + +if.then593: ; preds = %if.else590 + %add594 = add nsw i32 %e.0, 1 + %cmp595 = icmp sge i32 %add594, %i + br 
i1 %cmp595, label %if.then596, label %if.else600 + +if.then596: ; preds = %if.then593 + %idxprom598 = sext i32 %add594 to i64 + br label %if.end628 + +if.else600: ; preds = %if.then593 + br i1 undef, label %if.then602, label %if.end628 + +if.then602: ; preds = %if.else600 + %idxprom604 = sext i32 %add594 to i64 + br label %if.end628 + +if.end628: ; preds = %if.then602, %if.else600, %if.then596, %if.else590, %if.then585, %if.end582, %if.then562, %if.end559, %if.then503, %if.else501, %if.then499, %if.then493, %if.then490, %if.then484, %if.then481, %if.then463, %if.else460, %if.then457, %if.then454, %if.then447, %if.then445, %if.then443, %if.then437, %if.then434, %if.then426, %if.then423, %if.then415, %if.then412, %if.then404, %if.then401, %if.then393, %if.end390, %if.then318, %if.else316, %if.then312, %if.then301, %if.else298, %if.then295, %if.then292, %if.then284, %if.then281, %if.then274, %if.then271, %if.then263, %if.then260, %if.end254, %if.then240, %if.end237, %if.then222, %if.then219, %if.end215, %if.then200, %if.then197, %if.then190, %if.end187, %if.end160, %if.else123, %if.then120, %if.end118, %if.then71, %if.else68, %if.then65, %if.end62, %if.then53, %for.cond + %e.15 = phi i32 [ %e.5, %if.then53 ], [ %add63, %if.then65 ], [ %e.6, %if.end62 ], [ %e.5, %if.then71 ], [ %e.5, %if.else68 ], [ %e.9, %if.then120 ], [ %e.9, %if.end118 ], [ %e.10, %if.end160 ], [ %e.0, %if.then190 ], [ %e.0, %if.end187 ], [ %e.0, %if.then200 ], [ %e.0, %if.then197 ], [ %e.0, %if.end215 ], [ %e.0, %if.then222 ], [ %e.0, %if.then219 ], [ %e.0, %if.then240 ], [ %e.0, %if.end237 ], [ %e.0, %if.end254 ], [ %e.0, %if.then263 ], [ %e.0, %if.then260 ], [ %e.0, %if.then274 ], [ %e.0, %if.then271 ], [ %add282, %if.then284 ], [ %e.0, %if.then281 ], [ %add293, %if.then295 ], [ %e.0, %if.then292 ], [ %e.0, %if.then301 ], [ %e.0, %if.else298 ], [ %e.0, %if.then312 ], [ %e.0, %if.then318 ], [ %e.0, %if.else316 ], [ %e.14, %if.then393 ], [ %e.14, %if.end390 ], [ %e.14, %if.then404 ], [ %e.14, 
%if.then401 ], [ %e.14, %if.then415 ], [ %e.14, %if.then412 ], [ %e.14, %if.then426 ], [ %e.14, %if.then423 ], [ %add435, %if.then437 ], [ %e.14, %if.then434 ], [ %e.14, %if.then447 ], [ %e.14, %if.then445 ], [ %e.14, %if.then443 ], [ %add455, %if.then457 ], [ %e.14, %if.then454 ], [ %e.14, %if.then463 ], [ %e.14, %if.else460 ], [ %add482, %if.then484 ], [ %e.0, %if.then481 ], [ %add491, %if.then493 ], [ %e.0, %if.then490 ], [ %add497, %if.then499 ], [ %add497, %if.then503 ], [ %e.0, %if.else501 ], [ %add513, %if.then562 ], [ %e.0, %if.end559 ], [ %e.0, %if.then585 ], [ %e.0, %if.end582 ], [ %e.0, %if.then596 ], [ %e.0, %if.then602 ], [ %e.0, %if.else600 ], [ %e.0, %if.else590 ], [ %e.0, %if.else123 ], [ %e.0, %for.cond ] + br label %for.cond +} + +declare i32 @g() + +declare i32 @h() + +; Function Attrs: nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) + From 33d312b2d731507327252fd597bac1b738870330 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 11 May 2021 20:35:17 -0700 Subject: [PATCH 244/318] Revert "[Coverage] Fix branch coverage merging in FunctionCoverageSummary::get() for instantiation" This reverts commit b89942c336a4506dabd5e1b2a6f1b5cbaddebe55. There are issues with this patch and also no tracking bug for it. 
--- llvm/test/tools/llvm-cov/branch-templates.cpp | 16 +--------------- llvm/tools/llvm-cov/CoverageSummaryInfo.cpp | 6 +++++- llvm/tools/llvm-cov/CoverageSummaryInfo.h | 5 ----- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/llvm/test/tools/llvm-cov/branch-templates.cpp b/llvm/test/tools/llvm-cov/branch-templates.cpp index 4797428f8835..750dc7bd58f2 100644 --- a/llvm/test/tools/llvm-cov/branch-templates.cpp +++ b/llvm/test/tools/llvm-cov/branch-templates.cpp @@ -1,9 +1,9 @@ // RUN: llvm-profdata merge %S/Inputs/branch-templates.proftext -o %t.profdata // RUN: llvm-cov show --show-expansions --show-branches=count %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s // RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -show-functions -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORT -// RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORTFILE #include + template void unused(T x) { return; @@ -45,17 +45,3 @@ int main() { // REPORT-NEXT: _Z4funcIfEiT_ 5 2 60.00% 7 3 57.14% 2 1 50.00% // REPORT-NEXT: --- // REPORT-NEXT: TOTAL 22 7 68.18% 31 11 64.52% 12 6 50.00% - -// Make sure the covered branch tally for the function instantiation group is -// merged to reflect maximum branch coverage of a single instantiation, just -// like what is done for lines and regions. Also, the total branch tally -// summary for an instantiation group should agree with the total number of -// branches in the definition (In this case, 2 and 6 for func<>() and main(), -// respectively). This is returned by: FunctionCoverageSummary::get(const -// InstantiationGroup &Group, ...) 
- -// REPORTFILE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover Branches Missed Branches Cover -// REPORTFILE-NEXT: --- -// REPORTFILE-NEXT: branch-templates.cpp 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% -// REPORTFILE-NEXT: --- -// REPORTFILE-NEXT: TOTAL 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp index 10e059adeb7d..4a0a86168908 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp @@ -100,7 +100,11 @@ FunctionCoverageSummary::get(const InstantiationGroup &Group, for (const auto &FCS : Summaries.drop_front()) { Summary.RegionCoverage.merge(FCS.RegionCoverage); Summary.LineCoverage.merge(FCS.LineCoverage); - Summary.BranchCoverage.merge(FCS.BranchCoverage); + + // Sum branch coverage across instantiation groups for the summary rather + // than "merge" the maximum count. This is a clearer view into whether all + // created branches are covered. 
+ Summary.BranchCoverage += FCS.BranchCoverage; } return Summary; } diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/llvm/tools/llvm-cov/CoverageSummaryInfo.h index 62e7cad1012b..4bc1c24a079f 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.h +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.h @@ -123,11 +123,6 @@ class BranchCoverageInfo { return *this; } - void merge(const BranchCoverageInfo &RHS) { - Covered = std::max(Covered, RHS.Covered); - NumBranches = std::max(NumBranches, RHS.NumBranches); - } - size_t getCovered() const { return Covered; } size_t getNumBranches() const { return NumBranches; } From 6912082cfd129bbc2bd60f293371e20140d50b86 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 20 Apr 2021 11:23:10 -0700 Subject: [PATCH 245/318] [ELF] Don't set versionId on undefined weak lazy symbols An unfetched lazy symbol (undefined weak) should be considered to have its original versionId which is VER_NDX_GLOBAL, instead of the lazy symbol's versionId. (The original versionId cannot be non-VER_NDX_GLOBAL because a undefined versioned symbol is an error.) The regression was introduced in D77280 when making version scripts work with lazy symbols fetched by LTO calls. 
Fix PR49915 Differential Revision: https://reviews.llvm.org/D100624 (cherry picked from commit 1c00530b30e21fd0f5b316401f6485bee08ce850) --- lld/ELF/SyntheticSections.cpp | 4 +++- lld/test/ELF/version-script-weak.s | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 9a875bd7ec3e..70c36c63d101 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3110,7 +3110,9 @@ size_t VersionTableSection::getSize() const { void VersionTableSection::writeTo(uint8_t *buf) { buf += 2; for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) { - write16(buf, s.sym->versionId); + // Use the original versionId for an unfetched lazy symbol (undefined weak), + // which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error). + write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId); buf += 2; } } diff --git a/lld/test/ELF/version-script-weak.s b/lld/test/ELF/version-script-weak.s index 7c902eb98bf4..cfa2455ee2bd 100644 --- a/lld/test/ELF/version-script-weak.s +++ b/lld/test/ELF/version-script-weak.s @@ -24,6 +24,19 @@ # CHECK-NEXT: Section: Undefined # CHECK-NEXT: } +## The version of an unfetched lazy symbol is VER_NDX_GLOBAL. It is not affected by version scripts. 
+# RUN: echo "v1 { *; };" > %t2.script +# RUN: ld.lld -shared --version-script %t2.script %t.o --start-lib %t1.o --end-lib -o %t2.so +# RUN: llvm-readelf --dyn-syms %t2.so | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: NOTYPE WEAK DEFAULT UND foo{{$}} + +# RUN: ld.lld -shared --soname=tshared --version-script %t2.script %t1.o -o %tshared.so +# RUN: ld.lld -shared --version-script %t2.script %t.o --start-lib %t1.o --end-lib %tshared.so -o %t3.so +# RUN: llvm-readelf --dyn-syms %t3.so | FileCheck %s --check-prefix=CHECK3 + +# CHECK3: NOTYPE WEAK DEFAULT UND foo@v1 + .text callq foo@PLT .weak foo From 0ef78361565a861cac846b7c1f807dc2d278145d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 May 2021 11:24:14 +0100 Subject: [PATCH 246/318] [IndVarSimplify] Add additional tests using isImpliedViaMerge. (cherry picked from commit d65e5f60f110046898ad146c508a7d225d398549) --- .../IndVarSimplify/eliminate-exit.ll | 55 ++++++++++++++++ .../promote-iv-to-eliminate-casts.ll | 62 +++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll index ddf8ada68e95..eec7908b6a8b 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -436,3 +436,58 @@ exit: } declare void @side_effect() + +; The exit condition %outer.cond.1 depends on a phi in %inner. Make sure we do +; not incorrectly determine %x.lcssa <= -1. 
+define i32 @exit_cond_depends_on_inner_loop() { +; CHECK-LABEL: @exit_cond_depends_on_inner_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[IV_OUTER:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_OUTER_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ -1, [[OUTER_HEADER]] ], [ [[CALL:%.*]], [[INNER]] ] +; CHECK-NEXT: [[CALL]] = call i32 @match() +; CHECK-NEXT: [[INNER_COND:%.*]] = icmp sgt i32 [[CALL]], -1 +; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER]], label [[OUTER_EXITING_1:%.*]] +; CHECK: outer.exiting.1: +; CHECK-NEXT: [[X_LCSSA:%.*]] = phi i32 [ [[X]], [[INNER]] ] +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i32 [[IV_OUTER]], 1 +; CHECK-NEXT: [[OUTER_COND_2:%.*]] = icmp ult i32 [[IV_OUTER]], 100 +; CHECK-NEXT: br i1 [[OUTER_COND_2]], label [[OUTER_HEADER]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[X_RES:%.*]] = phi i32 [ [[X_LCSSA]], [[OUTER_EXITING_1]] ], [ -1, [[OUTER_LATCH]] ] +; CHECK-NEXT: ret i32 [[X_RES]] +; +entry: + br label %outer.header + +outer.header: + %iv.outer = phi i32 [ 0, %entry ], [ %iv.outer.next , %outer.latch ] + br label %inner + +inner: + %x = phi i32 [ -1, %outer.header ], [ %call, %inner ] + %call = call i32 @match() + %inner.cond = icmp sgt i32 %call, -1 + br i1 %inner.cond, label %inner, label %outer.exiting.1 + +outer.exiting.1: + %x.lcssa = phi i32 [ %x, %inner ] + %outer.cond.1 = icmp sgt i32 %x.lcssa, -1 + br i1 %outer.cond.1, label %exit, label %outer.latch + +outer.latch: + %iv.outer.next = add nuw nsw i32 %iv.outer, 1 + %outer.cond.2 = icmp ult i32 %iv.outer, 100 + br i1 %outer.cond.2, label %outer.header, label %exit + +exit: + %x.res = phi i32 [ %x.lcssa, %outer.exiting.1 ], [ -1, %outer.latch ] + ret i32 %x.res +} + +declare i32 @match() diff --git 
a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll index 5cc288c58e68..2ecea576c380 100644 --- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -373,4 +373,66 @@ loop: br i1 %loopcond, label %loopexit, label %loop } +define void @promote_latch_condition_decrementing_loop_05(i32* %p, i32* %a, i1 %cond) { +; CHECK-LABEL: @promote_latch_condition_decrementing_loop_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: [[LEN_MINUS_1:%.*]] = add nsw i32 [[LEN]], -1 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[IV_START:%.*]] = phi i32 [ [[LEN]], [[IF_TRUE]] ], [ [[LEN_MINUS_1]], [[IF_FALSE]] ] +; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0 +; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[IV_START]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loopexit.loopexit: +; CHECK-NEXT: br label [[LOOPEXIT]] +; CHECK: loopexit: +; CHECK-NEXT: ret void +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ] +; CHECK-NEXT: [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store atomic i32 0, i32* [[EL]] unordered, align 4 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]] +; + +entry: + %len = load i32, i32* %p, align 4, !range !0 + br i1 %cond, label %if.true, label 
%if.false + +if.true: + br label %merge + +if.false: + %len.minus.1 = add nsw i32 %len, -1 + br label %merge + +merge: + %iv_start = phi i32 [ %len, %if.true ], [%len.minus.1, %if.false ] + %zero_check = icmp eq i32 %len, 0 + br i1 %zero_check, label %loopexit, label %preheader + +preheader: + br label %loop + +loopexit: + ret void + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ %iv_start, %preheader ] + %iv.wide = zext i32 %iv to i64 + %el = getelementptr inbounds i32, i32* %a, i64 %iv.wide + store atomic i32 0, i32* %el unordered, align 4 + %iv.next = add nsw i32 %iv, -1 + %loopcond = icmp slt i32 %iv, 1 + br i1 %loopcond, label %loopexit, label %loop +} + !0 = !{i32 0, i32 2147483647} From 4e46ff469405bc73ec25fcf78126fb5fbd7a18a1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 7 May 2021 19:39:05 +0100 Subject: [PATCH 247/318] [SCEV] By more careful when traversing phis in isImpliedViaMerge. I think currently isImpliedViaMerge can incorrectly return true for phis in a loop/cycle, if the found condition involves the previous value of Consider the case in exit_cond_depends_on_inner_loop. At some point, we call (modulo simplifications) isImpliedViaMerge(<=, %x.lcssa, -1, %call, -1). The existing code tries to prove IncV <= -1 for all incoming values InvV using the found condition (%call <= -1). At the moment this succeeds, but only because it does not compare the same runtime value. The found condition checks the value of the last iteration, but the incoming value is from the *previous* iteration. Hence we incorrectly determine that the *previous* value was <= -1, which may not be true. I think we need to be more careful when looking at the incoming values here. In particular, we need to rule out that a found condition refers to any value that may refer to one of the previous iterations. I'm not sure there's a reliable way to do so (that also works of irreducible control flow). 
So for now this patch adds an additional requirement that the incoming value must properly dominate the phi block. This should ensure the values do not change in a cycle. I am not entirely sure if will catch all cases and I appreciate a through second look in that regard. Alternatively we could also unconditionally bail out in this case, instead of checking the incoming values Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D101829 (cherry picked from commit 6c99e631201aaea0a75708749cbaf2ba08a493f9) --- llvm/lib/Analysis/ScalarEvolution.cpp | 4 ++++ llvm/test/Transforms/IRCE/decrementing-loop.ll | 11 ++++++----- llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index fe9d8297d679..1a9ae68573e9 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10622,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, if (!dominates(RHS, IncBB)) return false; const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + // Make sure L does not refer to a value from a potentially previous + // iteration of a loop. + if (!properlyDominates(L, IncBB)) + return false; if (!ProvedEasily(L, RHS)) return false; } diff --git a/llvm/test/Transforms/IRCE/decrementing-loop.ll b/llvm/test/Transforms/IRCE/decrementing-loop.ll index a824522cf206..d809fb4f7d97 100644 --- a/llvm/test/Transforms/IRCE/decrementing-loop.ll +++ b/llvm/test/Transforms/IRCE/decrementing-loop.ll @@ -212,16 +212,17 @@ exit: ret void } +; TODO: we need to be more careful when trying to look through phi nodes in +; cycles, because the condition to prove may reference the previous value of +; the phi. So we currently fail to optimize this case. ; Check that we can figure out that IV is non-negative via implication through ; two Phi nodes, one being AddRec. 
define void @test_05(i32* %a, i32* %a_len_ptr, i1 %cond) { ; CHECK-LABEL: test_05 -; CHECK: mainloop: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK: br i1 true, label %in.bounds, label %out.of.bounds -; CHECK: loop.preloop: +; CHECK: entry: +; CHECK: br label %merge +; CHECK-NOT: mainloop entry: %len.a = load i32, i32* %a_len_ptr, !range !0 diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll index eec7908b6a8b..e574c2f84ea3 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -453,7 +453,8 @@ define i32 @exit_cond_depends_on_inner_loop() { ; CHECK-NEXT: br i1 [[INNER_COND]], label [[INNER]], label [[OUTER_EXITING_1:%.*]] ; CHECK: outer.exiting.1: ; CHECK-NEXT: [[X_LCSSA:%.*]] = phi i32 [ [[X]], [[INNER]] ] -; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[OUTER_LATCH]] +; CHECK-NEXT: [[OUTER_COND_1:%.*]] = icmp sgt i32 [[X_LCSSA]], -1 +; CHECK-NEXT: br i1 [[OUTER_COND_1]], label [[EXIT:%.*]], label [[OUTER_LATCH]] ; CHECK: outer.latch: ; CHECK-NEXT: [[IV_OUTER_NEXT]] = add nuw nsw i32 [[IV_OUTER]], 1 ; CHECK-NEXT: [[OUTER_COND_2:%.*]] = icmp ult i32 [[IV_OUTER]], 100 From 4eb7b15cb447b339e82bd320adf4a09ca64ab839 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 8 May 2021 17:05:05 +0200 Subject: [PATCH 248/318] [Inliner] Fix noalias metadata handling for instructions simplified during cloning (PR50270) Instead of using VMap, which may include instructions from the caller as a result of simplification, iterate over the (FirstNewBlock, Caller->end()) range, which will only include new instructions. Fixes https://bugs.llvm.org/show_bug.cgi?id=50270. 
Differential Revision: https://reviews.llvm.org/D102110 (cherry picked from commit aa9b02ac75350a6c7c949dd24d5c6a931be26ff9) --- llvm/lib/Transforms/Utils/InlineFunction.cpp | 106 ++++++++----------- llvm/test/Transforms/Inline/pr50270.ll | 71 +++++++++++++ 2 files changed, 117 insertions(+), 60 deletions(-) create mode 100644 llvm/test/Transforms/Inline/pr50270.ll diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 3026342cc4a6..fb271a2118ba 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -780,7 +780,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, /// When inlining a call site that has !llvm.mem.parallel_loop_access, /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should /// be propagated to all memory-accessing cloned instructions. -static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { +static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, + Function::iterator FEnd) { MDNode *MemParallelLoopAccess = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); @@ -789,41 +790,33 @@ static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!VMI->second || !isa(VMI->first)) - continue; - - Instruction *NI = dyn_cast(VMI->second); - if (!NI) - continue; - - // This metadata is only relevant for instructions that access memory. 
- if (!NI->mayReadOrWriteMemory()) - continue; + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // This metadata is only relevant for instructions that access memory. + if (!I.mayReadOrWriteMemory()) + continue; - if (MemParallelLoopAccess) { - // TODO: This probably should not overwrite MemParalleLoopAccess. - MemParallelLoopAccess = MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access), - MemParallelLoopAccess); - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParalleLoopAccess. + MemParallelLoopAccess = MDNode::concatenate( + I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, MemParallelLoopAccess); - } + } - if (AccessGroup) - NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( - NI->getMetadata(LLVMContext::MD_access_group), AccessGroup)); + if (AccessGroup) + I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); - if (AliasScope) - NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + if (AliasScope) + I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); - if (NoAlias) - NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_noalias), NoAlias)); + if (NoAlias) + I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_noalias), NoAlias)); + } } } @@ -844,9 +837,9 @@ class ScopedAliasMetadataDeepCloner { /// subsequent remap() calls. void clone(); - /// Remap instructions in the given VMap from the original to the cloned + /// Remap instructions in the given range from the original to the cloned /// metadata. 
- void remap(ValueToValueMapTy &VMap); + void remap(Function::iterator FStart, Function::iterator FEnd); }; ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( @@ -907,34 +900,27 @@ void ScopedAliasMetadataDeepCloner::clone() { } } -void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) { +void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, + Function::iterator FEnd) { if (MDMap.empty()) return; // Nothing to do. - for (auto Entry : VMap) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!Entry->second || !isa(Entry->first)) - continue; - - Instruction *I = dyn_cast(Entry->second); - if (!I) - continue; - - // Only update scopes when we find them in the map. If they are not, it is - // because we already handled that instruction before. This is faster than - // tracking which instructions we already updated. - if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_alias_scope, MNew); - - if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_noalias, MNew); - - if (auto *Decl = dyn_cast(I)) - if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) - Decl->setScopeList(MNew); + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // TODO: The null checks for the MDMap.lookup() results should no longer + // be necessary. 
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_alias_scope, MNew); + + if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_noalias, MNew); + + if (auto *Decl = dyn_cast(&I)) + if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) + Decl->setScopeList(MNew); + } } } @@ -1926,7 +1912,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Now clone the inlined noalias scope metadata. SAMetadataCloner.clone(); - SAMetadataCloner.remap(VMap); + SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1936,7 +1922,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AddReturnAttributes(CB, VMap); // Propagate metadata on the callsite if necessary. - PropagateCallSiteMetadata(CB, VMap); + PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); // Register any cloned assumptions. if (IFI.GetAssumptionCache) diff --git a/llvm/test/Transforms/Inline/pr50270.ll b/llvm/test/Transforms/Inline/pr50270.ll new file mode 100644 index 000000000000..be7c3379ce87 --- /dev/null +++ b/llvm/test/Transforms/Inline/pr50270.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -inline < %s | FileCheck %s + +; This tests cases where instructions in the callee are simplified to +; instructions in the caller, thus making VMap contain instructions from +; the caller. We should not be assigning incorrect noalias metadata in +; that case. 
+ +declare { i64* } @opaque_callee() + +define { i64* } @callee(i64* %x) { +; CHECK-LABEL: @callee( +; CHECK-NEXT: [[RES:%.*]] = insertvalue { i64* } undef, i64* [[X:%.*]], 0 +; CHECK-NEXT: ret { i64* } [[RES]] +; + %res = insertvalue { i64* } undef, i64* %x, 0 + ret { i64* } %res +} + +; @opaque_callee() should not receive noalias metadata here. +define void @caller() { +; CHECK-LABEL: @caller( +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: [[S:%.*]] = call { i64* } @opaque_callee() +; CHECK-NEXT: [[X:%.*]] = extractvalue { i64* } [[S]], 0 +; CHECK-NEXT: ret void +; + call void @llvm.experimental.noalias.scope.decl(metadata !0) + %s = call { i64* } @opaque_callee() + %x = extractvalue { i64* } %s, 0 + call { i64* } @callee(i64* %x), !noalias !0 + ret void +} + +; @opaque_callee() should have the same noalias metadata as the load from the +; else branch, not as the load in the if branch. +define { i64* } @self_caller(i1 %c, i64* %a) { +; CHECK-LABEL: @self_caller( +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[S:%.*]] = call { i64* } @opaque_callee(), !noalias !0 +; CHECK-NEXT: [[X:%.*]] = extractvalue { i64* } [[S]], 0 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3) +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i64, i64* [[X]], align 4, !alias.scope !3 +; CHECK-NEXT: ret { i64* } [[S]] +; CHECK: else: +; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64* } undef, i64* [[A:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = load volatile i64, i64* [[A]], align 4, !alias.scope !0 +; CHECK-NEXT: ret { i64* } [[R2]] +; + call void @llvm.experimental.noalias.scope.decl(metadata !0) + br i1 %c, label %if, label %else + +if: + %s = call { i64* } @opaque_callee(), !noalias !0 + %x = extractvalue { i64* } %s, 0 + %r = call { i64* } @self_caller(i1 false, i64* %x) + ret { i64* } %r + +else: + %r2 =
insertvalue { i64* } undef, i64* %a, 0 + load volatile i64, i64* %a, !alias.scope !0 + ret { i64* } %r2 +} + +declare void @llvm.experimental.noalias.scope.decl(metadata) + +!0 = !{!1} +!1 = !{!1, !2, !"scope"} +!2 = !{!2, !"domain"} From 877a07bfb3b9a0bbaa3b63a222448b11d85821f0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 26 Mar 2021 17:29:36 -0400 Subject: [PATCH 249/318] GlobalISel: Restrict narrow scalar for fptoui/fptosi results This practically only works for the f16 case AMDGPU uses, not wider types. Fixes bug 49710 by failing legalization. (cherry picked from commit 83a25a101051b404bec1a5ba9cb867705f31262d) --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 44 ++++++++++++------- .../AArch64/GlobalISel/legalize-fptoi.mir | 28 ++++++++++++ 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index c3b494e94ff1..4a982b00125d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -316,6 +316,7 @@ class LegalizerHelper { LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty); + LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3178ee16af2b..66871ca3b926 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1257,22 +1257,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case 
TargetOpcode::G_FPTOUI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; - } - case TargetOpcode::G_FPTOSI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); case TargetOpcode::G_FPEXT: if (TypeIdx != 0) return UnableToLegalize; @@ -4496,6 +4483,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? 
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir index 9bc639679bea..c82cedd08580 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir @@ -265,3 +265,31 @@ body: | %1:_(<4 x s32>) = G_FPTOSI %0 $q0 = COPY %1 ... + +--- +name: test_fptoui_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptoui_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s128) = G_FPTOUI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOUI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOUI %0 + $q0 = COPY %1 +... + +--- +name: test_fptosi_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptosi_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s128) = G_FPTOSI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOSI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOSI %0 + $q0 = COPY %1 +... 
From 6336c6eec1a1c12205d102c5555101c100c5ec73 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 27 Mar 2021 10:39:27 -0400 Subject: [PATCH 250/318] AArch64/GlobalISel: Remove IR section from test (cherry picked from commit 2f779e79d50114830c02cdb9e77bd851e13d9fc1) --- .../AArch64/GlobalISel/legalize-fptoi.mir | 29 +------------------ 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir index c82cedd08580..b2ee3a6cc777 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir @@ -1,32 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s - ---- | - target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - target triple = "aarch64--" - - define void @test_fptosi_s32_s32() { ret void } - define void @test_fptoui_s32_s32() { ret void } - define void @test_fptosi_s32_s64() { ret void } - define void @test_fptoui_s32_s64() { ret void } - - define void @test_fptosi_s64_s32() { ret void } - define void @test_fptoui_s64_s32() { ret void } - define void @test_fptosi_s64_s64() { ret void } - define void @test_fptoui_s64_s64() { ret void } - - define void @test_fptosi_s1_s32() { ret void } - define void @test_fptoui_s1_s32() { ret void } - - define void @test_fptosi_s8_s64() { ret void } - define void @test_fptoui_s8_s64() { ret void } - - define void @test_fptosi_s16_s32() { ret void } - define void @test_fptoui_s16_s32() { ret void } - - define void @test_fptoui_v4s32() { ret void } - define void @test_fptosi_v4s32() { ret void } -... 
+# RUN: llc -mtriple=aarch64-- -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_fptosi_s32_s32 From de579bae6eabd02b815e549776b8c680957a0769 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 14 May 2021 12:36:41 -0700 Subject: [PATCH 251/318] [LowerConstantIntrinsics] reuse isManifestLogic from ConstantFolding GlobalVariables are Constants, yet should not unconditionally be considered true for __builtin_constant_p. Via the LangRef https://llvm.org/docs/LangRef.html#llvm-is-constant-intrinsic: This intrinsic generates no code. If its argument is known to be a manifest compile-time constant value, then the intrinsic will be converted to a constant true value. Otherwise, it will be converted to a constant false value. In particular, note that if the argument is a constant expression which refers to a global (the address of which _is_ a constant, but not manifest during the compile), then the intrinsic evaluates to false. Move isManifestConstant from ConstantFolding to be a method of Constant so that we can reuse the same logic in LowerConstantIntrinsics. pr/41459 Reviewed By: rsmith, george.burgess.iv Differential Revision: https://reviews.llvm.org/D102367 (cherry picked from commit 8c72749bd92d35397e93908bc5a504d4cbcef1cb) --- llvm/include/llvm/IR/Constant.h | 4 ++++ llvm/lib/Analysis/ConstantFolding.cpp | 15 +-------------- llvm/lib/IR/Constants.cpp | 12 ++++++++++++ .../Transforms/Scalar/LowerConstantIntrinsics.cpp | 8 ++++---- .../constant-intrinsics.ll | 8 ++++++++ 5 files changed, 29 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 0190aca27b72..71692c746015 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -214,6 +214,10 @@ class Constant : public User { /// both must either be scalars or vectors with the same element count. If no /// changes are made, the constant C is returned. 
static Constant *mergeUndefsWith(Constant *C, Constant *Other); + + /// Return true if a constant is ConstantData or a ConstantAggregate or + /// ConstantExpr that contain only ConstantData. + bool isManifestConstant() const; }; } // end namespace llvm diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index f73890d548f0..cc1ce4c65821 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1808,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) { return APF.convertToDouble(); } -static bool isManifestConstant(const Constant *c) { - if (isa(c)) { - return true; - } else if (isa(c) || isa(c)) { - for (const Value *subc : c->operand_values()) { - if (!isManifestConstant(cast(subc))) - return false; - } - return true; - } - return false; -} - static bool getConstIntOrUndef(Value *Op, const APInt *&C) { if (auto *CI = dyn_cast(Op)) { C = &CI->getValue(); @@ -1845,7 +1832,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, // We know we have a "Constant" argument. But we want to only // return true for manifest constants, not those that depend on // constants with unknowable values, e.g. GlobalValue or BlockAddress. 
- if (isManifestConstant(Operands[0])) + if (Operands[0]->isManifestConstant()) return ConstantInt::getTrue(Ty->getContext()); return nullptr; } diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 6fd205c654a8..9f05917cf7cc 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -803,6 +803,18 @@ Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) { return C; } +bool Constant::isManifestConstant() const { + if (isa(this)) + return true; + if (isa(this) || isa(this)) { + for (const Value *Op : operand_values()) + if (!cast(Op)->isManifestConstant()) + return false; + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // ConstantInt //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp index bfe8db83b027..bb30c48127a0 100644 --- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled, "Number of 'objectsize' intrinsic calls handled"); static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { - Value *Op = II->getOperand(0); - - return isa(Op) ? 
ConstantInt::getTrue(II->getType()) - : ConstantInt::getFalse(II->getType()); + if (auto *C = dyn_cast(II->getOperand(0))) + if (C->isManifestConstant()) + return ConstantInt::getTrue(II->getType()); + return ConstantInt::getFalse(II->getType()); } static bool replaceConditionalBranchesOnConstant(Instruction *II, diff --git a/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll b/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll index b2c98d2049cd..e19dce1b5543 100644 --- a/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll +++ b/llvm/test/Transforms/LowerConstantIntrinsics/constant-intrinsics.ll @@ -112,3 +112,11 @@ define i1 @test_various_types(i256 %int, float %float, <2 x i64> %vec, {i32, i32 ret i1 %res6 } + +@real_mode_blob_end = external dso_local global [0 x i8], align 1 +define i1 @global_array() { +; CHECK-LABEL: @global_array( +; CHECK-NEXT: ret i1 false + %1 = call i1 @llvm.is.constant.i64(i64 ptrtoint ([0 x i8]* @real_mode_blob_end to i64)) + ret i1 %1 +} From 4973ce53ca8abfc14233a3d8b3045673e0e8543c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 12 May 2021 16:09:37 +0300 Subject: [PATCH 252/318] ~(C + X) --> ~C - X (PR50308) We can not rely on (C+X)-->(X+C) already happening, because we might not have visited that `add` yet. The added testcase would get stuck in an endless combine loop. 
(cherry-picked from 554b1bced325) --- .../InstCombine/InstCombineAndOrXor.cpp | 17 +++++------ llvm/test/Transforms/InstCombine/not-add.ll | 28 +++++++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 68c4156af2c4..85a7abe211b3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3221,11 +3221,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { } } - // ~(X - Y) --> ~X + Y - if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) - if (isa(X) || NotVal->hasOneUse()) - return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); - // ~(~X >>s Y) --> (X >>s Y) if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y)))) return BinaryOperator::CreateAShr(X, Y); @@ -3256,9 +3251,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y); } - // ~(X + C) --> -(C + 1) - X - if (match(Op0, m_Add(m_Value(X), m_Constant(C)))) - return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X); + // ~(X + C) --> ~C - X + if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) + return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); + + // ~(X - Y) --> ~X + Y + // FIXME: is it really beneficial to sink the `not` here? 
+ if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) + if (isa(X) || NotVal->hasOneUse()) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); // ~(~X + Y) --> X - Y if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y)))) diff --git a/llvm/test/Transforms/InstCombine/not-add.ll b/llvm/test/Transforms/InstCombine/not-add.ll index 6891fdd5fcc5..d372e7603724 100644 --- a/llvm/test/Transforms/InstCombine/not-add.ll +++ b/llvm/test/Transforms/InstCombine/not-add.ll @@ -137,3 +137,31 @@ define <4 x i32> @vector_test_undef_nsw_nuw(<4 x i32> %x, <4 x i32> %y) { %nota = xor <4 x i32> %a, ret <4 x i32> %nota } + +define i32 @pr50308(i1 %c1, i32 %v1, i32 %v2, i32 %v3) { +; CHECK-LABEL: @pr50308( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: [[ADD_NOT:%.*]] = sub i32 -2, [[V1:%.*]] +; CHECK-NEXT: [[ADD1_NEG:%.*]] = xor i32 [[ADD_NOT]], [[V2:%.*]] +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND_NEG:%.*]] = phi i32 [ [[ADD1_NEG]], [[COND_TRUE]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[COND_NEG]], [[V3:%.*]] +; CHECK-NEXT: ret i32 [[SUB]] +; +entry: + br i1 %c1, label %cond.true, label %cond.end + +cond.true: + %add = add nsw i32 1, %v1 + %xor = xor i32 %add, %v2 + %add1 = add nsw i32 1, %xor + br label %cond.end + +cond.end: + %cond = phi i32 [ %add1, %cond.true ], [ 0, %entry ] + %sub = sub nsw i32 %v3, %cond + ret i32 %sub +} From 328a6ec955327c6d56b6bc3478c723dd3cd468ef Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Tue, 11 May 2021 20:46:58 +0200 Subject: [PATCH 253/318] Force visibility of llvm::Any to external llvm::Any::TypeId::Id relies on the uniqueness of the address of a static variable defined in a template function. hidden visibility implies vague linkage for that variable, which does not guarantee the uniqueness of the address across a binary and a shared library. 
This totally breaks the implementation of llvm::Any. Ideally, setting visibility on llvm::Any::TypeId::Id should be enough; unfortunately, this doesn't work as expected, and we lack the time (before the 12.0.1 release) to understand why setting the visibility on llvm::Any does work. See https://gcc.gnu.org/wiki/Visibility and https://gcc.gnu.org/onlinedocs/gcc/Vague-Linkage.html for more information on that topic. Differential Revision: https://reviews.llvm.org/D101972 (cherry picked from commit 3d3abc22b3ef189813a3b9061c2a90ba86a32f44) --- llvm/include/llvm/ADT/Any.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 0aded628cda4..1e3abca70679 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -23,7 +23,12 @@ namespace llvm { -class Any { +class LLVM_EXTERNAL_VISIBILITY Any { + + // The `TypeId::Id` static data member below is a globally unique + // identifier for the type `T`. It is explicitly marked with default + // visibility so that when `-fvisibility=hidden` is used, the loader still + // merges duplicate definitions across DSO boundaries. template struct TypeId { static const char Id; }; struct StorageBase { From 77b63ce55e4d8db1ea1ef45d519b9d49d760d7bb Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Tue, 9 Mar 2021 11:55:19 -0500 Subject: [PATCH 254/318] [MemoryDependence] Fix invariant group store Fix bug in MemoryDependence [and thus GVN] for invariant group. Previously MemDep didn't verify that the store was storing into a pointer rather than a store simply using a pointer.
Differential Revision: https://reviews.llvm.org/D98267 (cherry picked from commit 875891a10d50a791d3f076c9259e60af6c9af18c) --- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +++- llvm/test/Transforms/GVN/storeinvgroup.ll | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/GVN/storeinvgroup.ll diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 895936d47175..886b5bf4acd3 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -344,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa(U) || isa(U)) && + if ((isa(U) || + (isa(U) && + cast(U)->getPointerOperand() == Ptr)) && U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } diff --git a/llvm/test/Transforms/GVN/storeinvgroup.ll b/llvm/test/Transforms/GVN/storeinvgroup.ll new file mode 100644 index 000000000000..16da4b8cd424 --- /dev/null +++ b/llvm/test/Transforms/GVN/storeinvgroup.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S -o - < %s | FileCheck %s + +define double @code(double* %a1) { +; CHECK-LABEL: @code( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[META:%.*]] = alloca double*, align 8 +; CHECK-NEXT: store double 1.234500e+00, double* [[A1:%.*]], align 8 +; CHECK-NEXT: store double* [[A1]], double** [[META]], align 8, !invariant.group !0 +; CHECK-NEXT: ret double 1.234500e+00 +; +entry: + %meta = alloca double* + store double 1.23450000e+00, double* %a1, align 8 + store double* %a1, double** %meta, align 8, !invariant.group !0 + %iload = load double, double* %a1, align 8, !invariant.group !1 + 
ret double %iload +} + +!0 = distinct !{} +!1 = distinct !{} From f2ce10d14b7c07b93b7726615aeb40ad04837a88 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 13 May 2021 05:49:19 -0500 Subject: [PATCH 255/318] [PowerPC] Handle inline assembly clobber of link register This patch adds the handling of clobbers of the link register LR for inline assembly. This patch is to fix: https://bugs.llvm.org/show_bug.cgi?id=50147 Reviewed By: nemanjai, #powerpc Differential Revision: https://reviews.llvm.org/D101657 (cherry picked from commit 15051f0b4a2e0a0af9da7cd5e5cfaabb9f6aaa3d) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 62 ++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 7 + .../PowerPC/ppc64-inlineasm-clobber.ll | 264 ++++++++++++++++++ 4 files changed, 334 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 929a72ac687e..7833bfc1d1b6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + // Custom lower inline assembly to check for special registers. + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -3461,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, return Op.getOperand(0); } +SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo &MFI = *MF.getInfo(); + + assert((Op.getOpcode() == ISD::INLINEASM || + Op.getOpcode() == ISD::INLINEASM_BR) && + "Expecting Inline ASM node."); + + // If an LR store is already known to be required then there is no point in + // checking this ASM as well. + if (MFI.isLRStoreRequired()) + return Op; + + // Inline ASM nodes have an optional last operand that is an incoming Flag of + // type MVT::Glue. We want to ignore this last operand if that is the case. + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) + --NumOps; + + // Check all operands that may contain the LR. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value.
+ + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + Register Reg = cast(Op.getOperand(i))->getReg(); + if (Reg != PPC::LR && Reg != PPC::LR8) + continue; + MFI.setLRStoreRequired(); + return Op; + } + break; + } + } + } + + return Op; +} + SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) @@ -10316,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INLINEASM: + case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); // Variable argument lowering. case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); @@ -15090,6 +15147,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); + } else if (Constraint == "lr") { + if (VT == MVT::i64) + return std::make_pair(0U, &PPC::LR8RCRegClass); + else + return std::make_pair(0U, &PPC::LRRCRegClass); } // If we name a VSX register, we can't defer to the base class because it diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 0dda2c181572..836c52bdff95 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1128,6 +1128,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) 
const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index e03617aa75ff..45d60369018b 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -409,6 +409,13 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> { let isAllocatable = 0; } +def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> { + let isAllocatable = 0; +} +def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { + let isAllocatable = 0; +} + def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; diff --git a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll new file mode 100644 index 000000000000..3d66683c4c75 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-unknown -verify-machineinstrs %s \ +; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64LE +; RUN: llc -mtriple=powerpc64-unknown-linux-unknown -verify-machineinstrs %s \ +; RUN: -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64BE + +define dso_local void @ClobberLR() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberLR: +; PPC64BE: # %bb.0: # %entry +; 
PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{lr}"() + ret void +} + +define dso_local void @ClobberR5() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberR5: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR5: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{r5}"() + ret void +} + +define dso_local void @ClobberR15() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberR15: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR15: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{r15}"() + ret void +} + +;; Test for INLINEASM_BR +define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 { +; PPC64LE-LABEL: ClobberLR_BR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: # %bb.1: # %return +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; PPC64LE-NEXT: .Ltmp0: # Block address taken +; PPC64LE-NEXT: .LBB3_2: # %return_early +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: li r3, 0 +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: 
ClobberLR_BR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: nop +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: # %bb.1: # %return +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +; PPC64BE-NEXT: .Ltmp0: # Block address taken +; PPC64BE-NEXT: .LBB3_2: # %return_early +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: li r3, 0 +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +entry: + callbr void asm sideeffect "nop", "X,~{lr}"(i8* blockaddress(@ClobberLR_BR, %return_early)) + to label %return [label %return_early] + +return_early: + br label %return + +return: + %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ] + ret i32 %retval.0 +} + +define dso_local signext i32 @ClobberR5_BR(i32 signext %in) #0 { +; PPC64LE-LABEL: ClobberR5_BR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: # %bb.1: # %return +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; PPC64LE-NEXT: .Ltmp1: # Block address taken +; PPC64LE-NEXT: .LBB4_2: # %return_early +; PPC64LE-NEXT: li r3, 0 +; PPC64LE-NEXT: extsw r3, r3 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberR5_BR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: nop +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: # %bb.1: # %return +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +; PPC64BE-NEXT: .Ltmp1: # Block address taken +; PPC64BE-NEXT: .LBB4_2: # %return_early +; PPC64BE-NEXT: li r3, 0 +; PPC64BE-NEXT: extsw r3, r3 +; PPC64BE-NEXT: blr +entry: + callbr void asm sideeffect "nop", "X,~{r5}"(i8* blockaddress(@ClobberR5_BR, %return_early)) + to label %return [label %return_early] + +return_early: + br label %return + +return: + %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ] + ret i32 %retval.0 +} + + + +define dso_local void @DefLR() local_unnamed_addr #0 { +; 
PPC64LE-LABEL: DefLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: DefLR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call i64 asm sideeffect "", "={lr}"() + ret void +} + +define dso_local void @EarlyClobberLR() local_unnamed_addr #0 { +; PPC64LE-LABEL: EarlyClobberLR: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -32(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 32 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: EarlyClobberLR: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -48(r1) +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: addi r1, r1, 48 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call i64 asm sideeffect "", "=&{lr}"() + ret void +} + +define dso_local void @ClobberMulti() local_unnamed_addr #0 { +; PPC64LE-LABEL: ClobberMulti: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mflr r0 +; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: std r16, -128(r1) # 8-byte Folded Spill +; PPC64LE-NEXT: std r0, 16(r1) +; PPC64LE-NEXT: stdu r1, -176(r1) +; PPC64LE-NEXT: #APP +; PPC64LE-NEXT: #NO_APP +; PPC64LE-NEXT: addi r1, r1, 176 +; PPC64LE-NEXT: ld r0, 16(r1) +; PPC64LE-NEXT: ld r16, -128(r1) # 8-byte Folded Reload +; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; 
PPC64LE-NEXT: mtlr r0 +; PPC64LE-NEXT: blr +; +; PPC64BE-LABEL: ClobberMulti: +; PPC64BE: # %bb.0: # %entry +; PPC64BE-NEXT: mflr r0 +; PPC64BE-NEXT: std r0, 16(r1) +; PPC64BE-NEXT: stdu r1, -192(r1) +; PPC64BE-NEXT: std r15, 56(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: std r16, 64(r1) # 8-byte Folded Spill +; PPC64BE-NEXT: #APP +; PPC64BE-NEXT: #NO_APP +; PPC64BE-NEXT: ld r16, 64(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: ld r15, 56(r1) # 8-byte Folded Reload +; PPC64BE-NEXT: addi r1, r1, 192 +; PPC64BE-NEXT: ld r0, 16(r1) +; PPC64BE-NEXT: mtlr r0 +; PPC64BE-NEXT: blr +entry: + tail call void asm sideeffect "", "~{lr},~{r15},~{r16}"() + ret void +} + +attributes #0 = { nounwind } From e673593742e7dc1f4141d0b6391b995d5152fa6c Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Sat, 15 May 2021 11:29:56 +0100 Subject: [PATCH 256/318] [clang-format] PR50326 AlignAfterOpenBracket AlwaysBreak does not keep to the ColumnLimit https://bugs.llvm.org/show_bug.cgi?id=50326 {D93626} caused a regression in terms of formatting a function ptr, incorrectly thinking it was a C-Style cast. 
This caused a formatter regression between clang-format-11 and clang-format-12 ``` void bar() { size_t foo = function(Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong); size_t foo = function( Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, BarrrrrrrrrrrrLong, FoooooooooLooooong); size_t foo = (*(function))(Foooo, Barrrrr, Foooo, FoooooooooLooooong); size_t foo = (*( function))(Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, BarrrrrrrrrrrrLong, FoooooooooLooooong); } ``` became ``` void bar() { size_t foo1 = function(Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong); size_t foo2 = function( Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, BarrrrrrrrrrrrLong, FoooooooooLooooong); size_t foo3 = (*(function))(Foooo, Barrrrr, Foooo, FoooooooooLooooong); size_t foo4 = (*( function))(Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, BarrrrrrrrrrrrLong, FoooooooooLooooong); } ``` This fixes this issue by simplifying the clause to be specific about what is wanted rather than what is not. Reviewed By: curdeius, HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D102392 (cherry picked from commit eae445f65d077304703e3290ddb4ff28f6d65ff4) --- clang/lib/Format/TokenAnnotator.cpp | 12 ++++++------ clang/unittests/Format/FormatTest.cpp | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 34c291ecc492..82d6cfed308d 100755 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1917,12 +1917,12 @@ class AnnotatingParser { if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) return true; - if (Tok.Next->is(tok::l_paren) && - !(Tok.Previous && Tok.Previous->is(tok::identifier) && - Tok.Previous->Previous && - Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow, - tok::star))) - return true; + // Look for a cast `( x ) (`. 
+ if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) { + if (Tok.Previous->is(tok::identifier) && + Tok.Previous->Previous->is(tok::l_paren)) + return true; + } if (!Tok.Next->Next) return false; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index c1f88b9ae17a..ed26bb8a7150 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -12368,6 +12368,17 @@ TEST_F(FormatTest, ConfigurableSpacesInParentheses) { verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*(foo))(a - 1);", Spaces); + Spaces.ColumnLimit = 80; + Spaces.IndentWidth = 4; + Spaces.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + verifyFormat("void foo( ) {\n" + " size_t foo = (*(function))(\n" + " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " + "BarrrrrrrrrrrrLong,\n" + " FoooooooooLooooong);\n" + "}", + Spaces); Spaces.SpaceAfterCStyleCast = false; verifyFormat("size_t idx = (size_t)(ptr - ((char *)file));", Spaces); verifyFormat("size_t idx = (size_t)a;", Spaces); @@ -12375,6 +12386,15 @@ TEST_F(FormatTest, ConfigurableSpacesInParentheses) { verifyFormat("size_t idx = (a->*foo)(a - 1);", Spaces); verifyFormat("size_t idx = (a->foo)(a - 1);", Spaces); verifyFormat("size_t idx = (*foo)(a - 1);", Spaces); + verifyFormat("size_t idx = (*(foo))(a - 1);", Spaces); + + verifyFormat("void foo( ) {\n" + " size_t foo = (*(function))(\n" + " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " + "BarrrrrrrrrrrrLong,\n" + " FoooooooooLooooong);\n" + "}", + Spaces); } TEST_F(FormatTest, ConfigurableSpacesInSquareBrackets) { From 6279fd114acba298b9b743755d7031d615c92d23 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Fri, 28 May 2021 00:10:04 +0800 Subject: [PATCH 257/318] [SPE] Disable strict-fp for SPE by default As discussed in PR50385, strict-fp on 
PowerPC SPE has not been handled well. This patch disables it by default for SPE. Reviewed By: nemanjai, vit9696, jhibbits Differential Revision: https://reviews.llvm.org/D103235 (cherry picked from commit 5c18d1136665f74b15c0df599f56ac3e2e947fb8) --- clang/lib/Basic/Targets/PPC.cpp | 1 + clang/test/CodeGen/builtins-ppc-fpconstrained.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index ff09c0fa2a23..6c3036836c6d 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -57,6 +57,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { + HasStrictFP = false; HasSPE = true; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/builtins-ppc-fpconstrained.c index 880c0c339ef3..909210996064 100644 --- a/clang/test/CodeGen/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/builtins-ppc-fpconstrained.c @@ -11,6 +11,9 @@ // RUN: -fallow-half-arguments-and-returns -S -ffp-exception-behavior=strict \ // RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \ // RUN: --check-prefix=FIXME-CHECK %s +// RUN: %clang_cc1 -triple powerpcspe -S -ffp-exception-behavior=strict \ +// RUN: -target-feature +spe -fexperimental-strict-floating-point -emit-llvm \ +// RUN: %s -o - | FileCheck --check-prefix=CHECK-CONSTRAINED %s typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float; typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double; From f1b1151b61b19391b083ced5d4b0b710ada07f6c Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Mon, 31 May 2021 12:36:57 -0400 Subject: [PATCH 258/318] [libc++] [test] Fix a few tests for 32-bit x86 Fixes bug https://llvm.org/PR48939. 
Differential Revision: https://reviews.llvm.org/D102359 (cherry picked from commit 73cdc7599bf053c3e22ce6bb15a4266f66bd5e69) --- libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 10 +++++----- .../libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp | 4 ++++ .../libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp | 5 +++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index fed6a1618483..cf560ce31097 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -427,9 +427,9 @@ void multiset_test() { void vector_test() { std::vector test0 = {true, false}; - ComparePrettyPrintToChars(test0, + ComparePrettyPrintToRegex(test0, "std::vector of " - "length 2, capacity 64 = {1, 0}"); + "length 2, capacity (32|64) = {1, 0}"); for (int i = 0; i < 31; ++i) { test0.push_back(true); test0.push_back(false); @@ -444,9 +444,9 @@ void vector_test() { ComparePrettyPrintToRegex( test0, "std::vector of length 65, " - "capacity 128 = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, " - "1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, " - "1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}"); + "capacity (96|128) = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, " + "0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, " + "0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}"); std::vector test1; ComparePrettyPrintToChars(test1, "std::vector of length 0, capacity 0"); diff --git a/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp b/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp index 73fe18becc3a..5cda71371fa6 100644 --- a/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp +++ b/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp @@ -46,7 +46,11 @@ int 
main(int, char**) { // // With trivial_abi, local_addr is the address of a local variable in // make_val, and hence different from &ret. +#if !defined(__i386__) + // On X86, structs are never returned in registers. + // Thus, unique_ptr will be passed indirectly even if it is trivial. assert((void*)&ret != local_addr); +#endif return 0; } diff --git a/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp b/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp index e69c94506f2a..9b4e95e249e2 100644 --- a/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp +++ b/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp @@ -49,9 +49,10 @@ int main(int, char**) { // // With trivial_abi, local_addr is the address of a local variable in // make_val, and hence different from &ret. -#ifndef __arm__ +#if !defined(__i386__) && !defined(__arm__) + // On X86, structs are never returned in registers. // On ARM32, structs larger than 4 bytes cannot be returned in registers. - // Thus, weak_ptr will be passed indrectly even if it is trivial. + // Thus, weak_ptr will be passed indirectly even if it is trivial. assert((void*)&ret != local_addr); #endif return 0; From 6a86669a6d99179997feaa015b92423cba64ad96 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 19 May 2021 11:22:25 -0700 Subject: [PATCH 259/318] [WebAssembly] Ignore filters in Emscripten EH landingpads We have been handling filters and landingpads incorrectly all along. We pass clauses' (catches') types to `__cxa_find_matching_catch` in JS glue code, which returns the thrown pointer and sets the selector using `setTempRet0()`. We apparently have been doing the same for filters' (exception specs') types; we pass them to `__cxa_find_matching_catch` just the same way as clauses. And `__cxa_find_matching_catch` treats all given types as clauses. So it is a little surprising; maybe we intended to do something from the JS side and didn't end up doing? 
So anyway, I don't think supporting exception specs in Emscripten EH is a priority, but this can actually cause incorrect results for normal catches when functions are inlined and the inlined spec type has a parent-child relationship with the catch's type. --- The below is an example of a bug that can happen when inlining and class hierarchy is mixed. If you are busy you can skip this part: ``` struct A {}; struct B : A {}; void bar() throw (B) { throw B(); } void foo() { try { bar(); } catch (A &) { fputs ("Expected result\n", stdout); } } ``` In the unoptimized code, `bar`'s landingpad will have a filter for `B` and `foo`'s landingpad will have a clause for `A`. But when `bar` is inlined into `foo`, `foo`'s landingpad has both a filter for `B` and a clause for `A`, and it passes both types to `__cxa_find_matching_catch`: ``` __cxa_find_matching_catch(typeinfo for B, typeinfo for A) ``` `__cxa_find_matching_catch` thinks both are clauses, and looks at the first type `B`, which belongs to a filter. And the thrown type is `B`, so it thinks the first type `B` is caught. But this makes it return an incorrect selector, because it is supposed to catch the exception using the second type `A`, which is a parent of `B`. As a result, the `foo` in the example program above does not print "Expected result" but just throws the exception to the caller. (This wouldn't have happened if `A` and `B` are completely disjoint types, such as `float` and `int`) Fixes https://bugs.llvm.org/show_bug.cgi?id=50357. 
Reviewed By: dschuff, kripken Differential Revision: https://reviews.llvm.org/D102795 (cherry picked from commit 412a3381f721452fb6cd33bc30e7700102639e3f) --- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 13 +++---------- .../test/CodeGen/WebAssembly/lower-em-exceptions.ll | 12 ++++++------ 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index d3bbadf27478..ff6404c30971 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -885,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { SmallVector FMCArgs; for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { Constant *Clause = LPI->getClause(I); - // As a temporary workaround for the lack of aggregate varargs support - // in the interface between JS and wasm, break out filter operands into - // their component elements. - if (LPI->isFilter(I)) { - auto *ATy = cast(Clause->getType()); - for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) { - Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter"); - FMCArgs.push_back(EV); - } - } else + // TODO Handle filters (= exception specifications). + // https://bugs.llvm.org/show_bug.cgi?id=50396 + if (LPI->isCatch(I)) FMCArgs.push_back(Clause); } diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll index 88073f3a926f..b2e344e88b33 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-exceptions.ll @@ -68,6 +68,9 @@ catch: ; preds = %catch.dispatch } ; Test invoke instruction with filters (functions with throw(...) declaration) +; Currently we don't support exception specifications correctly in JS glue code, +; so we ignore all filters here. 
+; See https://bugs.llvm.org/show_bug.cgi?id=50396. define void @filter() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @filter( entry: @@ -91,12 +94,9 @@ lpad: ; preds = %entry %2 = extractvalue { i8*, i32 } %0, 1 br label %filter.dispatch ; CHECK: lpad: -; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_4(i8* bitcast (i8** @_ZTIi to i8*), i8* bitcast (i8** @_ZTIc to i8*)) -; CHECK-NEXT: %[[IVI1:.*]] = insertvalue { i8*, i32 } undef, i8* %[[FMC]], 0 -; CHECK-NEXT: %[[TEMPRET0_VAL:.*]] = call i32 @getTempRet0() -; CHECK-NEXT: %[[IVI2:.*]] = insertvalue { i8*, i32 } %[[IVI1]], i32 %[[TEMPRET0_VAL]], 1 -; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 0 -; CHECK-NEXT: extractvalue { i8*, i32 } %[[IVI2]], 1 +; We now temporarily ignore filters because of the bug, so we pass nothing to +; __cxa_find_matching_catch +; CHECK-NEXT: %[[FMC:.*]] = call i8* @__cxa_find_matching_catch_2() filter.dispatch: ; preds = %lpad %ehspec.fails = icmp slt i32 %2, 0 From 0826268d59c6e1bb3530dffd9dc5f6038774486d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 1 Jun 2021 01:15:49 +0800 Subject: [PATCH 260/318] [PowerPC] Fix x86 vector intrinsics wrapper compilation under C++ Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D103386 (cherry picked from commit c0b3071833a80121a5a7ca9ea54fd59a59806acc) --- clang/lib/Headers/ppc_wrappers/xmmintrin.h | 9 ++++----- clang/test/CodeGen/ppc-xmmintrin.c | 6 +++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index 0f429fa04081..0e45b96769f8 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -28,7 +28,7 @@ Most SSE scalar float intrinsic operations can be performed more efficiently as C language float scalar operations or optimized to use vector SIMD operations. We recommend this for new applications. 
*/ -#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef _XMMINTRIN_H_INCLUDED @@ -62,14 +62,13 @@ /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ -typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef vector float __m128 __attribute__((__may_alias__)); /* Unaligned version of the same type. */ -typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, - __aligned__ (1))); +typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1))); /* Internal data types for implementing the intrinsics. */ -typedef float __v4sf __attribute__ ((__vector_size__ (16))); +typedef vector float __v4sf; /* Create an undefined vector. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c index e9466b32257f..32a5e5a7cdaf 100644 --- a/clang/test/CodeGen/ppc-xmmintrin.c +++ b/clang/test/CodeGen/ppc-xmmintrin.c @@ -3,8 +3,12 @@ // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \ // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE +// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 
-ffreestanding -DNO_WARN_X86_INTRINSICS %s \ +// RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns #include @@ -1426,7 +1430,7 @@ test_mul() { void __attribute__((noinline)) test_prefetch() { - _mm_prefetch(ms, i); + _mm_prefetch(ms, _MM_HINT_NTA); } // CHECK-LABEL: @test_prefetch From cde86632a772b523ba3db7039f75d979f557b57c Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Fri, 5 Feb 2021 10:59:37 -0800 Subject: [PATCH 261/318] Various minor fixes for python 3 Switch StdTuple printer from python 2-style "next" to python 3. Nested iteration changed enough to make the original bitset iteration code a bit trickier than it needs to be, so unnest. The end node of a map iterator is sometimes hard to detect in isolation, don't fail in that case. Differential Revision: https://reviews.llvm.org/D96167 (cherry picked from commit a34b8b879e345397880c1f9f8de4c294dd0b370c) --- .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 5 ++-- libcxx/utils/gdb/libcxx/printers.py | 23 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index cf560ce31097..d8a02a384563 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -489,8 +489,9 @@ void map_iterator_test() { auto not_found = one_two_three.find(7); MarkAsLive(not_found); - CompareExpressionPrettyPrintToRegex("not_found", - R"(std::__map_iterator = {\[0x[a-f0-9]+\] = end\(\)})"); + // Because the end_node is not easily detected, just be sure it doesn't crash. 
+ CompareExpressionPrettyPrintToRegex( + "not_found", R"(std::__map_iterator ( = {\[0x[a-f0-9]+\] = .*}|))"); } void unordered_set_test() { diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py index 0ee446f46c51..9b413a86b159 100644 --- a/libcxx/utils/gdb/libcxx/printers.py +++ b/libcxx/utils/gdb/libcxx/printers.py @@ -13,6 +13,7 @@ from __future__ import print_function +import math import re import gdb @@ -141,7 +142,7 @@ def __iter__(self): def __next__(self): # child_iter raises StopIteration when appropriate. - field_name = self.child_iter.next() + field_name = next(self.child_iter) child = self.val["__base_"][field_name]["__value_"] self.count += 1 return ("[%d]" % self.count, child) @@ -425,6 +426,7 @@ def __init__(self, val): self.val = val self.n_words = int(self.val["__n_words"]) self.bits_per_word = int(self.val["__bits_per_word"]) + self.bit_count = self.val.type.template_argument(0) if self.n_words == 1: self.values = [int(self.val["__first_"])] else: @@ -435,21 +437,12 @@ def to_string(self): typename = _prettify_typename(self.val.type) return "%s" % typename - def _byte_it(self, value): - index = -1 - while value: - index += 1 - will_yield = value % 2 - value /= 2 - if will_yield: - yield index - def _list_it(self): - for word_index in range(self.n_words): - current = self.values[word_index] - if current: - for n in self._byte_it(current): - yield ("[%d]" % (word_index * self.bits_per_word + n), 1) + for bit in range(self.bit_count): + word = math.floor(bit / self.bits_per_word) + word_bit = bit % self.bits_per_word + if self.values[word] & (1 << word_bit): + yield ("[%d]" % bit, 1) def __iter__(self): return self._list_it() From cf3e126e6449255ef183df71f5c54020d7a87ee5 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Mon, 29 Mar 2021 12:10:18 -0700 Subject: [PATCH 262/318] [libcxx] Make the GDB pretty printer test less strict This is a workaround for PR48937. 
GDB can sometimes print additional warnings which currently fails the test. Use re.search instead of re.match to ignore this additional output. Differential Revision: https://reviews.llvm.org/D99532 (cherry picked from commit 9ac988f6a80aa1dd25594a8b4c86c0380ac99466) --- libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py index af473c48ea9a..19e9a4a793bf 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py @@ -49,7 +49,7 @@ def invoke(self, arg, from_tty): expectation_val = compare_frame.read_var("expectation") check_literal = expectation_val.string(encoding="utf-8") if "PrettyPrintToRegex" in compare_frame.name(): - test_fails = not re.match(check_literal, value) + test_fails = not re.search(check_literal, value) else: test_fails = value != check_literal From bf25180e6727f53b916c73e53212b274fe64ded6 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 15 Apr 2021 18:03:01 -0700 Subject: [PATCH 263/318] Tolerate missing debug info in the shared_ptr pretty printer. Certain fields of shared ptr have virtual functions and therefore have their debug info homed in libc++. But if libc++ wasn't built with debug info, the pretty printer would fail. This patch makes the pretty printer tolerate such conditions and updates the test harness. This patch significantly reworks a previous attempt. 
This addresses https://bugs.llvm.org/show_bug.cgi?id=48937 Differential Revision: https://reviews.llvm.org/D100610 (cherry picked from commit 55b7061116b5f0f839bd4240c3c6fba63918b816) --- .../libcxx/gdb/gdb_pretty_printer_test.py | 1 - .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp | 10 +++++---- libcxx/utils/gdb/libcxx/printers.py | 21 +++++++++++++------ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py index 19e9a4a793bf..b9d00371418e 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py @@ -45,7 +45,6 @@ def invoke(self, arg, from_tty): # Ignore the convenience variable name and newline value = value_str[value_str.find("= ") + 2:-1] gdb.newest_frame().select() - expectation_val = compare_frame.read_var("expectation") check_literal = expectation_val.string(encoding="utf-8") if "PrettyPrintToRegex" in compare_frame.name(): diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index d8a02a384563..2d8e9620089a 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -608,25 +608,27 @@ void shared_ptr_test() { // due to which there is one more count for the pointer. Hence, all the // following tests are testing with expected count plus 1. std::shared_ptr test0 = std::make_shared(5); + // The python regular expression matcher treats newlines as significant, so + // these regular expressions should be on one line. ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 2, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [2\?], weak [0\?]( \(libc\+\+ missing debug info\))? 
containing = {__ptr_ = 0x[a-f0-9]+})"); std::shared_ptr test1(test0); ComparePrettyPrintToRegex( test1, - R"(std::shared_ptr count 3, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [0\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); { std::weak_ptr test2 = test1; ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 3, weak 1 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [1\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); } ComparePrettyPrintToRegex( test0, - R"(std::shared_ptr count 3, weak 0 containing = {__ptr_ = 0x[a-f0-9]+})"); + R"(std::shared_ptr count [3\?], weak [0\?]( \(libc\+\+ missing debug info\))? containing = {__ptr_ = 0x[a-f0-9]+})"); std::shared_ptr test3; ComparePrettyPrintToChars(test3, "std::shared_ptr is nullptr"); diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py index 9b413a86b159..9d9a96a3e36f 100644 --- a/libcxx/utils/gdb/libcxx/printers.py +++ b/libcxx/utils/gdb/libcxx/printers.py @@ -312,12 +312,21 @@ def to_string(self): return "%s is nullptr" % typename refcount = self.val["__cntrl_"] if refcount != 0: - usecount = refcount["__shared_owners_"] + 1 - weakcount = refcount["__shared_weak_owners_"] - if usecount == 0: - state = "expired, weak %d" % weakcount - else: - state = "count %d, weak %d" % (usecount, weakcount) + try: + usecount = refcount["__shared_owners_"] + 1 + weakcount = refcount["__shared_weak_owners_"] + if usecount == 0: + state = "expired, weak %d" % weakcount + else: + state = "count %d, weak %d" % (usecount, weakcount) + except: + # Debug info for a class with virtual functions is emitted + # in the same place as its key function. That means that + # for std::shared_ptr, __shared_owners_ is emitted into + # into libcxx.[so|a] itself, rather than into the shared_ptr + # instantiation point. 
So if libcxx.so was built without + # debug info, these fields will be missing. + state = "count ?, weak ? (libc++ missing debug info)" return "%s<%s> %s containing" % (typename, pointee_type, state) def __iter__(self): From 1539c543dbe5edb9d6809aa8ad60b32dba5888d7 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Thu, 15 Apr 2021 18:03:01 -0700 Subject: [PATCH 264/318] Don't fail the shared_ptr test if libc++ has insufficient debug info. Don't fail the shared_ptr test if libc++ has insufficient debug info. This addresses https://bugs.llvm.org/show_bug.cgi?id=48937 Differential Revision: https://reviews.llvm.org/D100610 (cherry picked from commit 0e8378032597bcaccb948de88e965ad75bfaeb7b) --- .../test/libcxx/gdb/gdb_pretty_printer_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py index b9d00371418e..4621e6388343 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.py @@ -37,6 +37,23 @@ def invoke(self, arg, from_tty): compare_frame = gdb.newest_frame().older() testcase_frame = compare_frame.older() test_loc = testcase_frame.find_sal() + + expectation_val = compare_frame.read_var("expectation") + check_literal = expectation_val.string(encoding="utf-8") + + # Heuristic to determine if libc++ itself has debug + # info. If it doesn't, then anything normally homed there + # won't be found, and the printer will error. We don't + # want to fail the test in this case--the printer itself + # is probably fine, or at least we can't tell. 
+ if check_literal.startswith("std::shared_ptr"): + shared_ptr = compare_frame.read_var("value") + if not "__shared_owners_" in shared_ptr.type.fields(): + print("IGNORED (no debug info in libc++): " + + test_loc.symtab.filename + ":" + + str(test_loc.line)) + return + # Use interactive commands in the correct context to get the pretty # printed version From fec90b2cebc3d1bb7d9562bec5170d697db39a84 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Tue, 4 May 2021 16:58:54 +0200 Subject: [PATCH 265/318] Reland "[MC][ELF] Work around R_MIPS_LO16 relocation handling problem" This fixes PR49821, and avoids "ld.lld: error: test.o:(.rodata.str1.1): offset is outside the section" errors when linking MIPS objects with negative R_MIPS_LO16 implicit addends. ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in range of a MergeInputSection. We could introduce a new RelExpr member (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) but the complexity is unnecessary given that GNU as keeps the original symbol for this case as well. Adds a new test case for PR49821, and also updates two other test cases that are affected by this change. 
Reviewed By: atanasyan, MaskRay Differential Revision: https://reviews.llvm.org/D101773 (cherry picked from commit 7e83a7f1fdfcc2edde61f0a535f9d7a56f531db9) --- llvm/lib/MC/ELFObjectWriter.cpp | 11 +++++++++++ llvm/test/MC/Mips/elf-relsym.s | 10 ++++++++-- llvm/test/MC/Mips/mips_lo16.s | 22 ++++++++++++++++++++++ llvm/test/MC/Mips/xgot.s | 4 ++-- 4 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 llvm/test/MC/Mips/mips_lo16.s diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 69307b617552..2d810ffd350b 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, if (TargetObjectWriter->getEMachine() == ELF::EM_386 && Type == ELF::R_386_GOTOFF) return true; + + // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so + // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an + // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in + // range of a MergeInputSection. We could introduce a new RelExpr member + // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) + // but the complexity is unnecessary given that GNU as keeps the original + // symbol for this case as well. + if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS && + !hasRelocationAddend()) + return true; } // Most TLS relocations use a got, so they need the symbol. 
Even those that diff --git a/llvm/test/MC/Mips/elf-relsym.s b/llvm/test/MC/Mips/elf-relsym.s index d19b4e3c4820..b8c2f89e82e6 100644 --- a/llvm/test/MC/Mips/elf-relsym.s +++ b/llvm/test/MC/Mips/elf-relsym.s @@ -4,10 +4,16 @@ // CHECK: Symbols [ // CHECK: Symbol { -// CHECK: Name: .rodata.cst8 +// CHECK: Name: $.str // CHECK: } // CHECK: Symbol { -// CHECK: Name: .rodata.str1.1 +// CHECK: Name: $.str1 +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: $CPI0_0 +// CHECK: } +// CHECK: Symbol { +// CHECK: Name: $CPI0_1 // CHECK: } // CHECK: ] diff --git a/llvm/test/MC/Mips/mips_lo16.s b/llvm/test/MC/Mips/mips_lo16.s new file mode 100644 index 000000000000..a400f9206116 --- /dev/null +++ b/llvm/test/MC/Mips/mips_lo16.s @@ -0,0 +1,22 @@ +# PR49821: Check that R_MIPS_LO16 relocs do not wrap around with large addends. + +# RUN: llvm-mc %s -triple mips-unknown-unknown -filetype=obj | \ +# RUN: llvm-objdump -d -r --no-show-raw-insn - | \ +# RUN: FileCheck -check-prefix=MIPS32 %s + +# RUN: llvm-mc %s -triple mips64-unknown-unknown -filetype=obj | \ +# RUN: llvm-objdump -d -r --no-show-raw-insn - | \ +# RUN: FileCheck -check-prefix=MIPS64 %s + + .text +foo: + lui $2, %hi(bar) +# MIPS32: 00000000: R_MIPS_HI16 bar +# MIPS64: 0000000000000000: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE .rodata.str1.1+0x8000 + addiu $2, $2, %lo(bar) +# MIPS32: 00000004: R_MIPS_LO16 bar +# MIPS64: 0000000000000004: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE .rodata.str1.1+0x8000 + .section .rodata.str1.1,"aMS",@progbits,1 + .zero 0x8000 +bar: + .asciz "hello" diff --git a/llvm/test/MC/Mips/xgot.s b/llvm/test/MC/Mips/xgot.s index 0c29582d681c..100d25e67223 100644 --- a/llvm/test/MC/Mips/xgot.s +++ b/llvm/test/MC/Mips/xgot.s @@ -10,8 +10,8 @@ // CHECK: 0x1C R_MIPS_GOT_LO16 ext_1 // CHECK: 0x24 R_MIPS_CALL_HI16 printf // CHECK: 0x30 R_MIPS_CALL_LO16 printf -// CHECK: 0x2C R_MIPS_GOT16 .rodata.str1.1 -// CHECK: 0x38 R_MIPS_LO16 .rodata.str1.1 +// CHECK: 0x2C R_MIPS_GOT16 $.str +// CHECK: 0x38 R_MIPS_LO16 $.str // 
CHECK: ] .text From 28730bc82ac00125d1f5894464294583abf786ef Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sat, 1 May 2021 17:13:50 +0200 Subject: [PATCH 266/318] [AArch64] Prevent spilling between ldxr/stxr pairs Apply the same logic used to check if CMPXCHG nodes should be expanded at -O0: the register allocator may end up spilling some register in between the atomic load/store pairs, breaking the atomicity and possibly stalling the execution. Fixes PR48017 Reviewed By: efriedman Differential Revision: https://reviews.llvm.org/D101163 (cherry picked from commit 4751cadcca45984d7671e594ce95aed8fe030bf1) --- .../Target/AArch64/AArch64ISelLowering.cpp | 43 +- llvm/test/CodeGen/AArch64/atomicrmw-O0.ll | 697 ++++++++++++++++++ .../AArch64/expand-atomicrmw-xchg-fp.ll | 4 +- 3 files changed, 726 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/atomicrmw-O0.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1451151f4dc5..c522ee76626d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16335,25 +16335,36 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; - // Nand not supported in LSE. - if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; - // Leave 128 bits to LLSC. - if (Subtarget->hasLSE() && Size < 128) - return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics() && Size < 128) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. 
- // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } } } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. 
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll new file mode 100644 index 000000000000..d3a1f144b851 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll @@ -0,0 +1,697 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE +; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE + +; Ensure there's no stack spill in between ldxr/stxr pairs. + +define i8 @test_rmw_add_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_add_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB0_1 +; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB0_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB0_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB0_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB0_2 +; NOLSE-NEXT: .LBB0_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, 
#12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB0_1 +; NOLSE-NEXT: b .LBB0_5 +; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalb w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_add_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_add_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB1_1 +; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB1_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB1_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB1_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB1_2 +; NOLSE-NEXT: .LBB1_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB1_1 +; NOLSE-NEXT: b .LBB1_5 +; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, 
#12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddalh w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_add_32(i32* %dst) { +; NOLSE-LABEL: test_rmw_add_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB2_1 +; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB2_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add w12, w9, #1 // =1 +; NOLSE-NEXT: .LBB2_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB2_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB2_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2 +; NOLSE-NEXT: stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB2_2 +; NOLSE-NEXT: .LBB2_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB2_1 +; NOLSE-NEXT: b .LBB2_5 +; NOLSE-NEXT: .LBB2_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: ldaddal w8, w0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add 
i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_add_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_add_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB3_1 +; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB3_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add x12, x9, #1 // =1 +; NOLSE-NEXT: .LBB3_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB3_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB3_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB3_2 +; NOLSE-NEXT: .LBB3_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB3_1 +; NOLSE-NEXT: b .LBB3_5 +; NOLSE-NEXT: .LBB3_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: mov w8, #1 +; LSE-NEXT: // kill: def $x8 killed $w8 +; LSE-NEXT: ldaddal x8, x0, [x0] +; LSE-NEXT: ret +entry: + %res = atomicrmw add i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_add_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_add_128: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: 
.cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB4_1 +; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB4_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: adds x14, x8, #1 // =1 +; NOLSE-NEXT: mov x9, xzr +; NOLSE-NEXT: adcs x15, x11, x9 +; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB4_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; NOLSE-NEXT: cbnz w12, .LBB4_2 +; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB4_1 +; NOLSE-NEXT: b .LBB4_5 +; NOLSE-NEXT: .LBB4_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_add_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; 
LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB4_1 +; LSE-NEXT: .LBB4_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: adds x2, x8, #1 // =1 +; LSE-NEXT: mov x11, xzr +; LSE-NEXT: adcs x11, x10, x11 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB4_1 +; LSE-NEXT: b .LBB4_2 +; LSE-NEXT: .LBB4_2: // %atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: ret +entry: + %res = atomicrmw add i128* %dst, i128 1 seq_cst + ret i128 %res +} +define i8 @test_rmw_nand_8(i8* %dst) { +; NOLSE-LABEL: test_rmw_nand_8: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrb w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB5_1 +; NOLSE-NEXT: .LBB5_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; 
NOLSE-NEXT: // Child Loop BB5_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB5_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB5_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrb w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxtb +; NOLSE-NEXT: b.ne .LBB5_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2 +; NOLSE-NEXT: stlxrb w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB5_2 +; NOLSE-NEXT: .LBB5_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxtb +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB5_1 +; NOLSE-NEXT: b .LBB5_5 +; NOLSE-NEXT: .LBB5_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_8: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrb w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB5_1 +; LSE-NEXT: .LBB5_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalb w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxtb +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded 
Spill +; LSE-NEXT: b.ne .LBB5_1 +; LSE-NEXT: b .LBB5_2 +; LSE-NEXT: .LBB5_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i8* %dst, i8 1 seq_cst + ret i8 %res +} + +define i16 @test_rmw_nand_16(i16* %dst) { +; NOLSE-LABEL: test_rmw_nand_16: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldrh w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB6_1 +; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB6_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB6_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxrh w8, [x11] +; NOLSE-NEXT: cmp w8, w9, uxth +; NOLSE-NEXT: b.ne .LBB6_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2 +; NOLSE-NEXT: stlxrh w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB6_2 +; NOLSE-NEXT: .LBB6_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9, uxth +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB6_1 +; NOLSE-NEXT: b .LBB6_5 +; NOLSE-NEXT: .LBB6_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_16: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: 
str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldrh w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB6_1 +; LSE-NEXT: .LBB6_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casalh w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9, uxth +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB6_1 +; LSE-NEXT: b .LBB6_2 +; LSE-NEXT: .LBB6_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i16* %dst, i16 1 seq_cst + ret i16 %res +} + +define i32 @test_rmw_nand_32(i32* %dst) { +; NOLSE-LABEL: test_rmw_nand_32: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr w8, [x0] +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b .LBB7_1 +; NOLSE-NEXT: .LBB7_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB7_2 Depth 2 +; NOLSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mvn w8, w9 +; NOLSE-NEXT: orr w12, w8, #0xfffffffe +; NOLSE-NEXT: .LBB7_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB7_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr w8, [x11] +; NOLSE-NEXT: cmp w8, w9 +; NOLSE-NEXT: b.ne .LBB7_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2 +; NOLSE-NEXT: 
stlxr w10, w12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB7_2 +; NOLSE-NEXT: .LBB7_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1 +; NOLSE-NEXT: subs w9, w8, w9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB7_1 +; NOLSE-NEXT: b .LBB7_5 +; NOLSE-NEXT: .LBB7_5: // %atomicrmw.end +; NOLSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_32: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr w8, [x0] +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b .LBB7_1 +; LSE-NEXT: .LBB7_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr w9, [sp, #28] // 4-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mvn w8, w9 +; LSE-NEXT: orr w10, w8, #0xfffffffe +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: casal w8, w10, [x11] +; LSE-NEXT: str w8, [sp, #8] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w8, w9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str w8, [sp, #12] // 4-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; LSE-NEXT: b.ne .LBB7_1 +; LSE-NEXT: b .LBB7_2 +; LSE-NEXT: .LBB7_2: // %atomicrmw.end +; LSE-NEXT: ldr w0, [sp, #12] // 4-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i32* %dst, i32 1 seq_cst + ret i32 %res +} + +define i64 @test_rmw_nand_64(i64* %dst) { +; NOLSE-LABEL: test_rmw_nand_64: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #32 // =32 +; NOLSE-NEXT: .cfi_def_cfa_offset 32 +; NOLSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0] +; NOLSE-NEXT: str x8, [sp, 
#24] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB8_1 +; NOLSE-NEXT: .LBB8_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB8_2 Depth 2 +; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: mov w8, w9 +; NOLSE-NEXT: mvn w10, w8 +; NOLSE-NEXT: // implicit-def: $x8 +; NOLSE-NEXT: mov w8, w10 +; NOLSE-NEXT: orr x12, x8, #0xfffffffffffffffe +; NOLSE-NEXT: .LBB8_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB8_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxr x8, [x11] +; NOLSE-NEXT: cmp x8, x9 +; NOLSE-NEXT: b.ne .LBB8_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2 +; NOLSE-NEXT: stlxr w10, x12, [x11] +; NOLSE-NEXT: cbnz w10, .LBB8_2 +; NOLSE-NEXT: .LBB8_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1 +; NOLSE-NEXT: subs x9, x8, x9 +; NOLSE-NEXT: cset w9, eq +; NOLSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: subs w9, w9, #1 // =1 +; NOLSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: b.ne .LBB8_1 +; NOLSE-NEXT: b .LBB8_5 +; NOLSE-NEXT: .LBB8_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #32 // =32 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_64: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #32 // =32 +; LSE-NEXT: .cfi_def_cfa_offset 32 +; LSE-NEXT: str x0, [sp, #16] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0] +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b .LBB8_1 +; LSE-NEXT: .LBB8_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload +; LSE-NEXT: ldr x11, [sp, #16] // 8-byte Folded Reload +; LSE-NEXT: mov w8, w9 +; LSE-NEXT: mvn w10, w8 +; LSE-NEXT: // implicit-def: $x8 +; LSE-NEXT: mov w8, w10 +; LSE-NEXT: orr x10, x8, 
#0xfffffffffffffffe +; LSE-NEXT: mov x8, x9 +; LSE-NEXT: casal x8, x10, [x11] +; LSE-NEXT: str x8, [sp] // 8-byte Folded Spill +; LSE-NEXT: subs x9, x8, x9 +; LSE-NEXT: cset w9, eq +; LSE-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; LSE-NEXT: subs w9, w9, #1 // =1 +; LSE-NEXT: str x8, [sp, #24] // 8-byte Folded Spill +; LSE-NEXT: b.ne .LBB8_1 +; LSE-NEXT: b .LBB8_2 +; LSE-NEXT: .LBB8_2: // %atomicrmw.end +; LSE-NEXT: ldr x0, [sp, #8] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #32 // =32 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i64* %dst, i64 1 seq_cst + ret i64 %res +} + +define i128 @test_rmw_nand_128(i128* %dst) { +; NOLSE-LABEL: test_rmw_nand_128: +; NOLSE: // %bb.0: // %entry +; NOLSE-NEXT: sub sp, sp, #48 // =48 +; NOLSE-NEXT: .cfi_def_cfa_offset 48 +; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill +; NOLSE-NEXT: ldr x8, [x0, #8] +; NOLSE-NEXT: ldr x9, [x0] +; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: b .LBB9_1 +; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start +; NOLSE-NEXT: // =>This Loop Header: Depth=1 +; NOLSE-NEXT: // Child Loop BB9_2 Depth 2 +; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload +; NOLSE-NEXT: mov w9, w8 +; NOLSE-NEXT: mvn w10, w9 +; NOLSE-NEXT: // implicit-def: $x9 +; NOLSE-NEXT: mov w9, w10 +; NOLSE-NEXT: orr x14, x9, #0xfffffffffffffffe +; NOLSE-NEXT: mov x15, #-1 +; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start +; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1 +; NOLSE-NEXT: // => This Inner Loop Header: Depth=2 +; NOLSE-NEXT: ldaxp x10, x9, [x13] +; NOLSE-NEXT: cmp x10, x8 +; NOLSE-NEXT: cset w12, ne +; NOLSE-NEXT: cmp x9, x11 +; NOLSE-NEXT: cinc w12, w12, ne +; NOLSE-NEXT: cbnz w12, .LBB9_4 +; NOLSE-NEXT: // %bb.3: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2 +; NOLSE-NEXT: stlxp w12, x14, x15, [x13] +; 
NOLSE-NEXT: cbnz w12, .LBB9_2 +; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start +; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1 +; NOLSE-NEXT: eor x11, x9, x11 +; NOLSE-NEXT: eor x8, x10, x8 +; NOLSE-NEXT: orr x8, x8, x11 +; NOLSE-NEXT: str x9, [sp, #8] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #16] // 8-byte Folded Spill +; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill +; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; NOLSE-NEXT: cbnz x8, .LBB9_1 +; NOLSE-NEXT: b .LBB9_5 +; NOLSE-NEXT: .LBB9_5: // %atomicrmw.end +; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload +; NOLSE-NEXT: add sp, sp, #48 // =48 +; NOLSE-NEXT: ret +; +; LSE-LABEL: test_rmw_nand_128: +; LSE: // %bb.0: // %entry +; LSE-NEXT: sub sp, sp, #80 // =80 +; LSE-NEXT: .cfi_def_cfa_offset 80 +; LSE-NEXT: str x0, [sp, #56] // 8-byte Folded Spill +; LSE-NEXT: ldr x8, [x0, #8] +; LSE-NEXT: ldr x9, [x0] +; LSE-NEXT: str x9, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x8, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: b .LBB9_1 +; LSE-NEXT: .LBB9_1: // %atomicrmw.start +; LSE-NEXT: // =>This Inner Loop Header: Depth=1 +; LSE-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; LSE-NEXT: ldr x8, [sp, #64] // 8-byte Folded Reload +; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload +; LSE-NEXT: mov x0, x8 +; LSE-NEXT: mov x1, x10 +; LSE-NEXT: stp x0, x1, [sp, #8] // 16-byte Folded Spill +; LSE-NEXT: mov w11, w8 +; LSE-NEXT: mvn w12, w11 +; LSE-NEXT: // implicit-def: $x11 +; LSE-NEXT: mov w11, w12 +; LSE-NEXT: orr x2, x11, #0xfffffffffffffffe +; LSE-NEXT: mov x11, #-1 +; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3 +; LSE-NEXT: mov x3, x11 +; LSE-NEXT: caspal x0, x1, x2, x3, [x9] +; LSE-NEXT: stp x0, x1, [sp, #24] // 16-byte Folded Spill +; LSE-NEXT: mov x9, x1 +; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill +; LSE-NEXT: eor x11, x9, x10 +; LSE-NEXT: mov x10, x0 +; LSE-NEXT: str x10, [sp, #48] // 8-byte Folded 
Spill +; LSE-NEXT: eor x8, x10, x8 +; LSE-NEXT: orr x8, x8, x11 +; LSE-NEXT: str x10, [sp, #64] // 8-byte Folded Spill +; LSE-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; LSE-NEXT: cbnz x8, .LBB9_1 +; LSE-NEXT: b .LBB9_2 +; LSE-NEXT: .LBB9_2: // %atomicrmw.end +; LSE-NEXT: ldr x1, [sp, #40] // 8-byte Folded Reload +; LSE-NEXT: ldr x0, [sp, #48] // 8-byte Folded Reload +; LSE-NEXT: add sp, sp, #80 // =80 +; LSE-NEXT: ret +entry: + %res = atomicrmw nand i128* %dst, i128 1 seq_cst + ret i128 %res +} diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll index ece757f215a0..86b4cff46980 100644 --- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s -; RUN: opt -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS +; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s +; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS define void @atomic_swap_f16(half* %ptr, half %val) nounwind { ; CHECK-LABEL: @atomic_swap_f16( From 84e8b1cf07b9845ea8e1e07ed5ccc3c5a70d975b Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 15 Apr 2021 12:52:58 +0200 Subject: [PATCH 267/318] [clangd] Only allow remote index to be enabled from user config. 
Differential Revision: https://reviews.llvm.org/D100542 (cherry picked from commit ecf93a716c9ecf2e38898547df90323e239a623c) --- clang-tools-extra/clangd/ConfigCompile.cpp | 13 +++++++-- clang-tools-extra/clangd/ConfigFragment.h | 3 +++ clang-tools-extra/clangd/ConfigProvider.cpp | 27 +++++++++++-------- clang-tools-extra/clangd/ConfigProvider.h | 6 +++-- clang-tools-extra/clangd/tool/ClangdMain.cpp | 4 +-- .../clangd/unittests/ConfigCompileTests.cpp | 14 ++++++++++ 6 files changed, 50 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index b4f0d6186886..7d5466778a81 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -101,6 +101,7 @@ struct FragmentCompiler { llvm::SourceMgr *SourceMgr; // Normalized Fragment::SourceInfo::Directory. std::string FragmentDirectory; + bool Trusted = false; llvm::Optional compileRegex(const Located &Text, @@ -183,6 +184,7 @@ struct FragmentCompiler { } void compile(Fragment &&F) { + Trusted = F.Source.Trusted; if (!F.Source.Directory.empty()) { FragmentDirectory = llvm::sys::path::convert_to_slash(F.Source.Directory); if (FragmentDirectory.back() != '/') @@ -319,6 +321,13 @@ struct FragmentCompiler { void compile(Fragment::IndexBlock::ExternalBlock &&External, llvm::SMRange BlockRange) { + if (External.Server && !Trusted) { + diag(Error, + "Remote index may not be specified by untrusted configuration. 
" + "Copy this into user config to use it.", + External.Server->Range); + return; + } #ifndef CLANGD_ENABLE_REMOTE if (External.Server) { elog("Clangd isn't compiled with remote index support, ignoring Server: " @@ -489,8 +498,8 @@ CompiledFragment Fragment::compile(DiagnosticCallback D) && { trace::Span Tracer("ConfigCompile"); SPAN_ATTACH(Tracer, "ConfigFile", ConfigFile); auto Result = std::make_shared(); - vlog("Config fragment: compiling {0}:{1} -> {2}", ConfigFile, LineCol.first, - Result.get()); + vlog("Config fragment: compiling {0}:{1} -> {2} (trusted={3})", ConfigFile, + LineCol.first, Result.get(), Source.Trusted); FragmentCompiler{*Result, D, Source.Manager.get()}.compile(std::move(*this)); // Return as cheaply-copyable wrapper. diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index c36b07f5e8e2..c98ca3a2dd52 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -94,6 +94,9 @@ struct Fragment { /// Absolute path to directory the fragment is associated with. Relative /// paths mentioned in the fragment are resolved against this. std::string Directory; + /// Whether this fragment is allowed to make critical security/privacy + /// decisions. 
+ bool Trusted = false; }; SourceInfo Source; diff --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp index 05b2ba50566d..6dfb00b14fc6 100644 --- a/clang-tools-extra/clangd/ConfigProvider.cpp +++ b/clang-tools-extra/clangd/ConfigProvider.cpp @@ -34,7 +34,7 @@ class FileConfigCache : public FileCache { : FileCache(Path), Directory(Directory) {} void get(const ThreadsafeFS &TFS, DiagnosticCallback DC, - std::chrono::steady_clock::time_point FreshTime, + std::chrono::steady_clock::time_point FreshTime, bool Trusted, std::vector &Out) const { read( TFS, FreshTime, @@ -43,6 +43,7 @@ class FileConfigCache : public FileCache { if (Data) for (auto &Fragment : Fragment::parseYAML(*Data, path(), DC)) { Fragment.Source.Directory = Directory; + Fragment.Source.Trusted = Trusted; CachedValue.push_back(std::move(Fragment).compile(DC)); } }, @@ -52,35 +53,38 @@ class FileConfigCache : public FileCache { std::unique_ptr Provider::fromYAMLFile(llvm::StringRef AbsPath, llvm::StringRef Directory, - const ThreadsafeFS &FS) { + const ThreadsafeFS &FS, + bool Trusted) { class AbsFileProvider : public Provider { mutable FileConfigCache Cache; // threadsafe const ThreadsafeFS &FS; + bool Trusted; std::vector getFragments(const Params &P, DiagnosticCallback DC) const override { std::vector Result; - Cache.get(FS, DC, P.FreshTime, Result); + Cache.get(FS, DC, P.FreshTime, Trusted, Result); return Result; }; public: AbsFileProvider(llvm::StringRef Path, llvm::StringRef Directory, - const ThreadsafeFS &FS) - : Cache(Path, Directory), FS(FS) { + const ThreadsafeFS &FS, bool Trusted) + : Cache(Path, Directory), FS(FS), Trusted(Trusted) { assert(llvm::sys::path::is_absolute(Path)); } }; - return std::make_unique(AbsPath, Directory, FS); + return std::make_unique(AbsPath, Directory, FS, Trusted); } std::unique_ptr Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, - const ThreadsafeFS &FS) { + const ThreadsafeFS &FS, bool Trusted) { 
class RelFileProvider : public Provider { std::string RelPath; const ThreadsafeFS &FS; + bool Trusted; mutable std::mutex Mu; // Keys are the (posix-style) ancestor directory, not the config within it. @@ -128,18 +132,19 @@ Provider::fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, // This will take a (per-file) lock for each file that actually exists. std::vector Result; for (FileConfigCache *Cache : Caches) - Cache->get(FS, DC, P.FreshTime, Result); + Cache->get(FS, DC, P.FreshTime, Trusted, Result); return Result; }; public: - RelFileProvider(llvm::StringRef RelPath, const ThreadsafeFS &FS) - : RelPath(RelPath), FS(FS) { + RelFileProvider(llvm::StringRef RelPath, const ThreadsafeFS &FS, + bool Trusted) + : RelPath(RelPath), FS(FS), Trusted(Trusted) { assert(llvm::sys::path::is_relative(RelPath)); } }; - return std::make_unique(RelPath, FS); + return std::make_unique(RelPath, FS, Trusted); } std::unique_ptr diff --git a/clang-tools-extra/clangd/ConfigProvider.h b/clang-tools-extra/clangd/ConfigProvider.h index 25d9450f28a7..428438b67f14 100644 --- a/clang-tools-extra/clangd/ConfigProvider.h +++ b/clang-tools-extra/clangd/ConfigProvider.h @@ -69,7 +69,8 @@ class Provider { /// Directory will be used to resolve relative paths in the fragments. static std::unique_ptr fromYAMLFile(llvm::StringRef AbsPath, llvm::StringRef Directory, - const ThreadsafeFS &); + const ThreadsafeFS &, + bool Trusted = false); // Reads fragments from YAML files found relative to ancestors of Params.Path. // // All fragments that exist are returned, starting from distant ancestors. @@ -78,7 +79,8 @@ class Provider { // // If Params does not specify a path, no fragments are returned. static std::unique_ptr - fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, const ThreadsafeFS &); + fromAncestorRelativeYAMLFiles(llvm::StringRef RelPath, const ThreadsafeFS &, + bool Trusted = false); /// A provider that includes fragments from all the supplied providers. 
/// Order is preserved; later providers take precedence over earlier ones. diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index fe69079bfe67..99c3d97ce35d 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -831,8 +831,8 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var if (llvm::sys::path::user_config_directory(UserConfig)) { llvm::sys::path::append(UserConfig, "clangd", "config.yaml"); vlog("User config file is {0}", UserConfig); - ProviderStack.push_back( - config::Provider::fromYAMLFile(UserConfig, /*Directory=*/"", TFS)); + ProviderStack.push_back(config::Provider::fromYAMLFile( + UserConfig, /*Directory=*/"", TFS, /*Trusted=*/true)); } else { elog("Couldn't determine user config file, not loading"); } diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 4961d3474fd9..a3738681bec5 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -318,7 +318,21 @@ TEST_F(ConfigCompileTests, TidyBadChecks) { DiagKind(llvm::SourceMgr::DK_Warning)))); } +TEST_F(ConfigCompileTests, ExternalServerNeedsTrusted) { + Fragment::IndexBlock::ExternalBlock External; + External.Server.emplace("xxx"); + Frag.Index.External = std::move(External); + compileAndApply(); + EXPECT_THAT( + Diags.Diagnostics, + ElementsAre(DiagMessage( + "Remote index may not be specified by untrusted configuration. 
" + "Copy this into user config to use it."))); + EXPECT_FALSE(Conf.Index.External.hasValue()); +} + TEST_F(ConfigCompileTests, ExternalBlockWarnOnMultipleSource) { + Frag.Source.Trusted = true; Fragment::IndexBlock::ExternalBlock External; External.File.emplace(""); External.Server.emplace(""); From 42326932eca6d49fc59f8f560917be16764e087e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 9 Jun 2021 08:57:38 -0400 Subject: [PATCH 268/318] [x86] add tests for store merging miscompile (PR50623); NFC (cherry picked from commit 2ef81cb297954cdbc2eca2f204a5ecba4ec1ccd8) --- llvm/test/CodeGen/X86/stores-merging.ll | 95 +++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index d92342ab7fa8..d4857a6645af 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -14,7 +14,7 @@ define dso_local void @redundant_stores_merging() { ; CHECK-LABEL: redundant_stores_merging: ; CHECK: # %bb.0: ; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001 -; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) +; CHECK-NEXT: movq %rax, e+4(%rip) ; CHECK-NEXT: retq store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4 store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 @@ -27,8 +27,8 @@ define dso_local void @redundant_stores_merging_reverse() { ; CHECK-LABEL: redundant_stores_merging_reverse: ; CHECK: # %bb.0: ; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001 -; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) -; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8 +; CHECK-NEXT: movq %rax, e+4(%rip) +; CHECK-NEXT: movl $456, e+8(%rip) # imm = 0x1C8 ; CHECK-NEXT: retq store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 
@@ -46,8 +46,8 @@ define dso_local void @redundant_stores_merging_reverse() { define dso_local void @overlapping_stores_merging() { ; CHECK-LABEL: overlapping_stores_merging: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, {{.*}}(%rip) -; CHECK-NEXT: movw $2, b+{{.*}}(%rip) +; CHECK-NEXT: movl $1, b(%rip) +; CHECK-NEXT: movw $2, b+3(%rip) ; CHECK-NEXT: retq store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2 store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1 @@ -612,3 +612,88 @@ define dso_local void @be_i64_to_i32_order(i64 %x, i32* %p0) { store i32 %t0, i32* %p1, align 4 ret void } + +; https://llvm.org/PR50623 +; FIXME: +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +; Change the order of the stores. +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole2(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: movw %ax, 2(%rsi) +; CHECK-NEXT: movb %dil, (%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p, align 1 + ret void +} + +; Change offset. 
+; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole3(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole3: +; CHECK: # %bb.0: +; CHECK-NEXT: movb %dil, 1(%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: retq + %p1 = getelementptr inbounds i8, i8* %p, i64 1 + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p1, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %p2, align 1 + ret void +} + +; Change offset. +; FIXME: +; It is a miscompile to merge the stores if we are not +; writing all of the bytes from the source value. + +define void @merge_hole4(i32 %x, i8* %p) { +; CHECK-LABEL: merge_hole4: +; CHECK: # %bb.0: +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: retq + %pcast = bitcast i8* %p to i16* + %p2 = getelementptr inbounds i8, i8* %p, i64 2 + %x3 = trunc i32 %x to i8 + store i8 %x3, i8* %p2, align 1 + %sh = lshr i32 %x, 16 + %x01 = trunc i32 %sh to i16 + store i16 %x01, i16* %pcast, align 1 + ret void +} From b54ccef144d2753a9742a3c0e75bcf377120fc6c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 9 Jun 2021 09:44:58 -0400 Subject: [PATCH 269/318] [SDAG] fix miscompile from merging stores of different sizes As shown in: https://llvm.org/PR50623 ...and the similar tests here, we were not accounting for store merging of different sizes that do not cover the entire range of the wide value to be stored. This is the easy fix: just make sure that all of the original stores are the same size, so when we calculate the wide width, it's a simple N * M check. This still allows all of the motivating optimizations from: D86420 / 54a5dd485c4d D87112 / 7a06b166b1af We could enhance this code to track individual bytes and allow merging multiple sizes. 
(cherry picked from commit dd763ac79196b3d3bc0370b9dbd35e0c083e52a4) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 ++++++++++++------- llvm/test/CodeGen/X86/stores-merging.ll | 11 +++++---- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6a6f83827f72..7f2add81e80d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7105,14 +7105,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { if (LegalOperations) return SDValue(); - // Collect all the stores in the chain. - SDValue Chain; - SmallVector Stores; - for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { - // TODO: Allow unordered atomics when wider type is legal (see D66309) - EVT MemVT = Store->getMemoryVT(); - if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || - !Store->isSimple() || Store->isIndexed()) + // We only handle merging simple stores of 1-4 bytes. + // TODO: Allow unordered atomics when wider type is legal (see D66309) + EVT MemVT = N->getMemoryVT(); + if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || + !N->isSimple() || N->isIndexed()) + return SDValue(); + + // Collect all of the stores in the chain. + SDValue Chain = N->getChain(); + SmallVector Stores = {N}; + while (auto *Store = dyn_cast(Chain)) { + // All stores must be the same size to ensure that we are writing all of the + // bytes in the wide value. + // TODO: We could allow multiple sizes by tracking each stored byte. 
+ if (Store->getMemoryVT() != MemVT || !Store->isSimple() || + Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index d4857a6645af..f738710ab8f3 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -614,14 +614,15 @@ define dso_local void @be_i64_to_i32_order(i64 %x, i32* %p0) { } ; https://llvm.org/PR50623 -; FIXME: ; It is a miscompile to merge the stores if we are not ; writing all of the bytes from the source value. define void @merge_hole(i32 %x, i8* %p) { ; CHECK-LABEL: merge_hole: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: movb %dil, (%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 2(%rsi) ; CHECK-NEXT: retq %pcast = bitcast i8* %p to i16* %p2 = getelementptr inbounds i16, i16* %pcast, i64 1 @@ -678,15 +679,15 @@ define void @merge_hole3(i32 %x, i8* %p) { } ; Change offset. -; FIXME: ; It is a miscompile to merge the stores if we are not ; writing all of the bytes from the source value. define void @merge_hole4(i32 %x, i8* %p) { ; CHECK-LABEL: merge_hole4: ; CHECK: # %bb.0: -; CHECK-NEXT: rorl $16, %edi -; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: movb %dil, 2(%rsi) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, (%rsi) ; CHECK-NEXT: retq %pcast = bitcast i8* %p to i16* %p2 = getelementptr inbounds i8, i8* %p, i64 2 From 319a27b4211f49f6c8808be2b193208b62fb53c0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 5 Mar 2021 14:29:57 +0100 Subject: [PATCH 270/318] [BPF] Add support for floats and doubles Some BPF programs compiled on s390 fail to load, because s390 arch-specific linux headers contain float and double types. At the moment there is no BTF_KIND for floats and doubles, so the release version of LLVM ends up emitting type id 0 for them, which the in-kernel verifier does not accept. 
Introduce support for such types to libbpf by representing them using the new BTF_KIND_FLOAT. Reviewed By: yonghong-song Differential Revision: https://reviews.llvm.org/D83289 (cherry picked from commit a7137b238a07d9399d3ae96c0b461571bd5aa8b2) --- llvm/lib/Target/BPF/BTF.def | 1 + llvm/lib/Target/BPF/BTFDebug.cpp | 43 ++++++++++++++++----- llvm/lib/Target/BPF/BTFDebug.h | 9 +++++ llvm/test/CodeGen/BPF/BTF/double.ll | 58 +++++++++++++++++++++++++++++ llvm/test/CodeGen/BPF/BTF/float.ll | 58 +++++++++++++++++++++++++++++ 5 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/BTF/double.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/float.ll diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def index 2d2e9a04aa6d..66cf2c90ead4 100644 --- a/llvm/lib/Target/BPF/BTF.def +++ b/llvm/lib/Target/BPF/BTF.def @@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC) HANDLE_BTF_KIND(13, FUNC_PROTO) HANDLE_BTF_KIND(14, VAR) HANDLE_BTF_KIND(15, DATASEC) +HANDLE_BTF_KIND(16, FLOAT) #undef HANDLE_BTF_KIND diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index f9bdffe7cbae..da7ec32703a5 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -371,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) { } } +BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName) + : Name(TypeName) { + Kind = BTF::BTF_KIND_FLOAT; + BTFType.Info = Kind << 24; + BTFType.Size = roundupToBytes(SizeInBits); +} + +void BTFTypeFloat::completeType(BTFDebug &BDebug) { + if (IsCompleted) + return; + IsCompleted = true; + + BTFType.NameOff = BDebug.addString(Name); +} + uint32_t BTFStringTable::addString(StringRef S) { // Check whether the string already exists. for (auto &OffsetM : OffsetToIdMap) { @@ -409,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr TypeEntry) { } void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) { - // Only int types are supported in BTF. 
+ // Only int and binary floating point types are supported in BTF. uint32_t Encoding = BTy->getEncoding(); - if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed && - Encoding != dwarf::DW_ATE_signed_char && - Encoding != dwarf::DW_ATE_unsigned && - Encoding != dwarf::DW_ATE_unsigned_char) + std::unique_ptr TypeEntry; + switch (Encoding) { + case dwarf::DW_ATE_boolean: + case dwarf::DW_ATE_signed: + case dwarf::DW_ATE_signed_char: + case dwarf::DW_ATE_unsigned: + case dwarf::DW_ATE_unsigned_char: + // Create a BTF type instance for this DIBasicType and put it into + // DIToIdMap for cross-type reference check. + TypeEntry = std::make_unique( + Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); + break; + case dwarf::DW_ATE_float: + TypeEntry = + std::make_unique(BTy->getSizeInBits(), BTy->getName()); + break; + default: return; + } - // Create a BTF type instance for this DIBasicType and put it into - // DIToIdMap for cross-type reference check. - auto TypeEntry = std::make_unique( - Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); TypeId = addType(std::move(TypeEntry), BTy); } diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 1bad0d11fee4..fb20ec59574b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -195,6 +195,15 @@ class BTFKindDataSec : public BTFTypeBase { void emitType(MCStreamer &OS) override; }; +/// Handle binary floating point type. +class BTFTypeFloat : public BTFTypeBase { + StringRef Name; + +public: + BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName); + void completeType(BTFDebug &BDebug) override; +}; + /// String table. class BTFStringTable { /// String table size in bytes. 
diff --git a/llvm/test/CodeGen/BPF/BTF/double.ll b/llvm/test/CodeGen/BPF/BTF/double.ll new file mode 100644 index 000000000000..655c3710d597 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/double.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s + +; Source code: +; double a; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +@a = dso_local local_unnamed_addr global double 0.000000e+00, align 8, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +; CHECK: .section .BTF,"",@progbits +; CHECK-NEXT: .short 60319 # 0xeb9f +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .long 24 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 15 +; [1] double, size=8 bytes (64 bits) +; CHECK-NEXT: .long 1 # BTF_KIND_FLOAT(id = 1) +; CHECK-NEXT: .long 268435456 # 0x10000000 +; CHECK-NEXT: .long 8 +; [2] a, type=double (1), global +; CHECK-NEXT: .long 8 # BTF_KIND_VAR(id = 2) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 1 +; [3] .bss, 1 var, {a, offset=&a, size=8 bytes} +; CHECK-NEXT: .long 10 # BTF_KIND_DATASEC(id = 3) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long a +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 0 # string offset=0 +; CHECK-NEXT: .ascii "double" # string offset=1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 97 # string offset=8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii ".bss" # string offset=10 +; CHECK-NEXT: .byte 0 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "a", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 ", isOptimized: true, 
runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/home/yhs/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 11.0.0 "} diff --git a/llvm/test/CodeGen/BPF/BTF/float.ll b/llvm/test/CodeGen/BPF/BTF/float.ll new file mode 100644 index 000000000000..a061263eed7d --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/float.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s + +; Source code: +; float a; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +@a = dso_local local_unnamed_addr global float 0.000000e+00, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +; CHECK: .section .BTF,"",@progbits +; CHECK-NEXT: .short 60319 # 0xeb9f +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .long 24 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 52 +; CHECK-NEXT: .long 14 +; [1] float, size=4 bytes (32 bits) +; CHECK-NEXT: .long 1 # BTF_KIND_FLOAT(id = 1) +; CHECK-NEXT: .long 268435456 # 0x10000000 +; CHECK-NEXT: .long 4 +; [2] a, type=float (1), global +; CHECK-NEXT: .long 7 # BTF_KIND_VAR(id = 2) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 1 +; [3] .bss, 1 var, {a, offset=&a, size=4 bytes} +; CHECK-NEXT: .long 9 # BTF_KIND_DATASEC(id = 3) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long a +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 0 # string offset=0 +; CHECK-NEXT: .ascii "float" # string offset=1 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 97 # 
string offset=7 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii ".bss" # string offset=9 +; CHECK-NEXT: .byte 0 + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "a", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/home/yhs/tmp") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{!"clang version 11.0.0 "} From 7f6ceec93541633993b08cc97703c0771c1977c8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 25 Mar 2021 14:09:19 -0700 Subject: [PATCH 271/318] BPF: add extern func to data sections if specified This permits extern function (BTF_KIND_FUNC) be added to BTF_KIND_DATASEC if a section name is specified. For example, -bash-4.4$ cat t.c void foo(int) __attribute__((section(".kernel.funcs"))); int test(void) { foo(5); return 0; } The extern function foo (BTF_KIND_FUNC) will be put into BTF_KIND_DATASEC with name ".kernel.funcs". This will help to differentiate two kinds of external functions, functions in kernel and functions defined in other bpf programs. 
Differential Revision: https://reviews.llvm.org/D93563 (cherry picked from commit 886f9ff53155075bd5f1e994f17b85d1e1b7470c) --- llvm/lib/Target/BPF/BTFDebug.cpp | 18 +++++++++++++++--- llvm/lib/Target/BPF/BTFDebug.h | 2 +- .../BPF/BTF/extern-var-func-weak-section.ll | 13 ++++++++++--- .../test/CodeGen/BPF/BTF/extern-var-section.ll | 9 ++++++--- .../CodeGen/BPF/BTF/extern-var-weak-section.ll | 9 ++++++--- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index da7ec32703a5..bedf159430dc 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -1224,8 +1224,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { const DataLayout &DL = Global.getParent()->getDataLayout(); uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType()); - DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global), - Size); + DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId, + Asm->getSymbol(&Global), Size); } } @@ -1303,7 +1303,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) { uint8_t Scope = BTF::FUNC_EXTERN; auto FuncTypeEntry = std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope); - addType(std::move(FuncTypeEntry)); + uint32_t FuncId = addType(std::move(FuncTypeEntry)); + if (F->hasSection()) { + StringRef SecName = F->getSection(); + + if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) { + DataSecEntries[std::string(SecName)] = + std::make_unique<BTFKindDataSec>(Asm, std::string(SecName)); + } + + // We really don't know func size, set it to 0.
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId, + Asm->getSymbol(F), 0); + } } void BTFDebug::endModule() { diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index fb20ec59574b..76f1901779bb 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -187,7 +187,7 @@ class BTFKindDataSec : public BTFTypeBase { uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size(); } - void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { + void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { Vars.push_back(std::make_tuple(Id, Sym, Size)); } std::string getName() { return Name; } diff --git a/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll b/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll index 23332c9d9aa1..d47a9d6c504a 100644 --- a/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll +++ b/llvm/test/CodeGen/BPF/BTF/extern-var-func-weak-section.ll @@ -23,9 +23,9 @@ declare !dbg !4 extern_weak dso_local i32 @global_func(i8 signext) local_unnamed ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 88 -; CHECK-NEXT: .long 88 -; CHECK-NEXT: .long 72 +; CHECK-NEXT: .long 112 +; CHECK-NEXT: .long 112 +; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 ; CHECK-NEXT: .long 2 @@ -48,6 +48,12 @@ declare !dbg !4 extern_weak dso_local i32 @global_func(i8 signext) local_unnamed ; CHECK-NEXT: .long 60 # BTF_KIND_FUNC(id = 6) ; CHECK-NEXT: .long 201326594 # 0xc000002 ; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 72 # BTF_KIND_DATASEC(id = 7) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func +; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 0 # string offset=0 ; CHECK-NEXT: .ascii "int" # string offset=1 ; CHECK-NEXT: .byte 0 @@ -61,6 +67,7 @@ declare !dbg !4 extern_weak 
dso_local i32 @global_func(i8 signext) local_unnamed ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .ascii "global_func" # string offset=60 ; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .ascii "abc" # string offset=72 attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll b/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll index e01da7e209fd..520847449950 100644 --- a/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll +++ b/llvm/test/CodeGen/BPF/BTF/extern-var-section.ll @@ -28,8 +28,8 @@ entry: ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 128 -; CHECK-NEXT: .long 128 +; CHECK-NEXT: .long 140 +; CHECK-NEXT: .long 140 ; CHECK-NEXT: .long 79 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 @@ -58,7 +58,10 @@ entry: ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 75 # BTF_KIND_DATASEC(id = 8) -; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 251658242 # 0xf000002 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 7 ; CHECK-NEXT: .long ch diff --git a/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll b/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll index 
6e64d9b4e482..bdf7fb49c560 100644 --- a/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll +++ b/llvm/test/CodeGen/BPF/BTF/extern-var-weak-section.ll @@ -28,8 +28,8 @@ declare !dbg !6 extern_weak dso_local i32 @global_func(i8 signext) local_unnamed ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .long 24 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 128 -; CHECK-NEXT: .long 128 +; CHECK-NEXT: .long 140 +; CHECK-NEXT: .long 140 ; CHECK-NEXT: .long 79 ; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) ; CHECK-NEXT: .long 218103808 # 0xd000000 @@ -58,7 +58,10 @@ declare !dbg !6 extern_weak dso_local i32 @global_func(i8 signext) local_unnamed ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 75 # BTF_KIND_DATASEC(id = 8) -; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 251658242 # 0xf000002 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long global_func ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 7 ; CHECK-NEXT: .long ch From 5b149c437194d10877e9e45b3d8cc9252af1944b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 12 Apr 2021 22:45:51 -0700 Subject: [PATCH 272/318] BPF: generate proper BTF for globals with WeakODRLinkage For a global weak symbol defined as below: char g __attribute__((weak)) = 2; LLVM generates an allocated global with WeakAnyLinkage, for which BPF backend generates proper BTF info. For the above example, if a modifier "const" is added like const char g __attribute__((weak)) = 2; LLVM generates an allocated global with WeakODRLinkage, for which BPF backend didn't generate any BTF as it didn't handle WeakODRLinkage. This patch adds support for WeakODRLinkage so that proper BTF info can be generated for a weak symbol defined with the "const" modifier.
Differential Revision: https://reviews.llvm.org/D100362 (cherry picked from commit 968292cb93198442138128d850fd54dc7edc0035) --- llvm/lib/Target/BPF/BTFDebug.cpp | 1 + llvm/test/CodeGen/BPF/BTF/weak-global-3.ll | 86 ++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 llvm/test/CodeGen/BPF/BTF/weak-global-3.ll diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index bedf159430dc..9249d679c7bd 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -1196,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { if (Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::ExternalLinkage && Linkage != GlobalValue::WeakAnyLinkage && + Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::ExternalWeakLinkage) continue; diff --git a/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll b/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll new file mode 100644 index 000000000000..dbd6380a9f1a --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/weak-global-3.ll @@ -0,0 +1,86 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; const volatile char g __attribute__((weak)) = 2; +; int test() { +; return g; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm test.c + +@g = weak_odr dso_local constant i8 2, align 1, !dbg !0 + +; Function Attrs: nofree norecurse nounwind willreturn +define dso_local i32 @test() local_unnamed_addr #0 !dbg !13 { +entry: + %0 = load volatile i8, i8* @g, align 1, !dbg !17, !tbaa !18 + %conv = sext i8 %0 to i32, !dbg !17 + ret i32 %conv, !dbg !21 +} + +; CHECK: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) +; CHECK-NEXT: .long 218103808 # 0xd000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_INT(id = 2) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 
0x1000020 +; CHECK-NEXT: .long 5 # BTF_KIND_FUNC(id = 3) +; CHECK-NEXT: .long 201326593 # 0xc000001 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 4) +; CHECK-NEXT: .long 167772160 # 0xa000000 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 0 # BTF_KIND_VOLATILE(id = 5) +; CHECK-NEXT: .long 150994944 # 0x9000000 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long 47 # BTF_KIND_INT(id = 6) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 16777224 # 0x1000008 +; CHECK-NEXT: .long 52 # BTF_KIND_VAR(id = 7) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 54 # BTF_KIND_DATASEC(id = 8) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long g +; CHECK-NEXT: .long 1 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii "test" # string offset=5 +; CHECK: .ascii "char" # string offset=47 +; CHECK: .byte 103 # string offset=52 +; CHECK: .ascii ".rodata" # string offset=54 + +attributes #0 = { nofree norecurse nounwind willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9, !10, !11} +!llvm.ident = !{!12} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9cc417cbca1cece0d55fa3d1e15682943a06139e)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/btf/tests") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7) +!7 = !DIDerivedType(tag: 
DW_TAG_volatile_type, baseType: !8) +!8 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!9 = !{i32 7, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"wchar_size", i32 4} +!12 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 9cc417cbca1cece0d55fa3d1e15682943a06139e)"} +!13 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 2, type: !14, scopeLine: 2, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4) +!14 = !DISubroutineType(types: !15) +!15 = !{!16} +!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !DILocation(line: 3, column: 10, scope: !13) +!18 = !{!19, !19, i64 0} +!19 = !{!"omnipotent char", !20, i64 0} +!20 = !{!"Simple C/C++ TBAA"} +!21 = !DILocation(line: 3, column: 3, scope: !13) From ce779098006e9de32678f28140f22de7e4a19189 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 30 Apr 2021 22:38:40 +0200 Subject: [PATCH 273/318] [ValueTracking] Limit scan when checking poison UB (PR50155) The current code can scan an unlimited number of instructions, if the containing basic block is very large. The test case from PR50155 contains a basic block with approximately 100k instructions. To avoid this, limit the number of instructions we inspect. At the same time, drop the limit on the number of basic blocks, as this will be implicitly limited by the number of instructions as well. 
(cherry picked from commit 2cd78686055f1badb9aa55cb95e189548ffc82f0) --- llvm/lib/Analysis/ValueTracking.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e174c5efe424..75486d3c80e7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5150,6 +5150,9 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, return false; } + // Limit number of instructions we look at, to avoid scanning through large + // blocks. The current limit is chosen arbitrarily. + unsigned ScanLimit = 32; BasicBlock::const_iterator End = BB->end(); if (!PoisonOnly) { @@ -5160,6 +5163,11 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, // For example, 'udiv x, (undef | 1)' isn't UB. for (auto &I : make_range(Begin, End)) { + if (isa<DbgInfoIntrinsic>(I)) + continue; + if (--ScanLimit == 0) + break; + if (const auto *CB = dyn_cast<CallBase>(&I)) { for (unsigned i = 0; i < CB->arg_size(); ++i) { if (CB->paramHasAttr(i, Attribute::NoUndef) && @@ -5186,9 +5194,12 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, for_each(V->users(), Propagate); Visited.insert(BB); - unsigned Iter = 0; - while (Iter++ < MaxAnalysisRecursionDepth) { + while (true) { for (auto &I : make_range(Begin, End)) { + if (isa<DbgInfoIntrinsic>(I)) + continue; + if (--ScanLimit == 0) + return false; if (mustTriggerUB(&I, YieldsPoison)) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&I)) From 7cc5b1593554c4a589e5e57c64c28bbd85c9987f Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 24 Mar 2021 09:24:24 -0700 Subject: [PATCH 274/318] [OPENMP]Fix PR48571: critical/master in outlined contexts cause crash. When emitting an inlined region for master/critical directives, there is no need to clear the lambda/block context data; otherwise the variables cannot be found, which causes a crash at compile time.
Differential Revision: https://reviews.llvm.org/D99280 (cherry picked from commit 7654bb6303d290b19cad29137be810e69a0bf917) --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 32 ++++++++++++++--------- clang/test/OpenMP/critical_codegen.cpp | 25 ++++++++++++++++++ clang/test/OpenMP/master_codegen.cpp | 35 ++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 83dfa0780547..caa5291ff6fa 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -409,6 +409,7 @@ class InlinedOpenMPRegionRAII { llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; const CodeGen::CGBlockInfo *BlockInfo = nullptr; + bool NoInheritance = false; public: /// Constructs region for combined constructs. @@ -416,16 +417,19 @@ class InlinedOpenMPRegionRAII { /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) - : CGF(CGF) { + OpenMPDirectiveKind Kind, bool HasCancel, + bool NoInheritance = true) + : CGF(CGF), NoInheritance(NoInheritance) { // Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - LambdaThisCaptureField = CGF.LambdaThisCaptureField; - CGF.LambdaThisCaptureField = nullptr; - BlockInfo = CGF.BlockInfo; - CGF.BlockInfo = nullptr; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; + } } ~InlinedOpenMPRegionRAII() { @@ -434,9 +438,11 @@ class InlinedOpenMPRegionRAII { cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - CGF.LambdaThisCaptureField = LambdaThisCaptureField; - CGF.BlockInfo = BlockInfo; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; + } } }; @@ -3853,7 +3859,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Processing for implicitly captured variables.
InlinedOpenMPRegionRAII Region( CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, - /*HasCancel=*/false); + /*HasCancel=*/false, /*NoInheritance=*/true); SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } if (Type->isArrayType()) { @@ -6214,7 +6220,9 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, bool HasCancel) { if (!CGF.HaveInsertPoint()) return; - InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); + InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, + InnerKind != OMPD_critical && + InnerKind != OMPD_master); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp index 46fad63b3bd8..d84f2b2af22b 100644 --- a/clang/test/OpenMP/critical_codegen.cpp +++ b/clang/test/OpenMP/critical_codegen.cpp @@ -68,6 +68,31 @@ int main() { return a; } +// ALL-LABEL: lambda_critical +// TERM_DEBUG-LABEL: lambda_critical +void lambda_critical(int a, int b) { + auto l = [=]() { +#pragma omp critical + { + // ALL: call void @__kmpc_critical( + int c = a + b; + } + }; + + l(); + + auto l1 = [=]() { +#pragma omp parallel +#pragma omp critical + { + // ALL: call void @__kmpc_critical( + int c = a + b; + } + }; + + l1(); +} + struct S { int a; }; diff --git a/clang/test/OpenMP/master_codegen.cpp b/clang/test/OpenMP/master_codegen.cpp index 8554ad8e7dec..353284ea8541 100644 --- a/clang/test/OpenMP/master_codegen.cpp +++ b/clang/test/OpenMP/master_codegen.cpp @@ -55,6 +55,41 @@ int main() { return a; } +// ALL-LABEL: lambda_master +// TERM_DEBUG-LABEL: lambda_master +void lambda_master(int a, int b) { + auto l = [=]() { +#pragma omp master + { + // ALL: call i32 @__kmpc_master( + int c = a + b; + } + }; + + l(); + + auto l1 = [=]() { +#pragma omp parallel +#pragma omp master + { + // ALL: call i32 @__kmpc_master( + int c = a + b; + } + }; + + l1(); + + auto l2 = [=]() { +#pragma omp parallel master + { + // ALL: call i32 
@__kmpc_master( + int c = a + b; + } + }; + + l2(); +} + // ALL-LABEL: parallel_master // TERM_DEBUG-LABEL: parallel_master void parallel_master() { From 3be5dbbc32dd348dddcec9e3baec96b7dcef8a35 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 22 Mar 2021 10:05:25 +0100 Subject: [PATCH 275/318] Make clangd CompletionModel usable even with non-standard (but supported) layout llvm supports specifying a non-standard layout where each project lies in its own place. Do not assume a fixed layout and use the appropriate cmake variable instead. Differential Revision: https://reviews.llvm.org/D96787 (cherry picked from commit f51ab1871655a9a96134c2636c37dcb5a6b01ac3) --- clang-tools-extra/clangd/quality/CompletionModel.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake b/clang-tools-extra/clangd/quality/CompletionModel.cmake index 60c6d2aa8433..41bc2ed1890b 100644 --- a/clang-tools-extra/clangd/quality/CompletionModel.cmake +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake @@ -5,8 +5,8 @@ # will define a C++ class called ${cpp_class} - which may be a # namespace-qualified class name. function(gen_decision_forest model filename cpp_class) - set(model_compiler ${CMAKE_SOURCE_DIR}/../clang-tools-extra/clangd/quality/CompletionModelCodegen.py) - + set(model_compiler ${LLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR}/clangd/quality/CompletionModelCodegen.py) + set(output_dir ${CMAKE_CURRENT_BINARY_DIR}) set(header_file ${output_dir}/${filename}.h) set(cpp_file ${output_dir}/${filename}.cpp) From adae17728bad62fa9ce1635905d0b0bde30e3eba Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Wed, 5 May 2021 19:25:34 +0100 Subject: [PATCH 276/318] Make clangd CompletionModel not depend on directory layout. 
The current code accounts for two possible layouts, but there is at least a third supported layout: clang-tools-extra may also be checked out as clang/tools/extra with the releases, which was not yet handled. Rather than treating that as a special case, use the location of CompletionModel.cmake to handle all three cases. This should address the problems that prompted D96787 and the problems that prompted the proposed revert D100625. Reviewed By: usaxena95 Differential Revision: https://reviews.llvm.org/D101851 (cherry picked from commit 7907c46fe6195728fafd843b8c0fb19a3e68e9ad) --- clang-tools-extra/clangd/quality/CompletionModel.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/quality/CompletionModel.cmake b/clang-tools-extra/clangd/quality/CompletionModel.cmake index 41bc2ed1890b..dc0c0cde4dab 100644 --- a/clang-tools-extra/clangd/quality/CompletionModel.cmake +++ b/clang-tools-extra/clangd/quality/CompletionModel.cmake @@ -4,8 +4,9 @@ # ${CMAKE_CURRENT_BINARY_DIR}. The generated header # will define a C++ class called ${cpp_class} - which may be a # namespace-qualified class name. +set(CLANGD_COMPLETION_MODEL_COMPILER ${CMAKE_CURRENT_LIST_DIR}/CompletionModelCodegen.py) function(gen_decision_forest model filename cpp_class) - set(model_compiler ${LLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR}/clangd/quality/CompletionModelCodegen.py) + set(model_compiler ${CLANGD_COMPLETION_MODEL_COMPILER}) set(output_dir ${CMAKE_CURRENT_BINARY_DIR}) set(header_file ${output_dir}/${filename}.h) From a066f4eb679488cfeed2fec24574adcb55f367a1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Jun 2021 09:49:32 -0700 Subject: [PATCH 277/318] [X86] Add ISD::FREEZE and ISD::AssertAlign to the list of opcodes that don't guarantee upper 32 bits are zero. The freeze issue was reported here https://llvm.discourse.group/t/bug-or-feature-freeze-instruction/3639 I don't have a test for AssertAlign. 
I just noticed it was missing and assume it should be similar to the other two Asserts. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D104178 (cherry picked from commit c997867dc084a1bcf631816f964b3ff49a297ba3) --- llvm/lib/Target/X86/X86InstrCompiler.td | 11 +++++++---- llvm/test/CodeGen/X86/freeze.ll | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7a2facf226d8..dc6361aecc60 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1344,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>; // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. Any other 32-bit operation will zero-extend -// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper -// 32 bits, they're probably just qualifying a CopyFromReg. +// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying +// anything about the upper 32 bits, they're probably just qualifying a +// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit +// operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != ISD::AssertSext && - N->getOpcode() != ISD::AssertZext; + N->getOpcode() != ISD::AssertZext && + N->getOpcode() != ISD::AssertAlign && + N->getOpcode() != ISD::FREEZE; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll index cf015d3c892c..7131f3a5bc4f 100644 --- a/llvm/test/CodeGen/X86/freeze.ll +++ b/llvm/test/CodeGen/X86/freeze.ll @@ -122,3 +122,26 @@ define i64 @freeze_array() { %t1 = add i64 %v1, %v2 ret i64 %t1 } + +; Make sure we emit a movl to zext the input before the imulq. This previously +; failed because freeze was not listed in the instructions that don't zext their +; result in the def32 pattern X86InstrCompiler.td. +define i32 @freeze_zext(i64 %a) nounwind { +; X86ASM-LABEL: freeze_zext: +; X86ASM: # %bb.0: # %entry +; X86ASM-NEXT: movq %rdi, %rax +; X86ASM-NEXT: movl %eax, %ecx +; X86ASM-NEXT: movl $3435973837, %edx # imm = 0xCCCCCCCD +; X86ASM-NEXT: imulq %rcx, %rdx +; X86ASM-NEXT: shrq $35, %rdx +; X86ASM-NEXT: addl %edx, %edx +; X86ASM-NEXT: leal (%rdx,%rdx,4), %ecx +; X86ASM-NEXT: subl %ecx, %eax +; X86ASM-NEXT: # kill: def $eax killed $eax killed $rax +; X86ASM-NEXT: retq +entry: + %x = trunc i64 %a to i32 + %y = freeze i32 %x + %z = urem i32 %y, 10 + ret i32 %z +} From a95bf588bd727fa71486098d76b8a1bc00650361 Mon Sep 17 00:00:00 2001 From: Yvan Roux Date: Wed, 9 Jun 2021 15:30:50 +0200 Subject: [PATCH 278/318] [ARM] Fix Machine Outliner LDRD/STRD handling in Thumb mode. This is a fix for PR50481 Immediate values for AddrModeT2_i8s4 are already scaled in MCinst operand. This patch changes the number of bits and scale factor to reflect that state when checking stack offset status. 
AddrModeT2_i7s[2|4] also have this particularity but since MVE instructions are not outlined, just move these cases to the unhandled ones. Differential Revision: https://reviews.llvm.org/D103167 (cherry picked from commit 6c78dbd4ca1f2c25cdc276d646c7920afe856ca3) --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 22 ++++++++----------- .../machine-outliner-stack-fixup-thumb.mir | 20 ++++++++--------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 112eb59e173d..e418d53b56a4 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5934,6 +5934,9 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register || AddrMode == ARMII::AddrModeT2_pc // PCrel access || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR || AddrMode == ARMII::AddrModeNone) return false; @@ -5976,6 +5979,10 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT2_i8s4: + // FIXME: Values are already scaled in this addressing mode. 
+ assert((Fixup & 3) == 0 && "Can't encode this offset!"); + NumBits = 10; + break; case ARMII::AddrModeT2_ldrex: NumBits = 8; Scale = 4; @@ -5984,17 +5991,6 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, case ARMII::AddrMode_i12: NumBits = 12; break; - case ARMII::AddrModeT2_i7: - NumBits = 7; - break; - case ARMII::AddrModeT2_i7s2: - NumBits = 7; - Scale = 2; - break; - case ARMII::AddrModeT2_i7s4: - NumBits = 7; - Scale = 4; - break; case ARMII::AddrModeT1_s: // SP-relative LD/ST NumBits = 8; Scale = 4; @@ -6004,8 +6000,8 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, } // Make sure the offset is encodable for instructions that scale the // immediate. - if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0) - return false; + assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && + "Can't encode this offset!"); OffVal += Fixup / Scale; unsigned Mask = (1 << NumBits) - 1; diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir index 7d9b19553b08..6c940f15eba6 100644 --- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir @@ -81,23 +81,23 @@ body: | ;CHECK-LABEL: name: CheckAddrModeT2_i8s4 ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]] - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 254, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg $r0 = tMOVr $r1, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg 
t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp t2STRDi8 $r0, $r1, $sp, 0, 14, $noreg t2STRDi8 $r0, $r1, $sp, 8, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 253, 14, $noreg - t2STRDi8 $r0, $r1, $sp, 254, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1012, 14, $noreg + t2STRDi8 $r0, $r1, $sp, 1020, 14, $noreg BX_RET 14, $noreg ... --- @@ -205,9 +205,9 @@ body: | ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 2, 14 /* CC::al */, $noreg - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 10, 14 /* CC::al */, $noreg - ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 255, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 16, 14 /* CC::al */, $noreg + ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] From a37dff2bbf68b1a98ce6c48c6e4e5b32f7df0d9c Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Wed, 9 Jun 2021 02:22:48 +0000 Subject: [PATCH 279/318] [PowerPC][Dwarf] Assign MMA register's dwarf register number to negative value According to ELF V2 ABI, `0` should be the dwarf number of `r0`. Currently MMA's register also uses `0` as its dwarf number, this confuses `RegisterInfoEmitter` and generates wrong dwarf -> llvm mapping. ``` extern const MCRegisterInfo::DwarfLLVMRegPair PPCDwarfFlavour1Dwarf2L[] = { { 0U, PPC::VSRp31 }, ``` This leads to wrong cfi output in https://reviews.llvm.org/D100290. 
Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D103761 (cherry picked from commit c87c294397ea4c3dae31f5a7fd6e38602338fd57) --- llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 45d60369018b..551735c85b51 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -173,7 +173,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in { foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { def VSRp#!srl(Index, 1) : VSRPair("VSL"#Index), !cast("VSL"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63). @@ -181,7 +181,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in { def VSRp#!add(!srl(Index, 1), 16) : VSRPair("V"#Index), !cast("V"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } } @@ -422,14 +422,14 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { } let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, 
-1]>; + def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, ACC4, ACC5, ACC6, ACC7)> { @@ -437,14 +437,14 @@ def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, } let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def UACCRC : RegisterClass<"PPC", [v512i1], 128, (add UACC0, UACC1, UACC2, UACC3, From 04a68288ded459c7e76135a9ee4b7e9d4bf4cdc2 Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Wed, 9 Jun 2021 06:24:14 +0000 Subject: [PATCH 280/318] [PowerPC] Make sure the first probe is full size or is the last probe when stack is realigned When 
`-fstack-clash-protection` is enabled and the stack has to be realigned, some parts of the redzone are written prior to the probe, so the probe might overwrite content already written in the redzone. To avoid this, we have to make sure the first probe is at full probe size or is the last probe so that we can skip the redzone. It also fixes a violation of the ABI under PPC where `r1` isn't updated atomically. This fixes https://bugs.llvm.org/show_bug.cgi?id=49903. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D100290 (cherry picked from commit bf58600badb1138a501ad81b07298207a7a64b2a) --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 365 +++++------ llvm/test/CodeGen/PowerPC/pr46759.ll | 46 +- .../PowerPC/stack-clash-prologue-nounwind.ll | 222 +++---- .../CodeGen/PowerPC/stack-clash-prologue.ll | 577 +++++++++--------- 4 files changed, 596 insertions(+), 614 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 50ce11b8374f..16536bf23deb 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(ScratchReg) - .addDef(TempReg) // TempReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. .addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(TempReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, bool HasBP = RegInfo->hasBasePointer(MF); Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, @@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) - // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 - // available and r1 is already copied to r30 which is BPReg. So BPReg stores - // the value of stackptr. - // First we have to probe tail interval whose size is less than probesize, - // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, - // ScratchReg stores the value of ((stackptr % align) % probesize). Then we - // probe each block sized probesize until stackptr meets - // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized - // as negprobesize. At both stages, TempReg stores the value of - // (stackptr - (stackptr % align)). 
- auto dynamicProbe = [&](MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, Register ScratchReg, - Register TempReg) { - assert(HasBP && isPPC64 && "Probe alignment part not available"); + // Used to probe stack when realignment is required. + // Note that, according to ABI's requirement, *sp must always equals the + // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. + // Following is pseudo code: + // final_sp = (sp & align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg + // before probe code, we don't need to save it, so we get one additional reg + // that can be used to materialize the probeside if needed to use xform. + // Otherwise, we can NOT materialize probeside, so we can only use Dform for + // now. + // + // The allocations are: + // if (HasBP && HasRedzone) { + // r0: materialize the probesize if needed so that we can use xform. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`. 
+ // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have base pointer when its " + "stack is realigned."); assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - // ScratchReg = stackptr % align - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(BPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) - .addReg(ScratchReg) - .addReg(BPReg); - // ScratchReg = (stackptr % align) % probesize - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(ScratchReg) - .addImm(0) - .addImm(64 - Log2(ProbeSize)); + + // FIXME: We can eliminate this limitation if we get more infomation about + // which part of redzone are already used. Used redzone can be treated + // probed. But there might be `holes' in redzone probed, this could + // complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support NegProbeSize materializable by DForm currently. + // When HasBP && HasRedzone, we can use xform if we have an additional idle + // register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. Layout of output assembly kinda like: + // Layout of output assembly kinda like: // bb.0: // ... 
- // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval - // neg $scratchreg, $scratchreg - // stdux $bpreg, r1, $scratchreg + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, + // bge bb.2 + // bb.1: + // stdu , (r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, + // blt bb.1 // bb.2: - // - // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block - // stdux $bpreg, r1, $scratchreg - // cmpd r1, $tempreg - // bne bb.3 - // bb.4: - // ... + // stdux , r1, $scratchreg MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeResidualMBB); - MachineBasicBlock *ProbeLoopPreHeaderMBB = - MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 - ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); - ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + // bb.2 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg + // to TempReg to satisfy it. 
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); - MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } // bb.1 - BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) - .addReg(ScratchReg); - allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, - false, BPReg); - ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); - // bb.2 - MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), - NegProbeSize, ScratchReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeExitMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, - false, BPReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_NE) - 
.addReg(CRReg) - .addMBB(ProbeLoopBodyMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } // Update liveins. - recomputeLiveIns(*ProbeResidualMBB); - recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); recomputeLiveIns(*ProbeExitMBB); return ProbeExitMBB; }; // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign. + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in - // 64-bit mode. - if (isPPC64) { - // Use BPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. - Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); - // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) - .addReg(BPReg) - .addReg(BPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + // Calculate final stack pointer. 
+ if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) - .addReg(SPReg); - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) - .addReg(ScratchReg) - .addReg(SPReg); - } + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { // Initialize current frame pointer. BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Use FPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + // Probe residual part. 
+ if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. 
+ if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } - // Update liveins. 
- recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); } ++NumPrologProbed; MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index 33b44b720b6e..d6d02921efca 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -6,32 +6,26 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-LABEL: foo: ; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 -; CHECK-LE-NEXT: # %bb.1: # %entry -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB0_2: # %entry -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB0_4 -; CHECK-LE-NEXT: .LBB0_3: # %entry +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -6144 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB0_2 +; CHECK-LE-NEXT: .LBB0_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB0_3 -; CHECK-LE-NEXT: .LBB0_4: # %entry -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB0_1 +; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ 
-52,13 +46,13 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-NEXT: add r4, r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_6 -; CHECK-LE-NEXT: .LBB0_5: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_4 +; CHECK-LE-NEXT: .LBB0_3: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_5 -; CHECK-LE-NEXT: .LBB0_6: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_3 +; CHECK-LE-NEXT: .LBB0_4: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll index e595d8a732a5..4a8de768d82a 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll @@ -44,12 +44,12 @@ entry: define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -60,12 +60,12 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -76,16 +76,16 @@ define i8 @f1() #0 
"stack-probe-size"="0" nounwind { ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: li r12, 257 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: li r3, 3 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -102,13 +102,13 @@ entry: define i8 @f2() #0 nounwind { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -119,13 +119,13 @@ define i8 @f2() #0 nounwind { ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -136,16 +136,16 @@ define i8 @f2() #0 nounwind { ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr 
r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -166,10 +166,10 @@ entry: define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -178,10 +178,10 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -190,11 +190,11 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; 
CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -261,15 +261,15 @@ entry: define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -280,15 +280,15 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -299,18 +299,18 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry 
; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -331,14 +331,14 @@ entry: define i8 @f6() #0 nounwind { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -349,14 +349,14 @@ define i8 @f6() #0 nounwind { ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -367,17 +367,17 @@ define i8 @f6() #0 nounwind { ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry 
; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -398,17 +398,17 @@ entry: define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -419,17 +419,17 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; 
CHECK-BE-NEXT: li r3, 3 @@ -440,20 +440,20 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 9(r1) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll index 6443059c9704..7e4556c59737 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -44,13 +44,13 @@ entry: define i8 @f1() #0 "stack-probe-size"="0" { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -63,13 +63,13 @@ define i8 @f1() #0 "stack-probe-size"="0" { ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; 
CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -82,17 +82,17 @@ define i8 @f1() #0 "stack-probe-size"="0" { ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: li r12, 257 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 4112 ; CHECK-32-NEXT: li r3, 3 @@ -111,14 +111,14 @@ entry: define i8 @f2() #0 { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -131,14 +131,14 @@ define i8 @f2() #0 { ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; 
CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -151,18 +151,18 @@ define i8 @f2() #0 { ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -184,11 +184,11 @@ entry: define i8 @f3() #0 "stack-probe-size"="32768" { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 @@ -199,11 +199,11 @@ define i8 @f3() 
#0 "stack-probe-size"="32768" { ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 @@ -214,13 +214,13 @@ define i8 @f3() #0 "stack-probe-size"="32768" { ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -291,16 +291,16 @@ entry: define i8 @f5() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; 
CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -313,16 +313,16 @@ define i8 @f5() #0 "stack-probe-size"="65536" { ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -335,20 +335,20 @@ define i8 @f5() #0 "stack-probe-size"="65536" { ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592 ; CHECK-32-NEXT: li r3, 3 @@ -370,15 +370,15 @@ entry: define i8 @f6() #0 { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; 
CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -391,15 +391,15 @@ define i8 @f6() #0 { ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -412,19 +412,19 @@ define i8 @f6() #0 { ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub 
r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840 ; CHECK-32-NEXT: li r3, 3 @@ -446,18 +446,18 @@ entry: define i8 @f7() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -470,18 +470,18 @@ define i8 @f7() #0 "stack-probe-size"="65536" { ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -494,22 +494,22 @@ define i8 @f7() #0 
"stack-probe-size"="65536" { ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016 ; CHECK-32-NEXT: li r3, 3 @@ -599,31 +599,24 @@ define i32 @f8(i64 %i) local_unnamed_addr #0 { define i32 @f9(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f9: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB9_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -10240 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB9_2 +; CHECK-LE-NEXT: .LBB9_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB9_1 
; CHECK-LE-NEXT: .LBB9_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB9_4 -; CHECK-LE-NEXT: .LBB9_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB9_3 -; CHECK-LE-NEXT: .LBB9_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 2048 @@ -637,31 +630,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: f9: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 53 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 53 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB9_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: li r12, -10240 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB9_2 +; CHECK-BE-NEXT: .LBB9_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB9_1 ; CHECK-BE-NEXT: .LBB9_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB9_4 -; CHECK-BE-NEXT: .LBB9_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB9_3 -; CHECK-BE-NEXT: .LBB9_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -2048(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: stdu r12, 
-4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 2048 @@ -675,15 +661,23 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; ; CHECK-32-LABEL: f9: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 21 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -2048(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 21 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -10240 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB9_2 +; CHECK-32-NEXT: .LBB9_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB9_1 +; CHECK-32-NEXT: .LBB9_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx r30, 0, r0 @@ -712,30 +706,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { define i32 @f10(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f10: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 54 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 54 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB10_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -5120 +; 
CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB10_2 +; CHECK-LE-NEXT: .LBB10_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB10_1 ; CHECK-LE-NEXT: .LBB10_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB10_4 -; CHECK-LE-NEXT: .LBB10_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB10_3 -; CHECK-LE-NEXT: .LBB10_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -1024(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 1024 @@ -749,30 +737,24 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: f10: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 54 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 54 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB10_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: li r12, -5120 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB10_2 +; CHECK-BE-NEXT: .LBB10_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB10_1 ; CHECK-BE-NEXT: .LBB10_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB10_4 -; 
CHECK-BE-NEXT: .LBB10_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB10_3 -; CHECK-BE-NEXT: .LBB10_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -1024(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 1024 @@ -786,14 +768,23 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; ; CHECK-32-LABEL: f10: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 22 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -1024(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 22 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -5120 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB10_2 +; CHECK-32-NEXT: .LBB10_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB10_1 +; CHECK-32-NEXT: .LBB10_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx r30, 0, r0 @@ -821,35 +812,26 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-LABEL: f11: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 49 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 49 -; CHECK-LE-NEXT: subc r12, r30, r0 -; 
CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB11_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: lis r12, -2 +; CHECK-LE-NEXT: ori r12, r12, 32768 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB11_2 +; CHECK-LE-NEXT: .LBB11_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB11_1 ; CHECK-LE-NEXT: .LBB11_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB11_4 -; CHECK-LE-NEXT: .LBB11_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB11_3 -; CHECK-LE-NEXT: .LBB11_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: li r0, 24 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: .LBB11_5: -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: bdnz .LBB11_5 -; CHECK-LE-NEXT: # %bb.6: -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -876,12 +858,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-NEXT: add r4, r1, r7 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB11_8 -; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: .LBB11_3: ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB11_7 -; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: bne cr0, .LBB11_3 +; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 @@ -891,35 +873,26 @@ define void @f11(i32 
%vla_size, i64 %i) #0 { ; ; CHECK-BE-LABEL: f11: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 49 ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 49 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB11_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: lis r12, -2 +; CHECK-BE-NEXT: ori r12, r12, 32768 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB11_2 +; CHECK-BE-NEXT: .LBB11_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB11_1 ; CHECK-BE-NEXT: .LBB11_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB11_4 -; CHECK-BE-NEXT: .LBB11_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB11_3 -; CHECK-BE-NEXT: .LBB11_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: li r0, 24 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: .LBB11_5: -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: bdnz .LBB11_5 -; CHECK-BE-NEXT: # %bb.6: -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -946,12 +919,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-BE-NEXT: add r4, r1, r7 ; CHECK-BE-NEXT: stdux r3, r1, r5 ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: beq cr0, .LBB11_8 -; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: .LBB11_3: ; CHECK-BE-NEXT: stdu 
r3, -4096(r1) ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: bne cr0, .LBB11_7 -; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: bne cr0, .LBB11_3 +; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) ; CHECK-BE-NEXT: mr r1, r30 @@ -961,18 +934,24 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; ; CHECK-32-LABEL: f11: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 17 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: li r0, 24 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: .LBB11_1: -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: bdnz .LBB11_1 -; CHECK-32-NEXT: # %bb.2: -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 17 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: lis r12, -2 +; CHECK-32-NEXT: ori r12, r12, 32768 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB11_2 +; CHECK-32-NEXT: .LBB11_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB11_1 +; CHECK-32-NEXT: .LBB11_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -4 ; CHECK-32-NEXT: stwx r31, 0, r0 From 0e164144c7081fc0a96a6bc20f1f25b3f9e7f517 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 7 Jun 2021 08:08:29 -0400 Subject: [PATCH 281/318] [OpenMP] Fix typo in libomptarge for the wrong environment variable Summary: There was a typo in libomptarget that told users to use LIBOMPTARGET_DEBUG instead of LIBOMPTARGET_INFO. 
(cherry picked from commit 0af4e74aef2eaddc17e1e92eb6d1102cdb5f8ff4) --- openmp/libomptarget/src/interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index b97676a6981b..fba97380a4b2 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -61,7 +61,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) { for (auto &Device : PM->Devices) dumpTargetPointerMappings(loc, Device); else - FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target " + FAILURE_MESSAGE("Run with LIBOMPTARGET_INFO=%d to dump host-target " "pointer mappings.\n", OMP_INFOTYPE_DUMP_TABLE); From c7d7ace46258b04aa4b5df08952bfebc6fc4ce94 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Fri, 5 Mar 2021 21:30:47 +0100 Subject: [PATCH 282/318] [clang-format] Rework Whitesmiths mode to use line-level values in UnwrappedLineParser This commit removes the old way of handling Whitesmiths mode in favor of just setting the levels during parsing and letting the formatter handle it from there. It requires a bit of special-casing during the parsing, but ends up a bit cleaner than before. It also removes some of switch/case unit tests that don't really make much sense when dealing with Whitesmiths. 
Differential Revision: https://reviews.llvm.org/D94500 (cherry picked from commit f7f9f94b2e2b4c714bac9036f6b73a3df42daaff) --- clang/docs/ReleaseNotes.rst | 3 + clang/lib/Format/UnwrappedLineFormatter.cpp | 7 -- clang/lib/Format/UnwrappedLineParser.cpp | 89 ++++++++++++----- clang/lib/Format/UnwrappedLineParser.h | 12 ++- clang/unittests/Format/FormatTest.cpp | 100 ++++++++++++++++++-- 5 files changed, 170 insertions(+), 41 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4cc1b0b9d2cf..460a62734e90 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -389,6 +389,9 @@ clang-format ``AlignConsecutiveDeclarations`` and ``AlignConsecutiveMacros`` have been modified to allow alignment across empty lines and/or comments. +- Support for Whitesmiths has been improved, with fixes for ``namespace`` blocks + and ``case`` blocks and labels. + libclang -------- diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 5dd0ccdfa6fd..7d197310e65b 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -1281,13 +1281,6 @@ void UnwrappedLineFormatter::formatFirstToken( if (Newlines) Indent = NewlineIndent; - // If in Whitemsmiths mode, indent start and end of blocks - if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { - if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case, - tok::kw_default)) - Indent += Style.IndentWidth; - } - // Preprocessor directives get indented before the hash only if specified if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && (Line.Type == LT_PreprocessorDirective || diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index f689a6361a3a..bec18bd5d8df 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -579,17 +579,23 @@ size_t 
UnwrappedLineParser::computePPHash() const { return h; } -void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, - bool MunchSemi) { +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, + bool MunchSemi, + bool UnindentWhitesmithsBraces) { assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && "'{' or macro block token expected"); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); + // For Whitesmiths mode, jump to the next level prior to skipping over the + // braces. + if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + size_t PPStartHash = computePPHash(); unsigned InitialLevel = Line->Level; - nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); + nextToken(/*LevelDifference=*/AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -602,10 +608,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, ? (UnwrappedLine::kInvalidIndex) : (CurrentLines->size() - 1 - NbPreprocessorDirectives); + // Whitesmiths is weird here. The brace needs to be indented for the namespace + // block, but the block itself may not be indented depending on the style + // settings. This allows the format to back up one level in those cases. + if (UnindentWhitesmithsBraces) + --Line->Level; + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - if (AddLevel) - ++Line->Level; + if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) + Line->Level += AddLevels; parseLevel(/*HasOpeningBrace=*/true); if (eof()) @@ -621,7 +633,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, size_t PPEndHash = computePPHash(); // Munch the closing brace. - nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); + nextToken(/*LevelDifference=*/-AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -637,6 +649,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, nextToken(); Line->Level = InitialLevel; + FormatTok->setBlockKind(BK_Block); if (PPStartHash == PPEndHash) { Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; @@ -2128,15 +2141,34 @@ void UnwrappedLineParser::parseNamespace() { if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); - bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || - (Style.NamespaceIndentation == FormatStyle::NI_Inner && - DeclarationScopeStack.size() > 1); - parseBlock(/*MustBeDeclaration=*/true, AddLevel); + unsigned AddLevels = + Style.NamespaceIndentation == FormatStyle::NI_All || + (Style.NamespaceIndentation == FormatStyle::NI_Inner && + DeclarationScopeStack.size() > 1) + ? 1u + : 0u; + bool ManageWhitesmithsBraces = + AddLevels == 0u && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + + // If we're in Whitesmiths mode, indent the brace if we're not indenting + // the whole block. + if (ManageWhitesmithsBraces) + ++Line->Level; + + parseBlock(/*MustBeDeclaration=*/true, AddLevels, + /*MunchSemi=*/true, + /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); + // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. if (FormatTok->Tok.is(tok::semi)) nextToken(); - addUnwrappedLine(); + + addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); + + if (ManageWhitesmithsBraces) + --Line->Level; } // FIXME: Add error handling. } @@ -2222,6 +2254,11 @@ void UnwrappedLineParser::parseDoWhile() { return; } + // If in Whitesmiths mode, the line with the while() needs to be indented + // to the same level as the block. 
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + nextToken(); parseStructuralElement(); } @@ -2234,25 +2271,19 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { if (LeftAlignLabel) Line->Level = 0; - bool RemoveWhitesmithsCaseIndent = - (!Style.IndentCaseBlocks && - Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths); - - if (RemoveWhitesmithsCaseIndent) - --Line->Level; - if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter( - this, Line->Level, Style.BraceWrapping.AfterCaseLabel, - Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent); + CompoundStatementIndenter Indenter(this, Line->Level, + Style.BraceWrapping.AfterCaseLabel, + Style.BraceWrapping.IndentBraces); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); - if (RemoveWhitesmithsCaseIndent) { + if (!Style.IndentCaseBlocks && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { Line->Level++; } } @@ -2920,17 +2951,29 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, llvm::dbgs() << "\n"; } -void UnwrappedLineParser::addUnwrappedLine() { +void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ if (CurrentLines == &Lines) printDebugInfo(*Line); }); + + // If this line closes a block when in Whitesmiths mode, remember that + // information so that the level can be decreased after the line is added. + // This has to happen after the addition of the line since the line itself + // needs to be indented. 
+ bool ClosesWhitesmithsBlock = + Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; + + if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) + --Line->Level; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 02b328cb72de..ce135fac5e57 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -85,8 +85,9 @@ class UnwrappedLineParser { void reset(); void parseFile(); void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, bool AddLevel = true, - bool MunchSemi = true); + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u, + bool MunchSemi = true, + bool UnindentWhitesmithsBraces = false); void parseChildBlock(); void parsePPDirective(); void parsePPDefine(); @@ -140,7 +141,12 @@ class UnwrappedLineParser { bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); bool tryToParseSimpleAttribute(); - void addUnwrappedLine(); + + // Used by addUnwrappedLine to denote whether to keep or remove a level + // when resetting the line state. + enum class LineLevel { Remove, Keep }; + + void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove); bool eof() const; // LevelDifference is the difference of levels after and before the current // token. 
For example: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ed26bb8a7150..45278fdb69e6 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -14741,6 +14741,7 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { WhitesmithsBraceStyle); */ + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_None; verifyFormat("namespace a\n" " {\n" "class A\n" @@ -14765,6 +14766,89 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " } // namespace a", WhitesmithsBraceStyle); + verifyFormat("namespace a\n" + " {\n" + "namespace b\n" + " {\n" + "class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + "struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_Inner; + verifyFormat("namespace a\n" + " {\n" + "namespace b\n" + " {\n" + " class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + " struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + + WhitesmithsBraceStyle.NamespaceIndentation = FormatStyle::NI_All; + verifyFormat("namespace a\n" + " {\n" + " namespace b\n" + " {\n" + " class A\n" + " {\n" + " void f()\n" + " {\n" + " if (true)\n" + " {\n" + " a();\n" + " b();\n" + " }\n" + " }\n" + " void g()\n" + " {\n" + " return;\n" + " }\n" + " };\n" + " struct B\n" + " {\n" + " int x;\n" + " };\n" + " } // namespace b\n" + " } // namespace a", + WhitesmithsBraceStyle); + verifyFormat("void f()\n" " {\n" " if (true)\n" @@ -14799,7 +14883,7 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " }\n", WhitesmithsBraceStyle); - WhitesmithsBraceStyle.IndentCaseBlocks = 
true; + WhitesmithsBraceStyle.IndentCaseLabels = true; verifyFormat("void switchTest1(int a)\n" " {\n" " switch (a)\n" @@ -14807,7 +14891,7 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " case 2:\n" " {\n" " }\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); @@ -14817,7 +14901,7 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " switch (a)\n" " {\n" " case 0:\n" - " break;\n" + " break;\n" " case 1:\n" " {\n" " break;\n" @@ -14825,9 +14909,9 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " case 2:\n" " {\n" " }\n" - " break;\n" + " break;\n" " default:\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); @@ -14840,17 +14924,17 @@ TEST_F(FormatTest, WhitesmithsBraceBreaking) { " {\n" " foo(x);\n" " }\n" - " break;\n" + " break;\n" " default:\n" " {\n" " foo(1);\n" " }\n" - " break;\n" + " break;\n" " }\n" " }\n", WhitesmithsBraceStyle); - WhitesmithsBraceStyle.IndentCaseBlocks = false; + WhitesmithsBraceStyle.IndentCaseLabels = false; verifyFormat("void switchTest4(int a)\n" " {\n" From f78f530bd38472f6bd058a0307484fc5edc57b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 10 Jun 2021 06:48:09 +0200 Subject: [PATCH 283/318] [llvm][PPC] Add missing case for 'I' asm memory operands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From https://llvm.org/docs/LangRef.html#asm-template-argument-modifiers: I: Print the letter ‘i’ if the operand is an integer constant, otherwise nothing. Used to print ‘addi’ vs ‘add’ instructions. 
Differential Revision: https://reviews.llvm.org/D103968 (cherry picked from commit a9e4f91adf59bbc72541b96dd30245eaeeedf3ce) --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 ++++++ llvm/test/CodeGen/PowerPC/asm-template-I.ll | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/asm-template-I.ll diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index cce21f32414a..6257709731b9 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -321,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, O << "0, "; printOperand(MI, OpNo, O); return false; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. // FIXME: Currently for PowerPC memory operands are always loaded diff --git a/llvm/test/CodeGen/PowerPC/asm-template-I.ll b/llvm/test/CodeGen/PowerPC/asm-template-I.ll new file mode 100644 index 000000000000..f77e6900efc0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/asm-template-I.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-- | FileCheck %s +; https://bugs.llvm.org/show_bug.cgi?id=50608 + +define dso_local signext i32 @main(i32 signext %argc, i8** %argv) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: addi 4, 1, -4 +; CHECK-NEXT: #APP +; CHECK-NEXT: .ascii "-1@0(4)" +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr +entry: + call void asm sideeffect " .asciz \22${0:n}@${1:I}$1\22 ", "n,nZr"(i32 1, i32 %argc) + ret i32 0 +} From e7dac564cd0ed9dee74ef972c46622743d90915d Mon Sep 17 00:00:00 2001 
From: Louis Dionne Date: Tue, 15 Jun 2021 17:55:27 -0400 Subject: [PATCH 284/318] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Un-deprecate?= =?UTF-8?q?=20std::allocator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a cherry-pick of 87784cc6fb3453a17e0e78 on 'main' for backporting to LLVM 12. Differential Revision: https://reviews.llvm.org/D104324 --- libcxx/include/memory | 32 +++++------ .../allocator_types.cxx2a.pass.cpp | 23 ++++---- .../PR50299.compile.pass.cpp | 20 +++++++ .../default.allocator/allocator.ctor.pass.cpp | 44 ++++++++------- .../default.allocator/allocator.dtor.pass.cpp | 3 +- .../allocator_pointers.pass.cpp | 4 +- ...cator_types.deprecated_in_cxx17.verify.cpp | 35 +++++++----- .../allocator_types.pass.cpp | 54 ++++++++++--------- ...llocator_types.removed_in_cxx20.verify.cpp | 11 ++-- .../allocator_types.void.compile.pass.cpp} | 28 ++++------ ...ocator_void.deprecated_in_cxx17.verify.cpp | 24 --------- 11 files changed, 137 insertions(+), 141 deletions(-) create mode 100644 libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp rename libcxx/test/{libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp => std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp} (51%) delete mode 100644 libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp diff --git a/libcxx/include/memory b/libcxx/include/memory index 39d0f5bee6a5..efb10c8fd25b 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -99,14 +99,14 @@ struct allocator_traits }; template <> -class allocator // deprecated in C++17, removed in C++20 +class allocator // removed in C++20 { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + typedef void* pointer; // deprecated in C++17 + typedef const void* const_pointer; // deprecated in C++17 + typedef void value_type; // deprecated in C++17 - template 
struct rebind {typedef allocator<_Up> other;}; + template struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17 }; template @@ -786,27 +786,27 @@ to_address(const _Pointer& __p) _NOEXCEPT template class allocator; -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS) +#if _LIBCPP_STD_VER <= 17 template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef const void* pointer; - typedef const void* const_pointer; - typedef const void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; #endif diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp index bfff214127ba..a6134b04a8f5 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.cxx2a.pass.cpp @@ -33,18 +33,19 @@ #include #include -#include "test_macros.h" - -int main(int, char**) -{ - static_assert((std::is_same::size_type, std::size_t>::value), ""); - 
static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); - static_assert((std::is_same::pointer, char*>::value), ""); - static_assert((std::is_same::const_pointer, const char*>::value), ""); - static_assert((std::is_same::reference, char&>::value), ""); - static_assert((std::is_same::const_reference, const char&>::value), ""); - static_assert((std::is_same::rebind::other, +template +void test() { + static_assert((std::is_same::size_type, std::size_t>::value), ""); + static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); + static_assert((std::is_same::pointer, T*>::value), ""); + static_assert((std::is_same::const_pointer, const T*>::value), ""); + static_assert((std::is_same::reference, T&>::value), ""); + static_assert((std::is_same::const_reference, const T&>::value), ""); + static_assert((std::is_same::template rebind::other, std::allocator >::value), ""); +} +int main(int, char**) { + test(); return 0; } diff --git a/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp new file mode 100644 index 000000000000..245d3d9d320f --- /dev/null +++ b/libcxx/test/std/utilities/memory/default.allocator/PR50299.compile.pass.cpp @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Make sure we can use std::allocator in all Standard modes. While the +// explicit specialization for std::allocator was deprecated, using that +// specialization was neither deprecated nor removed (in C++20 it should simply +// start using the primary template). +// +// See https://llvm.org/PR50299. 
+ +#include + +std::allocator a; diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp index 28bfe41680b6..6e6ff1f2d134 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.ctor.pass.cpp @@ -7,15 +7,14 @@ //===----------------------------------------------------------------------===// // -// UNSUPPORTED: c++03, c++11, c++14, c++17 // // template // class allocator // { // public: // All of these are constexpr after C++17 -// constexpr allocator() noexcept; -// constexpr allocator(const allocator&) noexcept; -// template constexpr allocator(const allocator&) noexcept; +// allocator() noexcept; +// allocator(const allocator&) noexcept; +// template allocator(const allocator&) noexcept; // ... // }; @@ -24,28 +23,27 @@ #include "test_macros.h" +template +TEST_CONSTEXPR_CXX20 bool test() { + typedef std::allocator A1; + typedef std::allocator A2; -int main(int, char**) -{ - { - typedef std::allocator AC; - typedef std::allocator AL; + A1 a1; + A1 a1_copy = a1; (void)a1_copy; + A2 a2 = a1; (void)a2; - constexpr AC a1; - constexpr AC a2{a1}; - constexpr AL a3{a2}; - (void) a3; - } - { - typedef std::allocator AC; - typedef std::allocator AL; - - constexpr AC a1; - constexpr AC a2{a1}; - constexpr AL a3{a2}; - (void) a3; - } + return true; +} +int main(int, char**) { + test(); + test(); + test(); +#if TEST_STD_VER > 17 + static_assert(test()); + static_assert(test()); + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp index 7ae87dd45353..a095ca102491 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp +++ 
b/libcxx/test/std/utilities/memory/default.allocator/allocator.dtor.pass.cpp @@ -13,7 +13,6 @@ #include - template constexpr bool test() { std::allocator alloc; @@ -26,11 +25,13 @@ constexpr bool test() { int main(int, char**) { test(); + test(); #ifdef _LIBCPP_VERSION // extension test(); #endif // _LIBCPP_VERSION static_assert(test()); + static_assert(test()); #ifdef _LIBCPP_VERSION // extension static_assert(test()); #endif // _LIBCPP_VERSION diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp index 27e91a650e69..777e5dfc44f5 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp @@ -11,9 +11,9 @@ #include #include -// #include - #include "test_macros.h" + +// // // template // struct allocator_traits diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp index f09651a81eee..88af53a4eeaa 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.deprecated_in_cxx17.verify.cpp @@ -30,20 +30,27 @@ // UNSUPPORTED: clang-6 #include -#include "test_macros.h" -int main(int, char**) -{ - typedef std::allocator::pointer AP; // expected-warning {{'pointer' is deprecated}} - typedef std::allocator::const_pointer ACP; // expected-warning {{'const_pointer' is deprecated}} - typedef std::allocator::reference AR; // expected-warning {{'reference' is deprecated}} - typedef std::allocator::const_reference ACR; // expected-warning {{'const_reference' is deprecated}} - typedef std::allocator::rebind::other ARO; // expected-warning {{'rebind' is deprecated}} - - 
typedef std::allocator::pointer AP2; // expected-warning {{'pointer' is deprecated}} - typedef std::allocator::const_pointer ACP2; // expected-warning {{'const_pointer' is deprecated}} - typedef std::allocator::reference AR2; // expected-warning {{'reference' is deprecated}} - typedef std::allocator::const_reference ACR2; // expected-warning {{'const_reference' is deprecated}} - typedef std::allocator::rebind::other ARO2; // expected-warning {{'rebind' is deprecated}} +int main(int, char**) { + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + typedef std::allocator::reference Reference; // expected-warning {{'reference' is deprecated}} + typedef std::allocator::const_reference ConstReference; // expected-warning {{'const_reference' is deprecated}} + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + typedef std::allocator::reference Reference; // expected-warning {{'reference' is deprecated}} + typedef std::allocator::const_reference ConstReference; // expected-warning {{'const_reference' is deprecated}} + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } + { + typedef std::allocator::pointer Pointer; // expected-warning {{'pointer' is deprecated}} + typedef std::allocator::const_pointer ConstPointer; // expected-warning {{'const_pointer' is deprecated}} + // reference and const_reference are not provided by std::allocator + typedef std::allocator::rebind::other Rebind; // expected-warning {{'rebind' is deprecated}} + } return 0; } diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp 
b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp index 6a034935a30d..74adc6943594 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.pass.cpp @@ -18,47 +18,49 @@ // typedef ptrdiff_t difference_type; // typedef T value_type; // +// typedef T* pointer; // deprecated in C++17, removed in C++20 +// typedef T const* const_pointer; // deprecated in C++17, removed in C++20 +// typedef T& reference; // deprecated in C++17, removed in C++20 +// typedef T const& const_reference; // deprecated in C++17, removed in C++20 +// template< class U > struct rebind { typedef allocator other; }; // deprecated in C++17, removed in C++20 +// // typedef true_type propagate_on_container_move_assignment; // typedef true_type is_always_equal; // ... // }; +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + #include #include #include #include "test_macros.h" -template -TEST_CONSTEXPR_CXX20 bool test() -{ - static_assert((std::is_same::size_type, std::size_t>::value), ""); - static_assert((std::is_same::difference_type, std::ptrdiff_t>::value), ""); - static_assert((std::is_same::value_type, T>::value), ""); - static_assert((std::is_same::propagate_on_container_move_assignment, std::true_type>::value), ""); - static_assert((std::is_same::is_always_equal, std::true_type>::value), ""); +struct U; - std::allocator a; - std::allocator a2 = a; - a2 = a; - std::allocator a3 = a2; - (void)a3; +template +void test() { + typedef std::allocator Alloc; + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); - return true; +#if TEST_STD_VER <= 17 + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); + 
static_assert((std::is_same::value), ""); + static_assert((std::is_same::other, std::allocator >::value), ""); +#endif } -int main(int, char**) -{ - test(); -#ifdef _LIBCPP_VERSION // extension - test(); -#endif // _LIBCPP_VERSION - -#if TEST_STD_VER > 17 - static_assert(test()); -#ifdef _LIBCPP_VERSION // extension - static_assert(test()); -#endif // _LIBCPP_VERSION +int main(int, char**) { + test(); +#ifdef _LIBCPP_VERSION + test(); // extension #endif return 0; } diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp index 9805accda20a..1d91a022867d 100644 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.removed_in_cxx20.verify.cpp @@ -31,16 +31,17 @@ template void check() { - typedef typename std::allocator::pointer AP; // expected-error 2 {{no type named 'pointer'}} - typedef typename std::allocator::const_pointer ACP; // expected-error 2 {{no type named 'const_pointer'}} - typedef typename std::allocator::reference AR; // expected-error 2 {{no type named 'reference'}} - typedef typename std::allocator::const_reference ACR; // expected-error 2 {{no type named 'const_reference'}} - typedef typename std::allocator::template rebind::other ARO; // expected-error 2 {{no member named 'rebind'}} + typedef typename std::allocator::pointer AP; // expected-error 3 {{no type named 'pointer'}} + typedef typename std::allocator::const_pointer ACP; // expected-error 3 {{no type named 'const_pointer'}} + typedef typename std::allocator::reference AR; // expected-error 3 {{no type named 'reference'}} + typedef typename std::allocator::const_reference ACR; // expected-error 3 {{no type named 'const_reference'}} + typedef typename std::allocator::template rebind::other ARO; // expected-error 3 {{no member 
named 'rebind'}} } int main(int, char**) { check(); check(); + check(); return 0; } diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp similarity index 51% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp rename to libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp index d03f90b506d5..d7b4a1b1bdf4 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.cxx2a.pass.cpp +++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_types.void.compile.pass.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -// +// Check that the nested types of std::allocator are provided. +// After C++17, those are not provided in the primary template and the +// explicit specialization doesn't exist anymore, so this test is moot. -// Check that the following member types of allocator are provided -// regardless of the Standard when we request them from libc++. 
+// REQUIRES: c++03 || c++11 || c++14 || c++17 // template <> // class allocator @@ -22,24 +23,13 @@ // template struct rebind {typedef allocator<_Up> other;}; // }; -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include #include -#include "test_macros.h" - -int main(int, char**) -{ - static_assert((std::is_same::pointer, void*>::value), ""); - static_assert((std::is_same::const_pointer, const void*>::value), ""); - static_assert((std::is_same::value_type, void>::value), ""); - static_assert((std::is_same::rebind::other, - std::allocator >::value), ""); - std::allocator a; - std::allocator a2 = a; - a2 = a; - - return 0; -} +static_assert((std::is_same::pointer, void*>::value), ""); +static_assert((std::is_same::const_pointer, const void*>::value), ""); +static_assert((std::is_same::value_type, void>::value), ""); +static_assert((std::is_same::rebind::other, + std::allocator >::value), ""); diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp deleted file mode 100644 index cd98e6364b7e..000000000000 --- a/libcxx/test/std/utilities/memory/default.allocator/allocator_void.deprecated_in_cxx17.verify.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// Check that allocator is deprecated in C++17. 
- -// REQUIRES: c++17 - -#include -#include "test_macros.h" - -int main(int, char**) -{ - typedef std::allocator::pointer AP; // expected-warning {{'allocator' is deprecated}} - typedef std::allocator::const_pointer ACP; // expected-warning {{'allocator' is deprecated}} - typedef std::allocator::rebind::other ARO; // expected-warning {{'allocator' is deprecated}} - return 0; -} From 6412392511340a7f1793a00b5b501692300089e6 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sat, 24 Apr 2021 08:20:13 -0400 Subject: [PATCH 285/318] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20=5F=5Fbit=5Fit?= =?UTF-8?q?erator=20mustn't=20rely=20on=20deprecated=20SMF=20generation.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to turn -Wdeprecated-copy back on. We turned it off in 3b71de41cc7c7 because Clang's implementation became more stringent and started diagnosing the old code here. Differential Revision: https://reviews.llvm.org/D101183 (cherry picked from commit 70d94c3f2cae71ade2ceacdceb3d2e9899d2289a) --- libcxx/include/__bit_reference | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 9cfb4b84e653..d44ad03d3134 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -1114,28 +1114,26 @@ public: #endif {} - // avoid re-declaring a copy constructor for the non-const version. - using __type_for_copy_to_const = - _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; - + // When _IsConst=false, this is the copy constructor. + // It is non-trivial. Making it trivial would break ABI. + // When _IsConst=true, this is a converting constructor; + // the copy and move constructors are implicitly generated + // and trivial. 
_LIBCPP_INLINE_VISIBILITY - __bit_iterator(const __type_for_copy_to_const& __it) _NOEXCEPT + __bit_iterator(const __bit_iterator<_Cp, false>& __it) _NOEXCEPT : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - // The non-const __bit_iterator has historically had a non-trivial - // copy constructor (as a quirk of its construction). We need to maintain - // this for ABI purposes. - using __type_for_abi_non_trivial_copy_ctor = - _If; - - _LIBCPP_INLINE_VISIBILITY - __bit_iterator(__type_for_abi_non_trivial_copy_ctor const& __it) _NOEXCEPT - : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - - // Always declare the copy assignment operator since the implicit declaration - // is deprecated. + // When _IsConst=false, we have a user-provided copy constructor, + // so we must also provide a copy assignment operator because + // the implicit generation of a defaulted one is deprecated. + // When _IsConst=true, the assignment operators are + // implicitly generated and trivial. _LIBCPP_INLINE_VISIBILITY - __bit_iterator& operator=(__bit_iterator const&) = default; + __bit_iterator& operator=(const _If<_IsConst, struct __private_nat, __bit_iterator>& __it) { + __seg_ = __it.__seg_; + __ctz_ = __it.__ctz_; + return *this; + } _LIBCPP_INLINE_VISIBILITY reference operator*() const _NOEXCEPT {return reference(__seg_, __storage_type(1) << __ctz_);} From 82796b3c3dab0a109958f3bfbef7f798c6c8d3e2 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 16 Jun 2021 13:30:36 -0400 Subject: [PATCH 286/318] [libc++] Adjust XFAIL for std::tuple deduction tests with GCC GCC has been failing those tests, and we marked them as such in a3ab5120fd572215afeac190757834a041dda73a on 'main'. Cherry-pick only that part of the change into the LLVM 12 release so that we can get the CI green again. 
--- .../std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp index db0958ca6de1..4951ae22d406 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp @@ -13,7 +13,7 @@ // GCC's implementation of class template deduction is still immature and runs // into issues with libc++. However GCC accepts this code when compiling // against libstdc++. -// XFAIL: gcc-5, gcc-6, gcc-7 +// XFAIL: gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10, gcc-11 // From 0d6fc8550b58d5993858de52e330e01e109ddff9 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Wed, 26 May 2021 12:00:03 -0700 Subject: [PATCH 287/318] =?UTF-8?q?[=F0=9F=8D=92][libcxx][nfc]=20Fix=20the?= =?UTF-8?q?=20ASAN=20bots:=20update=20expected.pass.cpp.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensures that `get_return_object`'s return type is the same as the return type for the function calling `co_return`. Otherwise, we try to construct an object, then free it, then return it. 
Differential Revision: https://reviews.llvm.org/D103196 (cherry picked from commit 52123c96c016143ebfff6de76fe83cebd6c1d726) --- .../support.coroutines/end.to.end/expected.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp index 2bd297660c4e..5b307508abeb 100644 --- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp +++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp @@ -35,7 +35,7 @@ struct expected { struct promise_type { std::shared_ptr data; - std::shared_ptr get_return_object() { data = std::make_shared(); return data; } + expected get_return_object() { data = std::make_shared(); return {data}; } suspend_never initial_suspend() { return {}; } suspend_never final_suspend() noexcept { return {}; } void return_value(T v) { data->val = v; data->error = {}; } From 0193a7da8bdaa9ffcc5bdefd5516c162bb26ab6b Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Thu, 25 Feb 2021 16:05:43 -0600 Subject: [PATCH 288/318] [SystemZ] Assign the full space for promoted and split outgoing args. When a large "irregular" (e.g. i96) integer call argument is converted to indirect, 64-bit parts are stored to the stack. The full stack space (e.g. i128) was not allocated prior to this patch, but rather just the exact space of the original type. This caused neighboring values on the stack to be overwritten. Thanks to Josh Stone for reporting this. 
Review: Ulrich Weigand Fixes https://bugs.llvm.org/show_bug.cgi?id=49322 Differential Revision: https://reviews.llvm.org/D97514 (cherry picked from commit 52bbbf4d4459239e0f461bc302ada89e2c5d07fc) --- .../Target/SystemZ/SystemZISelLowering.cpp | 22 ++++++-- llvm/test/CodeGen/SystemZ/args-11.ll | 54 +++++++++++++++++++ 2 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/args-11.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 9ace36f344a5..270134d84c61 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1550,6 +1550,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -1559,7 +1560,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -1584,14 +1585,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); + unsigned ArgIndex = Outs[I].OrigArgIndex; + EVT SlotVT; + if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + // Allocate the full stack space for a promoted (and split) argument. 
+ Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; + EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); + MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } else { + SlotVT = Outs[I].ArgVT; + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). - unsigned ArgIndex = Outs[I].OrigArgIndex; assert (Outs[I].PartOffset == 0); while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[I + 1]; @@ -1601,6 +1613,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); + assert((PartOffset + PartValue.getValueType().getStoreSize() <= + SlotVT.getStoreSize()) && "Not enough space for argument part!"); ++I; } ArgValue = SpillSlot; @@ -1694,7 +1708,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Assign locations to each value returned by this call. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. 
diff --git a/llvm/test/CodeGen/SystemZ/args-11.ll b/llvm/test/CodeGen/SystemZ/args-11.ll new file mode 100644 index 000000000000..b355f9d6da15 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/args-11.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test outgoing promoted arguments that are split (and passed by reference). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The i96 arg is promoted to i128 and should get the full stack space. +declare void @fn1(i96) +define i32 @fn2() { +; CHECK-LABEL: fn2: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -184 +; CHECK-NEXT: .cfi_def_cfa_offset 344 +; CHECK-NEXT: mvhi 180(%r15), -1 +; CHECK-NEXT: mvghi 168(%r15), 0 +; CHECK-NEXT: la %r2, 160(%r15) +; CHECK-NEXT: mvghi 160(%r15), 0 +; CHECK-NEXT: brasl %r14, fn1@PLT +; CHECK-NEXT: l %r2, 180(%r15) +; CHECK-NEXT: lmg %r14, %r15, 296(%r15) +; CHECK-NEXT: br %r14 + %1 = alloca i32 + store i32 -1, i32* %1 + call void @fn1(i96 0) + %2 = load i32, i32* %1 + ret i32 %2 +} + +declare void @fn3(i136) +define i32 @fn4() { +; CHECK-LABEL: fn4: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -192 +; CHECK-NEXT: .cfi_def_cfa_offset 352 +; CHECK-NEXT: mvhi 188(%r15), -1 +; CHECK-NEXT: mvghi 176(%r15), 0 +; CHECK-NEXT: mvghi 168(%r15), 0 +; CHECK-NEXT: la %r2, 160(%r15) +; CHECK-NEXT: mvghi 160(%r15), 0 +; CHECK-NEXT: brasl %r14, fn3@PLT +; CHECK-NEXT: l %r2, 188(%r15) +; CHECK-NEXT: lmg %r14, %r15, 304(%r15) +; CHECK-NEXT: br %r14 + %1 = alloca i32 + store i32 -1, i32* %1 + call void @fn3(i136 0) + %2 = load i32, i32* %1 + ret i32 %2 +} From 9be9215b27196c52177c4fc6edebd31a8f4b7e49 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 11 Jun 2021 15:42:26 +0200 Subject: [PATCH 289/318] AMD k8 
family does not support SSE4.x which are required by x86-64-v2+ So don't define __tune__k8__ for these microarchitectures. SSE, SSE2 and SSE3 appear in https://www.amd.com/system/files/TechDocs/25112.PDF but not SSE4.x. Differential Revision: https://reviews.llvm.org/D104116 (cherry picked from commit 092c303955cd18be6c0b923b1c0a1b96e2c91893) --- clang/lib/Basic/Targets/X86.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 694a8095e336..80e160bd9190 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -513,9 +513,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_K8: case CK_K8SSE3: case CK_x86_64: - case CK_x86_64_v2: - case CK_x86_64_v3: - case CK_x86_64_v4: defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: From 051126fe6ffe0f7c27b260ad29ecc84eb34fac0f Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 14 Jun 2021 11:51:00 +0200 Subject: [PATCH 290/318] Fix -Wswitch warning after 092c303955cd18be6c0b923b1c0a1b96e2c91893.
(cherry picked from commit a83ef21ff82e4283044fd31470fc6c1bc4b99c51) --- clang/lib/Basic/Targets/X86.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 80e160bd9190..c5ad1c7d2c2e 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -515,6 +515,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_x86_64: defineCPUMacros(Builder, "k8"); break; + case CK_x86_64_v2: + case CK_x86_64_v3: + case CK_x86_64_v4: + break; case CK_AMDFAM10: defineCPUMacros(Builder, "amdfam10"); break; From cc08a27d2ecc1458a8871c989add0b49afc24f12 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 16 Apr 2021 09:50:24 -0700 Subject: [PATCH 291/318] Sanitizer built against glibc 2.34 doesn't work As mentioned in https://gcc.gnu.org/PR100114 , glibc starting with the https://sourceware.org/git/?p=glibc.git;a=commit;h=6c57d320484988e87e446e2e60ce42816bf51d53 change doesn't define SIGSTKSZ and MINSIGSTKSZ macros to constants, but to a sysconf function call. sanitizer_posix_libcdep.cpp has static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough. which is generally fine; it just means that when SIGSTKSZ is not a compile-time constant, kAltStackSize will be initialized later. The problem is that kAltStackSize is used in SetAlternateSignalStack which is called very early, from .preinit_array initialization, i.e. far before file scope variables are constructed, which means it is not initialized and mmapping 0 will fail: ==145==ERROR: AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack (error code: 22) Here is one possible fix, another one could be to make kAltStackSize a preprocessor macro if _SG_SIGSTKSZ is defined (but perhaps with an automatic const variable initialized to it, so that sysconf at least isn't called twice during SetAlternateSignalStack).
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D100645 (cherry picked from commit 82150606fb11d28813ae6da1101f5bda638165fe) --- .../lib/sanitizer_common/sanitizer_posix_libcdep.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp index d29438cf9dbd..2b10bdd37293 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp @@ -165,7 +165,11 @@ bool SupportsColoredOutput(fd_t fd) { #if !SANITIZER_GO // TODO(glider): different tools may require different altstack size. -static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough. +static uptr GetAltStackSize() { + // SIGSTKSZ is not enough. + static const uptr kAltStackSize = SIGSTKSZ * 4; + return kAltStackSize; +} void SetAlternateSignalStack() { stack_t altstack, oldstack; @@ -176,10 +180,10 @@ void SetAlternateSignalStack() { // TODO(glider): the mapped stack should have the MAP_STACK flag in the // future. It is not required by man 2 sigaltstack now (they're using // malloc()). - void* base = MmapOrDie(kAltStackSize, __func__); + void *base = MmapOrDie(GetAltStackSize(), __func__); altstack.ss_sp = (char*) base; altstack.ss_flags = 0; - altstack.ss_size = kAltStackSize; + altstack.ss_size = GetAltStackSize(); CHECK_EQ(0, sigaltstack(&altstack, nullptr)); } @@ -187,7 +191,7 @@ void UnsetAlternateSignalStack() { stack_t altstack, oldstack; altstack.ss_sp = nullptr; altstack.ss_flags = SS_DISABLE; - altstack.ss_size = kAltStackSize; // Some sane value required on Darwin. + altstack.ss_size = GetAltStackSize(); // Some sane value required on Darwin. 
CHECK_EQ(0, sigaltstack(&altstack, &oldstack)); UnmapOrDie(oldstack.ss_sp, oldstack.ss_size); } From 385a6f37fefbeb1397f1c3733f328bb2b0403e2b Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 14 May 2021 11:45:10 +0200 Subject: [PATCH 292/318] Prevent generation of dependency on _cxa_guard for static initialization This fixes an issue introduced by https://reviews.llvm.org/D70662 Function-scope static initializations are guarded in C++, so we should probably not use them because they introduce a dependency on __cxa_guard* symbols. In the context of clang, libasan is linked statically, and this currently leads to the odd situation where compiling C code with clang and asan requires -lstdc++ Differential Revision: https://reviews.llvm.org/D102475 (cherry picked from commit 414482751452e54710f16bae58458c66298aaf69) --- .../lib/sanitizer_common/sanitizer_posix_libcdep.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp index 2b10bdd37293..12603da1750d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp @@ -166,9 +166,10 @@ bool SupportsColoredOutput(fd_t fd) { #if !SANITIZER_GO // TODO(glider): different tools may require different altstack size. static uptr GetAltStackSize() { - // SIGSTKSZ is not enough. - static const uptr kAltStackSize = SIGSTKSZ * 4; - return kAltStackSize; + // Note: since GLIBC_2.31, SIGSTKSZ may be a function call, so this may be + // more costly that you think. However GetAltStackSize is only call 2-3 times + // per thread so don't cache the evaluation.
+ return SIGSTKSZ * 4; } void SetAlternateSignalStack() { From b690ec54817d9f4ad0e6e16f88a12044660b794f Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Tue, 8 Jun 2021 18:16:08 +0200 Subject: [PATCH 293/318] [LV] Parallel annotated loop does not imply all loads can be hoisted. As noted in https://bugs.llvm.org/show_bug.cgi?id=46666, the current behavior of assuming if-conversion safety if a loop is annotated parallel (`!llvm.loop.parallel_accesses`), is not expectable, the documentation for this behavior was since removed from the LangRef again, and can lead to invalid reads. This was observed in POCL (https://github.com/pocl/pocl/issues/757) and would require similar workarounds in current work at hipSYCL. The question remains why this was initially added and what the implications of removing this optimization would be. Do we need an alternative mechanism to propagate the information about legality of if-conversion? Or is the idea that conditional loads in `#pragma clang loop vectorize(assume_safety)` can be executed unmasked without additional checks flawed in general? I think this implication is not part of what a user of that pragma (and corresponding metadata) would expect and thus dangerous. Only two additional tests failed, which are adapted in this patch. Depending on the further direction force-ifcvt.ll should be removed or further adapted. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D103907 (cherry picked from commit 4f01122c3f6c70beee8f736f196a09976602685f) --- .../Vectorize/LoopVectorizationLegality.h | 15 +++---- .../Vectorize/LoopVectorizationLegality.cpp | 14 ++----- .../LoopVectorize/X86/force-ifcvt.ll | 42 ------------------- .../X86/tail_folding_and_assume_safety.ll | 4 +- 4 files changed, 10 insertions(+), 65 deletions(-) delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 2f80b4373b46..246db0fd2dd9 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -396,22 +396,17 @@ class LoopVectorizationLegality { bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed, and record the loads/stores that require masking. If's that - /// guard loads can be ignored under "assume safety" unless \p PreserveGuards - /// is true. This can happen when we introduces guards for which the original - /// "unguarded-loads are safe" assumption does not hold. For example, the - /// vectorizer's fold-tail transformation changes the loop to execute beyond - /// its original trip-count, under a proper guard, which should be preserved. + /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know /// that we can read from them without segfault. /// \p MaskedOp is a list of instructions that have to be transformed into /// calls to the appropriate masked intrinsic when the loop is vectorized. /// \p ConditionalAssumes is a list of assume instructions in predicated /// blocks that must be dropped if the CFG gets flattened. 
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards = false) const; + bool blockCanBePredicated( + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + SmallPtrSetImpl &MaskedOp, + SmallPtrSetImpl &ConditionalAssumes) const; /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 2ab0848193f6..b8c21a0e1cd3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -925,10 +925,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs, SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards) const { - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - + SmallPtrSetImpl &ConditionalAssumes) const { for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { @@ -956,11 +953,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { - // !llvm.mem.parallel_loop_access implies if-conversion safety. - // Otherwise, record that the load needs (real or emulated) masking - // and let the cost model decide. - if (!IsAnnotatedParallel || PreserveGuards) - MaskedOp.insert(LI); + MaskedOp.insert(LI); continue; } } @@ -1276,8 +1269,7 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() { // do not need predication such as the header block. 
for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes, - /* MaskAllLoads= */ true)) { + TmpConditionalAssumes)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } diff --git a/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll b/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll deleted file mode 100644 index 07b98b4fb001..000000000000 --- a/llvm/test/Transforms/LoopVectorize/X86/force-ifcvt.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt -loop-vectorize -S < %s | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define void @Test(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p) #0 { -entry: - br label %for.body - -; CHECK-LABEL: @Test -; CHECK: <4 x i32> - -for.body: ; preds = %cond.end, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ] - %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !1 - %cmp1 = icmp eq i32 %0, 0 - %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv - %1 = load i32, i32* %arrayidx3, align 4, !llvm.access.group !1 - br i1 %cmp1, label %cond.end, label %cond.false - -cond.false: ; preds = %for.body - %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv - %2 = load i32, i32* %arrayidx7, align 4, !llvm.access.group !1 - %add = add nsw i32 %2, %1 - br label %cond.end - -cond.end: ; preds = %for.body, %cond.false - %cond = phi i32 [ %add, %cond.false ], [ %1, %for.body ] - store i32 %cond, i32* %arrayidx3, align 4, !llvm.access.group !1 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 16 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %cond.end - ret 
void -} - -attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" } - -!0 = distinct !{!0, !{!"llvm.loop.parallel_accesses", !1}} -!1 = distinct !{} diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll index 10477ddce9c9..5f9c477746db 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_folding_and_assume_safety.ll @@ -51,7 +51,7 @@ for.inc: br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !8 } -; Case2: With pragma assume_safety only the store is masked. +; Case2: With pragma assume_safety both, load and store are masked. ; void assume_safety(int * p, int * q1, int * q2, int guard) { ; #pragma clang loop vectorize(assume_safety) ; for(int ix=0; ix < 1021; ++ix) { @@ -63,7 +63,7 @@ for.inc: ;CHECK-LABEL: @assume_safety ;CHECK: vector.body: -;CHECK-NOT: @llvm.masked.load +;CHECK: call <8 x i32> @llvm.masked.load ;CHECK: call void @llvm.masked.store ; Function Attrs: norecurse nounwind uwtable From 275ffa580880f6e18bf9742cad8e5dcab67b1f1d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 16 Jun 2021 12:35:00 -0400 Subject: [PATCH 294/318] [libc++] Make sure std::allocator is always trivial When we removed the allocator specialization, the triviality of std::allocator changed because the primary template had a non-trivial default constructor and the specialization didn't (so std::allocator went from trivial to non-trivial). This commit fixes that oversight by giving a trivial constructor to the primary template when instantiated on cv-void. This was reported in https://llvm.org/PR50299. 
(cherry picked from commit 71e4d434dc83b02a853712a5cb026ee2fa9ba67f) Differential Revision: https://reviews.llvm.org/D104486 --- libcxx/include/memory | 30 ++++++++++++++-- .../allocator_void.trivial.compile.pass.cpp | 34 +++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp diff --git a/libcxx/include/memory b/libcxx/include/memory index efb10c8fd25b..e02846b4035c 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -810,10 +810,35 @@ public: }; #endif +// This class provides a non-trivial default constructor to the class that derives from it +// if the condition is satisfied. +// +// The second template parameter exists to allow giving a unique type to __non_trivial_if, +// which makes it possible to avoid breaking the ABI when making this a base class of an +// existing class. Without that, imagine we have classes D1 and D2, both of which used to +// have no base classes, but which now derive from __non_trivial_if. The layout of a class +// that inherits from both D1 and D2 will change because the two __non_trivial_if base +// classes are not allowed to share the same address. +// +// By making those __non_trivial_if base classes unique, we work around this problem and +// it is safe to start deriving from __non_trivial_if in existing classes. +template +struct __non_trivial_if { }; + +template +struct __non_trivial_if { + _LIBCPP_INLINE_VISIBILITY + _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT { } +}; + // allocator +// +// Note: For ABI compatibility between C++20 and previous standards, we make +// allocator trivial in C++20. 
template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator<_Tp> > { public: typedef size_t size_type; @@ -823,7 +848,7 @@ public: typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 @@ -895,6 +920,7 @@ public: template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator > { public: typedef size_t size_type; @@ -904,7 +930,7 @@ public: typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 diff --git a/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp b/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp new file mode 100644 index 000000000000..f9d67c065de8 --- /dev/null +++ b/libcxx/test/libcxx/memory/allocator_void.trivial.compile.pass.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure that std::allocator is trivial. This was the case before C++20 +// with the std::allocator explicit specialization, and this test makes sure +// that we maintain that property across all standards. +// +// This is important since triviality has implications on how the type is passed +// as a function argument in the ABI. 
+ +#include +#include + +typedef std::allocator A1; +typedef std::allocator A2; +struct A3 : std::allocator { }; +struct A4 : std::allocator { }; + +static_assert(std::is_trivially_default_constructible::value, ""); +static_assert(std::is_trivial::value, ""); + +static_assert(std::is_trivially_default_constructible::value, ""); +static_assert(std::is_trivial::value, ""); + +static_assert(std::is_trivially_default_constructible::value, ""); +static_assert(std::is_trivial::value, ""); + +static_assert(std::is_trivially_default_constructible::value, ""); +static_assert(std::is_trivial::value, ""); From 894c0c889707dd7797c54af8ebe2e91f80e97236 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 22 Jun 2021 12:43:54 -0400 Subject: [PATCH 295/318] [libc++] Fix CI on release/12.x branch The new Docker images don't have gcc or g++, only the versioned equivalents. --- libcxx/utils/ci/run-buildbot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 1fb09f3a91aa..d3880816779f 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -168,8 +168,8 @@ generic-32bit) check-cxx-cxxabi ;; generic-gcc) - export CC=gcc - export CXX=g++ + export CC=gcc-10 + export CXX=g++-10 clean # FIXME: Re-enable experimental testing on GCC. GCC cares about the order # in which we link -lc++experimental, which causes issues. From 884040db086936107ec81656aa5b4c607235fb9a Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 20 May 2021 18:55:11 +0100 Subject: [PATCH 296/318] libsanitizer: Remove cyclades inclusion in sanitizer The Linux kernel has removed the interface to cyclades from the latest kernel headers[1] due to them being orphaned for the past 13 years. libsanitizer uses this header when compiling against glibc, but glibcs itself doesn't seem to have any references to cyclades. 
Further more it seems that the driver is broken in the kernel and the firmware doesn't seem to be available anymore. As such since this is breaking the build of libsanitizer (and so the GCC bootstrap[2]) I propose to remove this. [1] https://lkml.org/lkml/2021/3/2/153 [2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100379 Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D102059 (cherry picked from commit 68d5235cb58f988c71b403334cd9482d663841ab) --- .../sanitizer_common_interceptors_ioctl.inc | 9 --------- .../sanitizer_platform_limits_posix.cpp | 11 ----------- .../sanitizer_platform_limits_posix.h | 10 ---------- 3 files changed, 30 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc index 7f181258eab5..b7da65987557 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc @@ -370,15 +370,6 @@ static void ioctl_table_fill() { #if SANITIZER_GLIBC // _(SIOCDEVPLIP, WRITE, struct_ifreq_sz); // the same as EQL_ENSLAVE - _(CYGETDEFTHRESH, WRITE, sizeof(int)); - _(CYGETDEFTIMEOUT, WRITE, sizeof(int)); - _(CYGETMON, WRITE, struct_cyclades_monitor_sz); - _(CYGETTHRESH, WRITE, sizeof(int)); - _(CYGETTIMEOUT, WRITE, sizeof(int)); - _(CYSETDEFTHRESH, NONE, 0); - _(CYSETDEFTIMEOUT, NONE, 0); - _(CYSETTHRESH, NONE, 0); - _(CYSETTIMEOUT, NONE, 0); _(EQL_EMANCIPATE, WRITE, struct_ifreq_sz); _(EQL_ENSLAVE, WRITE, struct_ifreq_sz); _(EQL_GETMASTRCFG, WRITE, struct_ifreq_sz); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 12dd39e674ac..7abaeb880bf3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -143,7 +143,6 @@ 
typedef struct user_fpregs elf_fpregset_t; # include #endif #include -#include #include #include #include @@ -459,7 +458,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #if SANITIZER_GLIBC unsigned struct_ax25_parms_struct_sz = sizeof(struct ax25_parms_struct); - unsigned struct_cyclades_monitor_sz = sizeof(struct cyclades_monitor); #if EV_VERSION > (0x010000) unsigned struct_input_keymap_entry_sz = sizeof(struct input_keymap_entry); #else @@ -823,15 +821,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID - unsigned IOCTL_CYGETDEFTHRESH = CYGETDEFTHRESH; - unsigned IOCTL_CYGETDEFTIMEOUT = CYGETDEFTIMEOUT; - unsigned IOCTL_CYGETMON = CYGETMON; - unsigned IOCTL_CYGETTHRESH = CYGETTHRESH; - unsigned IOCTL_CYGETTIMEOUT = CYGETTIMEOUT; - unsigned IOCTL_CYSETDEFTHRESH = CYSETDEFTHRESH; - unsigned IOCTL_CYSETDEFTIMEOUT = CYSETDEFTIMEOUT; - unsigned IOCTL_CYSETTHRESH = CYSETTHRESH; - unsigned IOCTL_CYSETTIMEOUT = CYSETTIMEOUT; unsigned IOCTL_EQL_EMANCIPATE = EQL_EMANCIPATE; unsigned IOCTL_EQL_ENSLAVE = EQL_ENSLAVE; unsigned IOCTL_EQL_GETMASTRCFG = EQL_GETMASTRCFG; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 836b178c131b..8a156b7fcb80 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -983,7 +983,6 @@ extern unsigned struct_vt_mode_sz; #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_ax25_parms_struct_sz; -extern unsigned struct_cyclades_monitor_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; extern unsigned struct_kbdiacrs_sz; @@ -1328,15 +1327,6 @@ extern unsigned IOCTL_VT_WAITACTIVE; #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID -extern unsigned IOCTL_CYGETDEFTHRESH; -extern unsigned 
IOCTL_CYGETDEFTIMEOUT; -extern unsigned IOCTL_CYGETMON; -extern unsigned IOCTL_CYGETTHRESH; -extern unsigned IOCTL_CYGETTIMEOUT; -extern unsigned IOCTL_CYSETDEFTHRESH; -extern unsigned IOCTL_CYSETDEFTIMEOUT; -extern unsigned IOCTL_CYSETTHRESH; -extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; From 0680e2b5a1182abc1d2fdb6730ef7d742a4d6345 Mon Sep 17 00:00:00 2001 From: jasonliu Date: Mon, 12 Apr 2021 19:22:12 +0000 Subject: [PATCH 297/318] [libc++] add `inline` for __open's definition in ifstream and ofstream Summary: When building with gcc on AIX, it seems that gcc does not like the `always_inline` without the `inline` keyword. So adding the inline keywords in for __open in ifstream and ofstream. That will also make it consistent with __open in basic_filebuf (it seems we added `inline` there before for gcc build as well). Differential Revision: https://reviews.llvm.org/D99422 (cherry picked from commit 52e9d80d5db20f23979e409f958736d130387f9e) --- libcxx/include/fstream | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxx/include/fstream b/libcxx/include/fstream index d7d6b46c32d9..7b1bbfe16c01 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -244,7 +244,7 @@ public: return open(__p.c_str(), __mode); } #endif - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY basic_filebuf* __open(int __fd, ios_base::openmode __mode); #endif basic_filebuf* close(); @@ -574,7 +574,7 @@ basic_filebuf<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode) } template -inline _LIBCPP_INLINE_VISIBILITY +inline basic_filebuf<_CharT, _Traits>* basic_filebuf<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { basic_filebuf<_CharT, _Traits>* __rt = nullptr; @@ -1326,6 +1326,7 @@ basic_ifstream<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mo } template +inline void basic_ifstream<_CharT, 
_Traits>::__open(int __fd, ios_base::openmode __mode) { if (__sb_.__open(__fd, __mode | ios_base::in)) @@ -1539,6 +1540,7 @@ basic_ofstream<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mo } template +inline void basic_ofstream<_CharT, _Traits>::__open(int __fd, ios_base::openmode __mode) { if (__sb_.__open(__fd, __mode | ios_base::out)) From 5cb4200739750a7866817c22de32b50819cd76b5 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Thu, 8 Apr 2021 22:01:35 -0700 Subject: [PATCH 298/318] [libcxx] Allow shared_ptr's unique_ptr converting constructor to support array types. Refs: https://bugs.llvm.org/show_bug.cgi?id=32147 Differential Revision: https://reviews.llvm.org/D80882 (cherry picked from commit 097d77d611d1e1b3972be661fdc3caaa4d1824b4) --- libcxx/include/memory | 10 +-- .../unique_ptr_Y.pass.cpp | 90 +++++++++++++++++++ .../unique_ptr.pass.cpp | 88 +++++++++++++++++- 3 files changed, 178 insertions(+), 10 deletions(-) diff --git a/libcxx/include/memory b/libcxx/include/memory index e02846b4035c..62235cf72b35 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -2771,7 +2771,6 @@ public: typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); @@ -2780,7 +2779,6 @@ public: typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); @@ -2821,7 +2819,6 @@ public: template typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, shared_ptr& >::type @@ -3183,7 +3180,6 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) @@ -3196,7 +3192,7 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, #endif { typedef typename 
__shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; + typedef __shared_ptr_pointer::pointer, _Dp, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } @@ -3209,7 +3205,6 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) @@ -3222,7 +3217,7 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, #endif { typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, + typedef __shared_ptr_pointer::pointer, reference_wrapper::type>, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), _VSTD::ref(__r.get_deleter()), _AllocT()); @@ -3306,7 +3301,6 @@ template inline typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, typename shared_ptr<_Tp>::element_type*>::value, shared_ptr<_Tp>& diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp index 0096897ee07e..f9e1798d8a7a 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp @@ -41,6 +41,19 @@ struct A int A::count = 0; +template +struct StatefulArrayDeleter { + int state = 0; + + StatefulArrayDeleter(int val = 0) : state(val) {} + StatefulArrayDeleter(StatefulArrayDeleter const&) { assert(false); } + + void operator()(T* ptr) { + assert(state == 42); + delete []ptr; + } +}; + int main(int, char**) { { @@ -112,5 +125,82 @@ int main(int, char**) assert(B::count == 0); 
assert(A::count == 0); + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p; + p = std::move(ptr); + } + +#if TEST_STD_VER > 14 + { + StatefulArrayDeleter d; + std::unique_ptr&> u(new A[4], d); + std::shared_ptr p; + p = std::move(u); + d.state = 42; + assert(A::count == 4); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p; + p = std::move(ptr); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p; + p = std::move(ptr); + } +#endif // TEST_STD_VER >= 14 + return 0; } diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp index 398c64ee6d74..ad88a3e8a7df 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp +++ 
b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/unique_ptr.pass.cpp @@ -10,7 +10,7 @@ // -// template explicit shared_ptr(unique_ptr&&r); +// template shared_ptr(unique_ptr&&r); #include #include @@ -69,6 +69,19 @@ struct StatefulDeleter { } }; +template +struct StatefulArrayDeleter { + int state = 0; + + StatefulArrayDeleter(int val = 0) : state(val) {} + StatefulArrayDeleter(StatefulArrayDeleter const&) { assert(false); } + + void operator()(T* ptr) { + assert(state == 42); + delete []ptr; + } +}; + int main(int, char**) { { @@ -135,5 +148,76 @@ int main(int, char**) std::shared_ptr s = std::move(u); } - return 0; + assert(A::count == 0); + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p(std::move(ptr)); + } + +#if TEST_STD_VER > 14 + { + StatefulArrayDeleter d; + std::unique_ptr&> u(new A[4], d); + std::shared_ptr p(std::move(u)); + d.state = 42; + assert(A::count == 4); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(B::count == 8); + assert(p.use_count() == 1); + assert(p.get() == raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + assert(B::count == 0); + + { + std::unique_ptr ptr(new A[8]); + A* raw_ptr = ptr.get(); + std::shared_ptr p(std::move(ptr)); + assert(A::count == 8); + assert(p.use_count() == 1); + assert(p.get() 
== raw_ptr); + assert(ptr.get() == 0); + } + assert(A::count == 0); + + { + std::unique_ptr ptr(new int[8]); + std::shared_ptr p(std::move(ptr)); + } +#endif // TEST_STD_VER >= 14 + + return 0; } From fa21c5d4cf8cf00347e89bc3c50fbaf6a5c185dd Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 19 Jan 2021 13:04:01 -0500 Subject: [PATCH 299/318] [libc++] Make feature-test macros consistent with availability macros Before this patch, feature-test macros didn't take special availability markup into account, which means that feature-test macros can sometimes appear to "lie". For example, if you compile in C++20 mode and target macOS 10.13, the __cpp_lib_filesystem feature-test macro will be provided even though the declarations are marked as unavailable. This patch fixes that. rdar://68142369 Differential Revision: https://reviews.llvm.org/D94983 (cherry picked from commit 76fc35752d19ac605c1c1fd757af9c7c3bb4a906) --- libcxx/include/__availability | 43 +++++++ libcxx/include/version | 16 +-- .../fs.req.macros/feature_macro.pass.cpp | 30 ----- .../atomic.version.pass.cpp | 8 +- .../barrier.version.pass.cpp | 8 +- .../filesystem.version.pass.cpp | 48 +++++--- .../latch.version.pass.cpp | 8 +- .../semaphore.version.pass.cpp | 8 +- .../shared_mutex.version.pass.cpp | 28 ++--- .../version.version.pass.cpp | 108 ++++++++++-------- .../generate_feature_test_macro_components.py | 26 +++-- 11 files changed, 192 insertions(+), 139 deletions(-) delete mode 100644 libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp diff --git a/libcxx/include/__availability b/libcxx/include/__availability index db2267c8eb16..cc3b6fabdab1 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -43,6 +43,14 @@ // as unavailable. When vendors decide to ship the feature as part of their // shared library, they can update the markup appropriately. // +// Furthermore, many features in the standard library have corresponding +// feature-test macros. 
When a feature is made unavailable on some deployment +// target, a macro should be defined to signal that it is unavailable. That +// macro can then be picked up when feature-test macros are generated (see +// generate_feature_test_macro_components.py) to make sure that feature-test +// macros don't announce a feature as being implemented if it has been marked +// as unavailable. +// // Note that this mechanism is disabled by default in the "upstream" libc++. // Availability annotations are only meaningful when shipping libc++ inside // a platform (i.e. as a system library), and so vendors that want them should @@ -76,6 +84,8 @@ // This controls the availability of std::shared_mutex and std::shared_timed_mutex, // which were added to the dylib later. # define _LIBCPP_AVAILABILITY_SHARED_MUTEX +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex // These macros control the availability of std::bad_optional_access and // other exception types. These were put in the shared library to prevent @@ -114,6 +124,7 @@ # define _LIBCPP_AVAILABILITY_FILESYSTEM # define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH # define _LIBCPP_AVAILABILITY_FILESYSTEM_POP +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem // This controls the availability of std::to_chars. # define _LIBCPP_AVAILABILITY_TO_CHARS @@ -122,6 +133,10 @@ // which requires shared library support for various operations // (see libcxx/src/atomic.cpp). 
# define _LIBCPP_AVAILABILITY_SYNC +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #elif defined(__APPLE__) @@ -130,6 +145,14 @@ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex +# endif + # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ __attribute__((availability(macosx,strict,introduced=10.13))) \ __attribute__((availability(ios,strict,introduced=11.0))) \ @@ -139,27 +162,34 @@ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS + # define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ 
__attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ __attribute__((availability(ios,strict,introduced=6.0))) + # define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_FILESYSTEM \ __attribute__((availability(macosx,strict,introduced=10.15))) \ __attribute__((availability(ios,strict,introduced=13.0))) \ @@ -175,10 +205,23 @@ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem +# endif + # define _LIBCPP_AVAILABILITY_TO_CHARS \ _LIBCPP_AVAILABILITY_FILESYSTEM + + // Note: Those are not ABI-stable yet, so we can't ship them. 
# define _LIBCPP_AVAILABILITY_SYNC \ __attribute__((unavailable)) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #else diff --git a/libcxx/include/version b/libcxx/include/version index 813bc1ab9e6a..c021db8bddd7 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -184,7 +184,7 @@ __cpp_lib_void_t 201411L # define __cpp_lib_quoted_string_io 201304L # define __cpp_lib_result_of_sfinae 201210L # define __cpp_lib_robust_nonmodifying_seq_ops 201304L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # define __cpp_lib_shared_timed_mutex 201402L # endif # define __cpp_lib_string_udls 201304L @@ -213,7 +213,9 @@ __cpp_lib_void_t 201411L # define __cpp_lib_clamp 201603L # define __cpp_lib_enable_shared_from_this 201603L // # define __cpp_lib_execution 201603L -# define __cpp_lib_filesystem 201703L +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# define __cpp_lib_filesystem 201703L +# endif # define __cpp_lib_gcd_lcm 201606L // # define __cpp_lib_hardware_interference_size 201703L # if defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS) @@ -241,7 +243,7 @@ __cpp_lib_void_t 201411L # define __cpp_lib_raw_memory_algorithms 201606L # define __cpp_lib_sample 201603L # define __cpp_lib_scoped_lock 201703L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # define __cpp_lib_shared_mutex 201505L # endif # define __cpp_lib_shared_ptr_arrays 201611L @@ -279,10 +281,10 @@ __cpp_lib_void_t 201411L # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_value_initialization 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if 
!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # define __cpp_lib_atomic_wait 201907L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # define __cpp_lib_barrier 201907L # endif // # define __cpp_lib_bind_front 201907L @@ -326,7 +328,7 @@ __cpp_lib_void_t 201411L # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_jthread 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # define __cpp_lib_latch 201907L # endif # define __cpp_lib_list_remove_return_type 201806L @@ -336,7 +338,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_polymorphic_allocator 201902L // # define __cpp_lib_ranges 201811L # define __cpp_lib_remove_cvref 201711L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # define __cpp_lib_semaphore 201907L # endif # define __cpp_lib_shift 201806L diff --git a/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp deleted file mode 100644 index 6b7052b2403d..000000000000 --- a/libcxx/test/std/input.output/filesystems/fs.req.macros/feature_macro.pass.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// #define __cpp_lib_filesystem 201703L - -#include -#include "test_macros.h" - -#if TEST_STD_VER >= 17 -#ifndef __cpp_lib_filesystem -#error Filesystem feature test macro is not defined (__cpp_lib_filesystem) -#elif __cpp_lib_filesystem != 201703L -#error Filesystem feature test macro has an incorrect value (__cpp_lib_filesystem) -#endif -#else // TEST_STD_VER < 17 -#ifdef __cpp_lib_filesystem -#error Filesystem feature test macro should not be defined before C++17 -#endif -#endif - -int main(int, char**) { - return 0; -} diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp index b964e0c6921e..c8a837115ade 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp @@ -248,7 +248,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++20" # endif @@ -257,7 +257,7 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" 
# endif # endif @@ -367,7 +367,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++2b" # endif @@ -376,7 +376,7 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp index b193095403e9..80f7d9ca24b4 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/barrier.version.pass.cpp @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" 
# endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" # endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp index 6e47bdf3b5fe..c361569cb1d6 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp @@ -51,11 +51,17 @@ # error "__cpp_lib_char8_t should not be defined before c++20" # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++17" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++17" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" 
+# endif # endif #elif TEST_STD_VER == 20 @@ -73,11 +79,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++20" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++20" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif #elif TEST_STD_VER > 20 @@ -95,11 +107,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++2b" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++2b" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" 
+# endif # endif #endif // TEST_STD_VER > 20 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp index 29e8fd617bbf..56db3ba66b45 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/latch.version.pass.cpp @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" 
# endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp index febeb6f6c615..79e31aa06a3f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/semaphore.version.pass.cpp @@ -44,7 +44,7 @@ #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++20" # endif @@ -53,13 +53,13 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++2b" # endif @@ -68,7 +68,7 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" 
# endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp index 953fd0a37790..d26a453f83c3 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/shared_mutex.version.pass.cpp @@ -41,7 +41,7 @@ # error "__cpp_lib_shared_mutex should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -50,13 +50,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif #elif TEST_STD_VER == 17 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -65,11 +65,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" 
# endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -78,13 +78,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif #elif TEST_STD_VER == 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -93,11 +93,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -106,13 +106,13 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" 
# endif # endif #elif TEST_STD_VER > 20 -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++2b" # endif @@ -121,11 +121,11 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++2b" # endif @@ -134,7 +134,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" 
# endif # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp index 9e96e2e116e0..1de99be54e30 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp @@ -1133,7 +1133,7 @@ # error "__cpp_lib_shared_ptr_weak_type should not be defined before c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++14" # endif @@ -1142,7 +1142,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif @@ -1534,11 +1534,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++17" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++17" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++17" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" 
+# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -1883,7 +1889,7 @@ # error "__cpp_lib_semaphore should not be defined before c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++17" # endif @@ -1892,7 +1898,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif @@ -1910,7 +1916,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++17" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++17" # endif @@ -1919,7 +1925,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif @@ -2223,7 +2229,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++20" # endif @@ -2232,11 +2238,11 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++20" # endif @@ -2245,7 +2251,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" # endif # endif @@ -2575,11 +2581,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++20" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++20" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++20" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -2795,7 +2807,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++20" # endif @@ -2804,7 +2816,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" # endif # endif @@ -3019,7 +3031,7 @@ # error "__cpp_lib_scoped_lock should have the value 201703L in c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++20" # endif @@ -3028,11 +3040,11 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++20" # endif @@ -3041,7 +3053,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" 
# endif # endif @@ -3059,7 +3071,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++20" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++20" # endif @@ -3068,7 +3080,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif @@ -3429,7 +3441,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # ifndef __cpp_lib_atomic_wait # error "__cpp_lib_atomic_wait should be defined in c++2b" # endif @@ -3438,11 +3450,11 @@ # endif # else # ifdef __cpp_lib_atomic_wait -# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_atomic_wait should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # ifndef __cpp_lib_barrier # error "__cpp_lib_barrier should be defined in c++2b" # endif @@ -3451,7 +3463,7 @@ # endif # else # ifdef __cpp_lib_barrier -# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_barrier should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) is not defined!" # endif # endif @@ -3781,11 +3793,17 @@ # endif # endif -# ifndef __cpp_lib_filesystem -# error "__cpp_lib_filesystem should be defined in c++2b" -# endif -# if __cpp_lib_filesystem != 201703L -# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# ifndef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should be defined in c++2b" +# endif +# if __cpp_lib_filesystem != 201703L +# error "__cpp_lib_filesystem should have the value 201703L in c++2b" +# endif +# else +# ifdef __cpp_lib_filesystem +# error "__cpp_lib_filesystem should not be defined when !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) is not defined!" +# endif # endif # ifndef __cpp_lib_gcd_lcm @@ -4004,7 +4022,7 @@ # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # ifndef __cpp_lib_latch # error "__cpp_lib_latch should be defined in c++2b" # endif @@ -4013,7 +4031,7 @@ # endif # else # ifdef __cpp_lib_latch -# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_latch should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) is not defined!" 
# endif # endif @@ -4228,7 +4246,7 @@ # error "__cpp_lib_scoped_lock should have the value 201703L in c++2b" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # ifndef __cpp_lib_semaphore # error "__cpp_lib_semaphore should be defined in c++2b" # endif @@ -4237,11 +4255,11 @@ # endif # else # ifdef __cpp_lib_semaphore -# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_semaphore should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) is not defined!" # endif # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # ifndef __cpp_lib_shared_mutex # error "__cpp_lib_shared_mutex should be defined in c++2b" # endif @@ -4250,7 +4268,7 @@ # endif # else # ifdef __cpp_lib_shared_mutex -# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" +# error "__cpp_lib_shared_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) is not defined!" # endif # endif @@ -4268,7 +4286,7 @@ # error "__cpp_lib_shared_ptr_weak_type should have the value 201606L in c++2b" # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # ifndef __cpp_lib_shared_timed_mutex # error "__cpp_lib_shared_timed_mutex should be defined in c++2b" # endif @@ -4277,7 +4295,7 @@ # endif # else # ifdef __cpp_lib_shared_timed_mutex -# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) is not defined!" 
+# error "__cpp_lib_shared_timed_mutex should not be defined when !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) is not defined!" # endif # endif diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 00de15dae24a..342e15691eb9 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -111,14 +111,14 @@ def add_version_header(tc): "name": "__cpp_lib_atomic_wait", "values": { "c++20": 201907 }, "headers": ["atomic"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)", }, { "name": "__cpp_lib_barrier", "values": { "c++20": 201907 }, "headers": ["barrier"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier)", }, { "name": "__cpp_lib_bind_front", "values": { "c++20": 201907 }, @@ -270,6 +270,8 @@ def add_version_header(tc): "name": "__cpp_lib_filesystem", "values": { "c++17": 201703 }, "headers": ["filesystem"], + "depends": "!defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem)", + "internal_depends": "!defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem)" }, { "name": "__cpp_lib_gcd_lcm", "values": { "c++17": 201606 }, @@ -383,8 +385,8 @@ def add_version_header(tc): "name": "__cpp_lib_latch", "values": { "c++20": 201907 }, "headers": ["latch"], - "depends": 
"!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch)", }, { "name": "__cpp_lib_launder", "values": { "c++17": 201606 }, @@ -496,14 +498,14 @@ def add_version_header(tc): "name": "__cpp_lib_semaphore", "values": { "c++20": 201907 }, "headers": ["semaphore"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore)", }, { "name": "__cpp_lib_shared_mutex", "values": { "c++17": 201505 }, "headers": ["shared_mutex"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex)", }, { "name": "__cpp_lib_shared_ptr_arrays", "values": { "c++17": 201611 }, @@ -516,8 +518,8 @@ def add_version_header(tc): "name": "__cpp_lib_shared_timed_mutex", "values": { "c++14": 201402 }, "headers": ["shared_mutex"], - "depends": "!defined(_LIBCPP_HAS_NO_THREADS)", - "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS)", + "depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex)", + "internal_depends": "!defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex)", }, { "name": "__cpp_lib_shift", "values": { "c++20": 201806 }, From 
6c57bab74f6ffc8f40c5a805dedb5deb76c80458 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 3 Mar 2021 09:55:02 -0800 Subject: [PATCH 300/318] [clang] Don't assert in EmitAggregateCopy on trivial_abi types Fixes PR42961. Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D97872 (cherry picked from commit c8227f06b3356cdc9cc757d8888dfb59a6d8ad89) --- clang/lib/CodeGen/CGExprAgg.cpp | 2 +- clang/test/CodeGenCXX/trivial_abi.cpp | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 60ea1b2af037..f3ab91559d30 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -2056,7 +2056,7 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, Record->hasTrivialCopyAssignment() || Record->hasTrivialMoveConstructor() || Record->hasTrivialMoveAssignment() || - Record->isUnion()) && + Record->hasAttr<TrivialABIAttr>() || Record->isUnion()) && "Trying to aggregate-copy a type without a trivial copy/move " "constructor or assignment operator"); // Ignore empty classes in C++. 
diff --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp index ac41f5cac086..a4222c100311 100644 --- a/clang/test/CodeGenCXX/trivial_abi.cpp +++ b/clang/test/CodeGenCXX/trivial_abi.cpp @@ -262,3 +262,21 @@ void calleeExceptionLarge(Large, Large); void testExceptionLarge() { calleeExceptionLarge(Large(), Large()); } + +// PR42961 + +// CHECK: define{{.*}} @"_ZN3$_08__invokeEv"() +// CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_SMALL]], align 8 +// CHECK: %[[COERCE:.*]] = alloca %[[STRUCT_SMALL]], align 8 +// CHECK: %[[CALL:.*]] = call{{.*}} @"_ZNK3$_0clEv" +// CHECK: %[[COERCEDIVE:.*]] = getelementptr{{.*}} %[[COERCE]] +// CHECK: %[[COERCEVALIP:.*]] = inttoptr{{.*}} %[[CALL]] +// CHECK: %[[RETVALP:.*]] = bitcast %[[STRUCT_SMALL]]* %[[RETVAL]] +// CHECK: %[[COERCEP:.*]] = bitcast %[[STRUCT_SMALL]]* %[[COERCE]] +// CHECK: call {{.*}}memcpy{{.*}} %[[RETVALP]]{{.*}} %[[COERCEP]] +// CHECK: %[[COERCEDIVE1:.*]] = getelementptr{{.*}} %[[RETVAL]] +// CHECK: %[[TMP:.*]] = load{{.*}} %[[COERCEDIVE1]] +// CHECK: %[[COERCEVALPI:.*]] = ptrtoint{{.*}} %[[TMP]] +// CHECK: ret{{.*}} %[[COERCEVALPI]] + +Small (*fp)() = []() -> Small { return Small(); }; From 0eae129baeb589b905223a1e9c8115937bc541b7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Mon, 12 Apr 2021 18:05:18 +0100 Subject: [PATCH 301/318] [ConstantMerge] Don't merge thread_local constants with non-thread_local constants Fixes PR49932 Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D100322 (cherry picked from commit ad9ce8142dd5b90f725ad362feb054d52a35aa1f) --- llvm/lib/Transforms/IPO/ConstantMerge.cpp | 2 ++ llvm/test/Transforms/ConstantMerge/dont-merge.ll | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 67f1438b9b6a..8e81f4bad4af 100644 --- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -95,6 +95,8 @@ 
isUnmergeableGlobal(GlobalVariable *GV, // Only process constants with initializers in the default address space. return !GV->isConstant() || !GV->hasDefinitiveInitializer() || GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch thread-local variables. + GV->isThreadLocal() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV); } diff --git a/llvm/test/Transforms/ConstantMerge/dont-merge.ll b/llvm/test/Transforms/ConstantMerge/dont-merge.ll index 21e390785df5..b0dab923d2cb 100644 --- a/llvm/test/Transforms/ConstantMerge/dont-merge.ll +++ b/llvm/test/Transforms/ConstantMerge/dont-merge.ll @@ -80,3 +80,15 @@ define void @test4(i32** %P1, i32** %P2, i32** %P3, i32** %P4, i32** %P5, i32** store i32* @T4D2, i32** %P8 ret void } + +; CHECK: @T5tls +; CHECK: @T5ua + +@T5tls = private thread_local constant i32 555 +@T5ua = private unnamed_addr constant i32 555 + +define void @test5(i32** %P1, i32** %P2) { + store i32* @T5tls, i32** %P1 + store i32* @T5ua, i32** %P2 + ret void +} From 88c6773026d8e2b013ec2bb1d72593550d18d42f Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Thu, 4 Mar 2021 12:13:07 -0500 Subject: [PATCH 302/318] Reland 293e8fa13d3f05e993771577a4c022deee5cbf6e [llvm-exegesis] Disable the LBR check on AMD https://bugs.llvm.org/show_bug.cgi?id=48918 The bug reported a hang (or very very slow runtime) on a Zen2. Unfortunately, we don't have the hardware right now to debug it and I was not able to reproduce the bug on a HSW. Theory we've got is that the lbr-checking code could be confused on AMD. Differential Revision: https://reviews.llvm.org/D97504 New change: - Surround usages of x86 helper in llvm-exegesis/X86/Target.cpp with ifdef - Fix bug which caused the caller of getVendorSignature to not have a copy of EAX that it expected. 
(cherry picked from commit f8b01d54c31547f3ecb25fb8a1912183026bffa7) --- llvm/include/llvm/Support/Host.h | 14 +++++ llvm/lib/Support/Host.cpp | 68 +++++++++++++++++---- llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 19 +++++- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Support/Host.h b/llvm/include/llvm/Support/Host.h index d4ef389450cc..b3c15f0683b9 100644 --- a/llvm/include/llvm/Support/Host.h +++ b/llvm/include/llvm/Support/Host.h @@ -65,6 +65,20 @@ namespace sys { StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); + + /// Helper functions to extract CPU details from CPUID on x86. + namespace x86 { + enum class VendorSignatures { + UNKNOWN, + GENUINE_INTEL, + AUTHENTIC_AMD, + }; + + /// Returns the host CPU's vendor. + /// MaxLeaf: if a non-nullptr pointer is specified, the EAX value will be + /// assigned to its pointee. + VendorSignatures getVendorSignature(unsigned *MaxLeaf = nullptr); + } // namespace x86 } } } diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index a1bd3cc12f1d..79482dfacd29 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -417,11 +417,6 @@ StringRef sys::detail::getHostCPUNameForBPF() { #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) -enum VendorSignatures { - SIG_INTEL = 0x756e6547 /* Genu */, - SIG_AMD = 0x68747541 /* Auth */ -}; - // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID // support. 
Consequently, for i386, the presence of CPUID is checked first @@ -495,6 +490,42 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, #endif } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (MaxLeaf == nullptr) + MaxLeaf = &EAX; + else + *MaxLeaf = 0; + + if (!isCpuIdSupported()) + return VendorSignatures::UNKNOWN; + + if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) + return VendorSignatures::UNKNOWN; + + // "Genu ineI ntel" + if (EBX == 0x756e6547 && ECX == 0x6c65746e && EDX == 0x49656e69) + return VendorSignatures::GENUINE_INTEL; + + // "Auth enti cAMD" + if (EBX == 0x68747541 && ECX == 0x69746e65 && EDX == 0x444d4163) + return VendorSignatures::AUTHENTIC_AMD; + + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm + +using namespace llvm::sys::detail::x86; + /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return /// the 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. 
@@ -1092,14 +1123,12 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, } StringRef sys::getHostCPUName() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - unsigned MaxLeaf, Vendor; - - if (!isCpuIdSupported()) + unsigned MaxLeaf = 0; + const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); + if (Vendor == VendorSignatures::UNKNOWN) return "generic"; - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) - return "generic"; + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Family = 0, Model = 0; @@ -1114,10 +1143,10 @@ StringRef sys::getHostCPUName() { StringRef CPU; - if (Vendor == SIG_INTEL) { + if (Vendor == VendorSignatures::GENUINE_INTEL) { CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); - } else if (Vendor == SIG_AMD) { + } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); } @@ -1219,6 +1248,19 @@ StringRef sys::getHostCPUName() { } #else StringRef sys::getHostCPUName() { return "generic"; } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm #endif #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 15fa54e2f6a2..8839f00a69de 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Host.h" #include #include @@ -727,13 +728,25 @@ class ExegesisX86Target : public ExegesisTarget { #if defined(__linux__) && 
defined(HAVE_LIBPFM) && \ defined(LIBPFM_HAS_FIELD_CYCLES) - // If the kernel supports it, the hardware still may not have it. - return X86LbrCounter::checkLbrSupport(); + // FIXME: Fix this. + // https://bugs.llvm.org/show_bug.cgi?id=48918 + // For now, only do the check if we see an Intel machine because + // the counter uses some intel-specific magic and it could + // be confuse and think an AMD machine actually has LBR support. +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64) + using namespace sys::detail::x86; + + if (getVendorSignature() == VendorSignatures::GENUINE_INTEL) + // If the kernel supports it, the hardware still may not have it. + return X86LbrCounter::checkLbrSupport(); #else + llvm_unreachable("Running X86 exegesis on non-X86 target"); +#endif +#endif return llvm::make_error( "LBR not supported on this kernel and/or platform", llvm::errc::not_supported); -#endif } std::unique_ptr withSavedState() const override { From 1a9f4b3a3890786dbee252e542338f906da98b3c Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Wed, 10 Mar 2021 02:41:58 -0500 Subject: [PATCH 303/318] [llvm] Fix thinko in getVendorSignature(), where expected values of ECX and EDX were flipped for the AMD case. 
Follow up to D97504 Differential Revision: https://reviews.llvm.org/D98322 (cherry picked from commit 64d2c326b7f01942f0179fb797070e5cefbba303) --- llvm/lib/Support/Host.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 79482dfacd29..09146c47ff2c 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -509,11 +509,11 @@ VendorSignatures getVendorSignature(unsigned *MaxLeaf) { return VendorSignatures::UNKNOWN; // "Genu ineI ntel" - if (EBX == 0x756e6547 && ECX == 0x6c65746e && EDX == 0x49656e69) + if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) return VendorSignatures::GENUINE_INTEL; // "Auth enti cAMD" - if (EBX == 0x68747541 && ECX == 0x69746e65 && EDX == 0x444d4163) + if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) return VendorSignatures::AUTHENTIC_AMD; return VendorSignatures::UNKNOWN; From edd770b4bc99eb83dcf6ec1d2af92da42de2c575 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Thu, 17 Jun 2021 19:06:10 -0700 Subject: [PATCH 304/318] [Coroutine] Properly deal with byval and noalias parameters This patch is to address https://bugs.llvm.org/show_bug.cgi?id=48857. Previous attempts can be found in D104007 and D101980. A lot of discussions can be found in those two patches. To summarize the bug: When Clang emits IR for coroutines, the first thing it does is to make a copy of every argument to the local stack, so that uses of the arguments in the function will all refer to the local copies instead of the arguments directly. However, in some cases we find that arguments are still directly used: When Clang emits IR for a function that has pass-by-value arguments, sometimes it emits an argument with byval attribute. A byval attribute is considered to be local to the function (just like alloca) and hence it can be easily determined that it does not alias other values. 
If in the IR there exists a memcpy from a byval argument to a local alloca, and then from that local alloca to another alloca, MemCpyOpt will optimize out the first memcpy because the byval argument's content will not change. This causes issues because after a coroutine suspension, the byval argument may die outside of the function, and later uses will lead to memory use-after-free. This is only a problem for arguments with either byval attribute or noalias attribute, because only these two kinds are considered local. Arguments without these two attributes will be considered to alias coro_suspend and hence we won't have this problem. So we need to be able to deal with these two attributes in coroutines properly. For noalias arguments, since coro_suspend may potentially change the value of any argument outside of the function, we simply shouldn't mark any argument in a coroutine as noalias. This can be taken care of in the CoroEarly pass. For byval arguments, if such an argument needs to live across suspensions, we will have to copy their value content to the frame, not just the pointer.
Differential Revision: https://reviews.llvm.org/D104184 (cherry picked from commit 3522167efd80e2fef42a865cdf7481d60d062603) --- llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 9 ++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 31 ++++- .../Transforms/Coroutines/coro-byval-param.ll | 127 ++++++++++++++++++ .../Coroutines/coro-noalias-param.ll | 40 ++++++ 4 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/Coroutines/coro-byval-param.ll create mode 100644 llvm/test/Transforms/Coroutines/coro-noalias-param.ll diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 1660e41ba830..5e5e513cdfda 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -149,6 +149,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { bool Changed = false; CoroIdInst *CoroId = nullptr; SmallVector CoroFrees; + bool HasCoroSuspend = false; for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) { Instruction &I = *IB++; if (auto *CB = dyn_cast(&I)) { @@ -163,6 +164,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { // pass expects that there is at most one final suspend point. if (cast(&I)->isFinal()) CB->setCannotDuplicate(); + HasCoroSuspend = true; break; case Intrinsic::coro_end_async: case Intrinsic::coro_end: @@ -213,6 +215,13 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { if (CoroId) for (CoroFreeInst *CF : CoroFrees) CF->setArgOperand(0, CoroId); + // Coroutine suspention could potentially lead to any argument modified + // outside of the function, hence arguments should not have noalias + // attributes. 
+ if (HasCoroSuspend) + for (Argument &A : F.args()) + if (A.hasNoAliasAttr()) + A.removeAttr(Attribute::NoAlias); return Changed; } diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index e53e7605b254..e1e0d50979dc 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -781,7 +781,13 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, PromiseAlloca, DenseMap>{}, false); // Create an entry for every spilled value. for (auto &S : FrameData.Spills) { - FieldIDType Id = B.addField(S.first->getType(), None); + Type *FieldType = S.first->getType(); + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + if (const Argument *A = dyn_cast(S.first)) + if (A->hasByValAttr()) + FieldType = FieldType->getPointerElementType(); + FieldIDType Id = B.addField(FieldType, None); FrameData.setFieldIndex(S.first, Id); } @@ -1149,6 +1155,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, // Create a store instruction storing the value into the // coroutine frame. Instruction *InsertPt = nullptr; + bool NeedToCopyArgPtrValue = false; if (auto *Arg = dyn_cast(Def)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of @@ -1159,6 +1166,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, // from the coroutine function. Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); + if (Arg->hasByValAttr()) + NeedToCopyArgPtrValue = true; + } else if (auto *CSI = dyn_cast(Def)) { // Don't spill immediately after a suspend; splitting assumes // that the suspend will be followed by a branch. 
@@ -1193,7 +1203,15 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, Builder.SetInsertPoint(InsertPt); auto *G = Builder.CreateConstInBoundsGEP2_32( FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); - Builder.CreateStore(Def, G); + if (NeedToCopyArgPtrValue) { + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + auto *Value = + Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); + Builder.CreateStore(Value, G); + } else { + Builder.CreateStore(Def, G); + } BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; @@ -1207,9 +1225,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData, auto *GEP = GetFramePointer(E.first); GEP->setName(E.first->getName() + Twine(".reload.addr")); - CurrentReload = Builder.CreateLoad( - FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, - E.first->getName() + Twine(".reload")); + if (NeedToCopyArgPtrValue) + CurrentReload = GEP; + else + CurrentReload = Builder.CreateLoad( + FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, + E.first->getName() + Twine(".reload")); TinyPtrVector DIs = FindDbgDeclareUses(Def); for (DbgDeclareInst *DDI : DIs) { diff --git a/llvm/test/Transforms/Coroutines/coro-byval-param.ll b/llvm/test/Transforms/Coroutines/coro-byval-param.ll new file mode 100644 index 000000000000..6c3c4582fc8b --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-byval-param.ll @@ -0,0 +1,127 @@ +; RUN: opt < %s -passes=coro-split -S | FileCheck %s +%promise_type = type { i8 } +%struct.A = type <{ i64, i64, i32, [4 x i8] }> + +; Function Attrs: noinline ssp uwtable mustprogress +define %promise_type* @foo(%struct.A* nocapture readonly byval(%struct.A) align 8 %a1) #0 { +entry: + %__promise = alloca %promise_type, align 1 + %a2 = alloca %struct.A, align 8 + %0 = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 0 + %1 = call token 
@llvm.coro.id(i32 16, i8* nonnull %0, i8* bitcast (%promise_type* (%struct.A*)* @foo to i8*), i8* null) + %2 = call i1 @llvm.coro.alloc(token %1) + br i1 %2, label %coro.alloc, label %coro.init + +coro.alloc: ; preds = %entry + %3 = call i64 @llvm.coro.size.i64() + %call = call noalias nonnull i8* @_Znwm(i64 %3) #9 + br label %coro.init + +coro.init: ; preds = %coro.alloc, %entry + %4 = phi i8* [ null, %entry ], [ %call, %coro.alloc ] + %5 = call i8* @llvm.coro.begin(token %1, i8* %4) #10 + %6 = bitcast %struct.A* %a1 to i8* + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2 + %call2 = call %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1) %__promise) + call void @initial_suspend(%promise_type* nonnull dereferenceable(1) %__promise) + %7 = call token @llvm.coro.save(i8* null) + call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2 + %8 = call i8 @llvm.coro.suspend(token %7, i1 false) + switch i8 %8, label %coro.ret [ + i8 0, label %init.ready + i8 1, label %cleanup33 + ] + +init.ready: ; preds = %coro.init + %9 = bitcast %struct.A* %a2 to i8* + call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %9) #2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %6, i64 24, i1 false) + call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %9) #2 + call void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1) %__promise) #2 + %10 = call token @llvm.coro.save(i8* null) + call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2 + %11 = call i8 @llvm.coro.suspend(token %10, i1 true) #10 + %switch = icmp ult i8 %11, 2 + br i1 %switch, label %cleanup33, label %coro.ret + +cleanup33: ; preds = %init.ready, %coro.init + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) #2 + %12 = call i8* @llvm.coro.free(token %1, i8* %5) + %.not = 
icmp eq i8* %12, null + br i1 %.not, label %coro.ret, label %coro.free + +coro.free: ; preds = %cleanup33 + call void @_ZdlPv(i8* nonnull %12) #2 + br label %coro.ret + +coro.ret: ; preds = %coro.free, %cleanup33, %init.ready, %coro.init + %13 = call i1 @llvm.coro.end(i8* null, i1 false) #10 + ret %promise_type* %call2 +} + +; check that the frame contains the entire struct, instead of just the struct pointer +; CHECK: %foo.Frame = type { void (%foo.Frame*)*, void (%foo.Frame*)*, %promise_type, %struct.A, i1 } + +; Function Attrs: argmemonly nounwind readonly +declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1 + +; Function Attrs: nounwind +declare i1 @llvm.coro.alloc(token) #2 + +; Function Attrs: nobuiltin nofree allocsize(0) +declare nonnull i8* @_Znwm(i64) local_unnamed_addr #3 + +; Function Attrs: nounwind readnone +declare i64 @llvm.coro.size.i64() #4 + +; Function Attrs: nounwind +declare i8* @llvm.coro.begin(token, i8* writeonly) #2 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #6 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare void @initial_suspend(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: nounwind +declare token @llvm.coro.save(i8*) #2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare hidden fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8*) unnamed_addr #7 align 2 + +; 
Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 + +; Function Attrs: nounwind +declare i8 @llvm.coro.suspend(token, i1) #2 + +; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress +declare void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2 + +; Function Attrs: nounwind +declare i1 @llvm.coro.end(i8*, i1) #2 + +; Function Attrs: nobuiltin nounwind +declare void @_ZdlPv(i8*) local_unnamed_addr #8 + +; Function Attrs: argmemonly nounwind readonly +declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1 + +attributes #0 = { noinline ssp uwtable mustprogress "coroutine.presplit"="1" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind } +attributes #3 = { nobuiltin nofree allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind readnone } +attributes #5 = { argmemonly nofree nosync nounwind willreturn } +attributes #6 = { argmemonly nofree nounwind willreturn } +attributes #7 = { noinline nounwind ssp uwtable willreturn mustprogress "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #8 = { nobuiltin nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" 
"tune-cpu"="generic" } +attributes #9 = { allocsize(0) } +attributes #10 = { noduplicate } + diff --git a/llvm/test/Transforms/Coroutines/coro-noalias-param.ll b/llvm/test/Transforms/Coroutines/coro-noalias-param.ll new file mode 100644 index 000000000000..0b9a70ad0366 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-noalias-param.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -S -passes=coro-early | FileCheck %s +%struct.A = type <{ i64, i64, i32, [4 x i8] }> + +define void @f(%struct.A* nocapture readonly noalias align 8 %a) { + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @print(i32 0) + %s1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %s1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + call void @print(i32 1) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret void +} + +; check that the noalias attribute is removed from the argument +; CHECK: define void @f(%struct.A* nocapture readonly align 8 %a) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @print(i32) +declare void @free(i8*) From 02b775a5efb6127e289bb00d91b88a303d51c85a Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Tue, 9 Mar 2021 08:31:23 +0100 Subject: [PATCH 305/318] [nfc] llvm-dwarfdump: DWARFAbbreviationDeclaration::AttributeSpec -> DWARFAttribute `AttributeSpec` does not contain values while `DWARFAttribute` already does. 
Therefore one no longer needs to pass `uint64_t *OffsetPtr`. Differential Revision: https://reviews.llvm.org/D98194 (cherry picked from commit ba8907bf6f2cf5ca2f6d92a6dfe7bc9cf74f003f) --- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 427f6f4942c3..c2760dd02471 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -69,7 +69,7 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS, } } -static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, +static void dumpLocation(raw_ostream &OS, const DWARFFormValue &FormValue, DWARFUnit *U, unsigned Indent, DIDumpOptions DumpOpts) { DWARFContext &Ctx = U->getContext(); @@ -230,21 +230,22 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, - uint64_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent, + const DWARFAttribute &AttrValue, unsigned Indent, DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; OS << BaseIndent; OS.indent(Indent + 2); + dwarf::Attribute Attr = AttrValue.Attr; WithColor(OS, HighlightColor::Attribute) << formatv("{0}", Attr); + dwarf::Form Form = AttrValue.Value.getForm(); if (DumpOpts.Verbose || DumpOpts.ShowForm) OS << formatv(" [{0}]", Form); DWARFUnit *U = Die.getDwarfUnit(); - DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr); + const DWARFFormValue &FormValue = AttrValue.Value; OS << "\t("; @@ -631,15 +632,14 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, OS << '\n'; // Dump all data in the DIE for the attributes. 
- for (const auto &AttrSpec : AbbrevDecl->attributes()) { - if (AttrSpec.Form == DW_FORM_implicit_const) { + for (const DWARFAttribute &AttrValue : attributes()) { + if (AttrValue.Value.getForm() == DW_FORM_implicit_const) { // We are dumping .debug_info section , // implicit_const attribute values are not really stored here, // but in .debug_abbrev section. So we just skip such attrs. continue; } - dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent, DumpOpts); + dumpAttribute(OS, *this, AttrValue, Indent, DumpOpts); } DWARFDie child = getFirstChild(); From e8a397203c67adbeae04763ce25c6a5ae76af52c Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Tue, 9 Mar 2021 09:26:17 +0100 Subject: [PATCH 306/318] llvm-dwarfdump: Fix DWARF-5 DW_FORM_implicit_const (used by GCC) Differential Revision: https://reviews.llvm.org/D98195 (cherry picked from commit 4289a7f1d78972e9f1fa173c8ee0f6b8b45223d7) --- .../DWARF/DWARFAbbreviationDeclaration.h | 10 ++++++ llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 23 ++++++------- llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp | 5 +++ llvm/test/DebugInfo/implicit-const-test2.s | 34 +++++++++++++++++++ 4 files changed, 60 insertions(+), 12 deletions(-) create mode 100644 llvm/test/DebugInfo/implicit-const-test2.s diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index 39ae53c4e7fe..cf4c827b9267 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -111,6 +111,16 @@ class DWARFAbbreviationDeclaration { return AttributeSpecs[idx].Attr; } + bool getAttrIsImplicitConstByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return AttributeSpecs[idx].isImplicitConst(); + } + + int64_t getAttrImplicitConstValueByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return 
AttributeSpecs[idx].getImplicitConstValue(); + } + /// Get the index of the specified attribute. /// /// Searches the this abbreviation declaration for the index of the specified diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index c2760dd02471..5a55f3a04148 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -632,15 +632,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, OS << '\n'; // Dump all data in the DIE for the attributes. - for (const DWARFAttribute &AttrValue : attributes()) { - if (AttrValue.Value.getForm() == DW_FORM_implicit_const) { - // We are dumping .debug_info section , - // implicit_const attribute values are not really stored here, - // but in .debug_abbrev section. So we just skip such attrs. - continue; - } + for (const DWARFAttribute &AttrValue : attributes()) dumpAttribute(OS, *this, AttrValue, Indent, DumpOpts); - } DWARFDie child = getFirstChild(); if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) { @@ -723,10 +716,16 @@ void DWARFDie::attribute_iterator::updateForIndex( // Add the previous byte size of any previous attribute value. 
AttrValue.Offset += AttrValue.ByteSize; uint64_t ParseOffset = AttrValue.Offset; - auto U = Die.getDwarfUnit(); - assert(U && "Die must have valid DWARF unit"); - AttrValue.Value = DWARFFormValue::createFromUnit( - AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + if (AbbrDecl.getAttrIsImplicitConstByIndex(Index)) + AttrValue.Value = DWARFFormValue::createFromSValue( + AbbrDecl.getFormByIndex(Index), + AbbrDecl.getAttrImplicitConstValueByIndex(Index)); + else { + auto U = Die.getDwarfUnit(); + assert(U && "Die must have valid DWARF unit"); + AttrValue.Value = DWARFFormValue::createFromUnit( + AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + } AttrValue.ByteSize = ParseOffset - AttrValue.Offset; } else { assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only"); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 7a84605211fb..2559765876d9 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -168,6 +168,7 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, case DW_FORM_line_strp: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: + case DW_FORM_implicit_const: if (Optional FixedSize = dwarf::getFixedFormByteSize(Form, Params)) { *OffsetPtr += *FixedSize; @@ -345,6 +346,9 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_ref_sig8: Value.uval = Data.getU64(OffsetPtr, &Err); break; + case DW_FORM_implicit_const: + // Value has been already set by DWARFFormValue::createFromSValue. + break; default: // DWARFFormValue::skipValue() will have caught this and caused all // DWARF DIEs to fail to be parsed, so this code is not be reachable. 
@@ -482,6 +486,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { break; case DW_FORM_sdata: + case DW_FORM_implicit_const: OS << Value.sval; break; case DW_FORM_udata: diff --git a/llvm/test/DebugInfo/implicit-const-test2.s b/llvm/test/DebugInfo/implicit-const-test2.s new file mode 100644 index 000000000000..dd50592afde9 --- /dev/null +++ b/llvm/test/DebugInfo/implicit-const-test2.s @@ -0,0 +1,34 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc --filetype=obj --triple=x86_64-pc-linux %s -o %t.o -g + +# RUN: llvm-dwarfdump -v %t.o | FileCheck %s + +# CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no +# CHECK-NEXT: DW_AT_language DW_FORM_implicit_const 29 + +# CHECK: 0x0000000c: DW_TAG_compile_unit [1] +# CHECK-NEXT: DW_AT_language [DW_FORM_implicit_const] (DW_LANG_C11) + + .section .debug_info,"",@progbits +.Ldebug_info0: + .long .Ldebug_info0_end - .Ldebug_info0_start # Length of Compilation Unit Info +.Ldebug_info0_start: + .value 0x5 # DWARF version number + .byte 0x1 # DW_UT_compile + .byte 0x8 # Pointer Size (in bytes) + .long .Ldebug_abbrev0 # Offset Into Abbrev. Section + .uleb128 0x1 # (DIE DW_TAG_compile_unit) + # DW_AT_language +.Ldebug_info0_end: + .section .debug_abbrev,"",@progbits +.Ldebug_abbrev0: + .uleb128 0x1 # (abbrev code) + .uleb128 0x11 # (TAG: DW_TAG_compile_unit) + .byte 0x0 # DW_children_no + .uleb128 0x13 # (DW_AT_language) + .uleb128 0x21 # (DW_FORM_implicit_const) + .sleb128 0x1d + .byte 0 + .byte 0 + .byte 0 From b6ff4dd2e99e86390321fdd43a22c93d0659fe2a Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 17 Feb 2021 09:22:43 -0500 Subject: [PATCH 307/318] [PowerPC] Handle FP physical register in inline asm constraint. Do not defer to the base class when the register constraint is a physical fpr. The base class will select SPILLTOVSRRC as the register class and register allocation will fail on subtargets without VSX registers. 
Differential Revision: https://reviews.llvm.org/D91629 (cherry picked from commit 4e127bce2d1133ba95a551d69bd0e8fc3b4f9e71) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 41 ++++++++++++++----- .../PowerPC/inline-asm-physical-fpr-spe.ll | 23 +++++++++++ .../PowerPC/inline-asm-physical-fpr.ll | 26 ++++++++++++ 3 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7833bfc1d1b6..26dc3afc899e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15154,17 +15154,38 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::LRRCRegClass); } - // If we name a VSX register, we can't defer to the base class because it - // will not recognize the correct register (their names will be VSL{0-31} - // and V{0-31} so they won't match). So we match them here. - if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { - int VSNum = atoi(Constraint.data() + 3); - assert(VSNum >= 0 && VSNum <= 63 && - "Attempted to access a vsr out of range"); - if (VSNum < 32) - return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); - return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + // Handle special cases of physical registers that are not properly handled + // by the base class. + if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { + // If we name a VSX register, we can't defer to the base class because it + // will not recognize the correct register (their names will be VSL{0-31} + // and V{0-31} so they won't match). So we match them here. 
+ if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { + int VSNum = atoi(Constraint.data() + 3); + assert(VSNum >= 0 && VSNum <= 63 && + "Attempted to access a vsr out of range"); + if (VSNum < 32) + return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); + return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + } + + // For float registers, we can't defer to the base class as it will match + // the SPILLTOVSRRC class. + if (Constraint.size() > 3 && Constraint[1] == 'f') { + int RegNum = atoi(Constraint.data() + 2); + if (RegNum > 31 || RegNum < 0) + report_fatal_error("Invalid floating point register number"); + if (VT == MVT::f32 || VT == MVT::i32) + return Subtarget.hasSPE() + ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return Subtarget.hasSPE() + ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); + } } + std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll new file mode 100644 index 000000000000..94147501878f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr-spe.ll @@ -0,0 +1,23 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %s + +define i32 @test_f32(float %x) { +; CHECK-LABEL: test_f32: +; CHECK: #APP +; CHECK-NEXT: efsctsi 31, 3 +; CHECK-NEXT: #NO_APP +entry: + %0 = call i32 asm sideeffect "efsctsi $0, $1", "={f31},f"(float %x) + ret i32 %0 +} + +define i32 @test_f64(double %x) { +; CHECK-LABEL: test_f64: +; CHECK: #APP +; CHECK-NEXT: efdctsi 0, 3 +; CHECK-NEXT: #NO_APP +entry: + %0 = call i32 asm sideeffect "efdctsi $0, $1", "={f0},d"(double %x) + ret i32 %0 +} + diff --git 
a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll new file mode 100644 index 000000000000..132b5d150054 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -mattr=-altivec -verify-machineinstrs \ +; RUN: -mtriple=powerpc-unknown-aix < %s | FileCheck %s + +; RUN: llc -mcpu=pwr7 -mattr=-altivec -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s + + +define dso_local double @test_double(double %a, double %b) { +entry: + %0 = tail call double asm "fadd. $0,$1,$2\0A", "={f31},d,d,0"(double %a, double %b, double 0.000000e+00) + ret double %0 +} + +; CHECK-LABEL: test_double +; CHECK: #APP +; CHECK-NEXT: fadd. 31,1,2 + +define dso_local signext i32 @test_int(double %a, double %b) { +entry: + %0 = tail call i32 asm "fadd. $0,$1,$2\0A", "={f0},d,d,0"(double %a, double %b, i32 0) + ret i32 %0 +} + +; CHECK-LABEL: test_int +; CHECK: #APP +; CHECK-NEXT: fadd. 0,1,2 From 07234c7d6bc246925710b88a1f9552f678587165 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20Lern=C3=B6?= Date: Fri, 19 Mar 2021 18:55:52 -0400 Subject: [PATCH 308/318] Add type attributes to LLVM C API The LLVM C API is missing type attributes as is needed by attributes such as sret and byval. This patch adds three missing wrapper functions. 
Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=48249 https://reviews.llvm.org/D97763 (cherry picked from commit 528f6f7d617757addac9b51dd5bcc1ab1352e9be) --- llvm/include/llvm-c/Core.h | 12 ++++++++++++ llvm/lib/IR/Core.cpp | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index a78df16ca404..2901ab715810 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -604,6 +604,17 @@ unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A); */ uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A); +/** + * Create a type attribute + */ +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref); + +/** + * Get the type attribute's value. + */ +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A); + /** * Create a string attribute. */ @@ -626,6 +637,7 @@ const char *LLVMGetStringAttributeValue(LLVMAttributeRef A, unsigned *Length); */ LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A); LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A); +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A); /** * Obtain a Type from a context by its registered name. 
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 90ba69069bae..039b34ace6ab 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -164,6 +164,18 @@ uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A) { return Attr.getValueAsInt(); } +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref) { + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; + return wrap(Attribute::get(Ctx, AttrKind, unwrap(type_ref))); +} + +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A) { + auto Attr = unwrap(A); + return wrap(Attr.getValueAsType()); +} + LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C, const char *K, unsigned KLength, const char *V, unsigned VLength) { @@ -194,6 +206,10 @@ LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A) { return unwrap(A).isStringAttribute(); } +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A) { + return unwrap(A).isTypeAttribute(); +} + char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) { std::string MsgStorage; raw_string_ostream Stream(MsgStorage); From fd27379463917761a5e6135cfdb0b2d163a72ee3 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 9 Jun 2021 20:48:42 +0100 Subject: [PATCH 309/318] [ARM] MVE VPT block tests with debug info. 
NFC --- .../CodeGen/Thumb2/mve-vpt-block-debug.mir | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir new file mode 100644 index 000000000000..cb36f48934f1 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-none-unknown-eabihf" + + define <4 x i32> @test(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) #0 !dbg !5 { + entry: + call void @llvm.dbg.value(metadata <4 x i32> %x, metadata !17, metadata !DIExpression()), !dbg !21 + call void @llvm.dbg.value(metadata <4 x i32> %y, metadata !18, metadata !DIExpression()), !dbg !21 + call void @llvm.dbg.value(metadata <4 x i32> %z, metadata !19, metadata !DIExpression()), !dbg !21 + %0 = icmp sle <4 x i32> %x, %y, !dbg !22 + call void @llvm.dbg.value(metadata i32 undef, metadata !20, metadata !DIExpression()), !dbg !21 + %1 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i1> %0, <4 x i32> %z), !dbg !23 + call void @llvm.dbg.value(metadata <4 x i32> %1, metadata !19, metadata !DIExpression()), !dbg !21 + %2 = icmp sgt <4 x i32> %x, %y, !dbg !24 + call void @llvm.dbg.value(metadata i32 undef, metadata !20, metadata !DIExpression()), !dbg !21 + %3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i1> %2, <4 x i32> %1), !dbg !25 + call void @llvm.dbg.value(metadata <4 x i32> %3, metadata !19, metadata !DIExpression()), !dbg !21 + ret <4 x i32> %3, !dbg !26 + } + + declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #1 + 
declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + + attributes #0 = { "target-features"="+fullfp16,+lob,+mve.fp" } + attributes #1 = { nounwind readnone "target-features"="+fullfp16,+lob,+mve.fp" } + attributes #2 = { nofree nosync nounwind readnone speculatable willreturn "target-features"="+fullfp16,+lob,+mve.fp" } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project 921572a18dc9b97c259bda2ce8130f04b2ebe3ed)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "tmp.c", directory: "/work/llvm-project/build") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !6, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) + !6 = !DISubroutineType(types: !7) + !7 = !{!8, !8, !8, !8} + !8 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32x4_t", file: !9, line: 28, baseType: !10) + !9 = !DIFile(filename: "lib/clang/13.0.0/include/arm_mve.h", directory: "/work/llvm-project/build") + !10 = !DICompositeType(tag: DW_TAG_array_type, baseType: !11, size: 128, flags: DIFlagVector, elements: !14) + !11 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32_t", file: !12, line: 58, baseType: !13) + !12 = !DIFile(filename: "pb-rel/testabletools/include/stdint.h", directory: "/work") + !13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !14 = !{!15} + !15 = !DISubrange(count: 4) + !16 = !{!17, !18, !19, !20} + !17 = !DILocalVariable(name: "x", arg: 1, scope: !5, file: !1, line: 2, type: !8) + !18 = !DILocalVariable(name: "y", arg: 2, scope: !5, file: !1, line: 2, type: !8) + !19 = !DILocalVariable(name: "z", arg: 
3, scope: !5, file: !1, line: 2, type: !8) + !20 = !DILocalVariable(name: "p", scope: !5, file: !1, line: 3, type: !13) + !21 = !DILocation(line: 0, scope: !5) + !22 = !DILocation(line: 3, column: 11, scope: !5) + !23 = !DILocation(line: 4, column: 7, scope: !5) + !24 = !DILocation(line: 5, column: 7, scope: !5) + !25 = !DILocation(line: 6, column: 7, scope: !5) + !26 = !DILocation(line: 7, column: 3, scope: !5) + +... +--- +name: test +tracksRegLiveness: true +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } + - { reg: '$q2', virtual-reg: '' } +body: | + bb.0.entry: + liveins: $q0, $q1, $q2 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK: DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit $q1, implicit $q0, implicit killed $q2, debug-location !23 { + ; CHECK: MVE_VPTv4s32 8, renamable $q1, renamable $q0, 10, implicit-def $vpr, debug-location !23 + ; CHECK: renamable $q2 = MVE_VADDi32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q2, debug-location !23 + ; CHECK: } + ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg, debug-location !24 + ; CHECK: BUNDLE implicit-def $q2, 
implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit killed $vpr, implicit killed $q0, implicit killed $q1, implicit killed $q2, debug-location !25 { + ; CHECK: MVE_VPST 8, implicit $vpr, debug-location !25 + ; CHECK: renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q2, debug-location !25 + ; CHECK: } + ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, undef $q0, debug-location !26 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0, debug-location !26 + DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 + DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + DBG_VALUE $q1, $noreg, !18, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + renamable $vpr = MVE_VCMPs32 renamable $q1, renamable $q0, 10, 0, $noreg, debug-location !22 + DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + renamable $q2 = MVE_VADDi32 renamable $q0, renamable $q1, 1, renamable $vpr, killed renamable $q2, debug-location !23 + DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg, debug-location !24 + renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q2, debug-location !25 + DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 + $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, undef $q0, debug-location !26 + tBX_RET 14 /* CC::al */, $noreg, implicit $q0, debug-location !26 + +... 
From c7381b628d63b4423617163ee1116e64f49d2265 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 10 Jun 2021 14:49:04 +0100 Subject: [PATCH 310/318] [ARM] Skip debug during vpt block creation Debug info is currently preventing VPT block creation, leading to different codegen. This patch attempts to skip any debug instructions during vpt block creation, making sure they do not interfere. Differential Revision: https://reviews.llvm.org/D103610 --- llvm/lib/Target/ARM/MVEVPTBlockPass.cpp | 13 ++++++++++++- llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir | 16 ++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index 9a710b784fd1..c7f451cba14f 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -107,6 +107,12 @@ static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, NumInstrsSteppedOver = 0; while (Iter != EndIter) { + if (Iter->isDebugInstr()) { + // Skip debug instructions + ++Iter; + continue; + } + NextPred = getVPTInstrPredicate(*Iter, PredReg); assert(NextPred != ARMVCC::Else && "VPT block pass does not expect Else preds"); @@ -170,6 +176,8 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = std::next(BlockBeg); AddedInstIter != Iter; ++AddedInstIter) { + if (AddedInstIter->isDebugInstr()) + continue; dbgs() << " adding: "; AddedInstIter->dump(); }); @@ -197,7 +205,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) break; - LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); // Record the new size of the block BlockSize += ElseInstCnt; @@ -211,6 +219,9 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, // Note that we are using "Iter" to iterate over the block so we 
can update // it at the same time. for (; Iter != VPNOTBlockEndIter; ++Iter) { + if (Iter->isDebugInstr()) + continue; + // Find the register in which the predicate is int OpIdx = findFirstVPTPredOperandIdx(*Iter); assert(OpIdx != -1); diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir index cb36f48934f1..ce540e506131 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-debug.mir @@ -80,18 +80,14 @@ body: | ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 - ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit $q1, implicit $q0, implicit killed $q2, debug-location !23 { - ; CHECK: MVE_VPTv4s32 8, renamable $q1, renamable $q0, 10, implicit-def $vpr, debug-location !23 + ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit killed $q1, implicit killed $q0, implicit killed $q2, debug-location !23 { + ; CHECK: MVE_VPTv4s32 12, renamable $q1, renamable $q0, 10, implicit-def $vpr, debug-location !23 ; CHECK: renamable $q2 = MVE_VADDi32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q2, debug-location !23 + ; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 + ; CHECK: DBG_VALUE internal $q2, $noreg, !19, !DIExpression(), debug-location !21 + ; CHECK: renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 2, internal killed renamable $vpr, internal killed renamable $q2, debug-location !25 + ; CHECK: DBG_VALUE internal $q2, $noreg, !19, !DIExpression(), debug-location !21 ; CHECK: } - 
; CHECK: DBG_VALUE $noreg, $noreg, !20, !DIExpression(), debug-location !21 - ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 - ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg, debug-location !24 - ; CHECK: BUNDLE implicit-def $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit killed $vpr, implicit killed $q0, implicit killed $q1, implicit killed $q2, debug-location !25 { - ; CHECK: MVE_VPST 8, implicit $vpr, debug-location !25 - ; CHECK: renamable $q2 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q2, debug-location !25 - ; CHECK: } - ; CHECK: DBG_VALUE $q2, $noreg, !19, !DIExpression(), debug-location !21 ; CHECK: $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, undef $q0, debug-location !26 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0, debug-location !26 DBG_VALUE $q0, $noreg, !17, !DIExpression(), debug-location !21 From a3543fd9d47054596fed913b8ddc68285200c821 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Fri, 19 Mar 2021 11:19:32 +0000 Subject: [PATCH 311/318] [ARM] Handle debug instrs in ARM Low Overhead Loop pass In function ConvertVPTBlocks(), it is assumed that every instruction within a vector-predicated block is predicated. This is false for debug instructions, used by LLVM. Because of this, an assertion failure is reached when an input contains debug instructions inside VPT blocks. In non-assert builds, an out of bounds memory access took place. The present patch properly covers the case of debug instructions. 
Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D99075 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 20 +- .../LowOverheadLoops/skip-vpt-debug.mir | 330 ++++++++++++++++++ 2 files changed, 343 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 61a924078f29..8dc532058492 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1467,14 +1467,15 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { auto RemovePredicate = [](MachineInstr *MI) { + if (MI->isDebugInstr()) + return; LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI); - if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) { - assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && - "Expected Then predicate!"); - MI->getOperand(PIdx).setImm(ARMVCC::None); - MI->getOperand(PIdx+1).setReg(0); - } else - llvm_unreachable("trying to unpredicate a non-predicated instruction"); + int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); + assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); + assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && + "Expected Then predicate!"); + MI->getOperand(PIdx).setImm(ARMVCC::None); + MI->getOperand(PIdx + 1).setReg(0); }; for (auto &Block : LoLoop.getVPTBlocks()) { @@ -1518,8 +1519,13 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { // - Insert a new vpst to predicate the instruction(s) that following // the divergent vpr def. 
MachineInstr *Divergent = VPTState::getDivergent(Block); + MachineBasicBlock *MBB = Divergent->getParent(); auto DivergentNext = ++MachineBasicBlock::iterator(Divergent); + while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) + ++DivergentNext; + bool DivergentNextIsPredicated = + DivergentNext != MBB->end() && getVPTInstrPredicate(*DivergentNext) != ARMVCC::None; for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir new file mode 100644 index 000000000000..8637ab3f1856 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir @@ -0,0 +1,330 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s +--- | + ; ModuleID = 'skip-vpt-debug.ll' + source_filename = "skip-vpt-debug.c" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabihf" + + ; Function Attrs: nofree norecurse nounwind optsize + define hidden void @arm_max_no_idx_f32(float* nocapture readonly %pSrc, i32 %blockSize, float* nocapture %pResult) local_unnamed_addr #0 !dbg !13 { + entry: + call void @llvm.dbg.value(metadata float* %pSrc, metadata !24, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata float* %pResult, metadata !26, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29 + %cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30 + br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31 + + vector.ph: ; preds = %entry + %n.rnd.up = add i32 %blockSize, 3, !dbg !31 + %n.vec = 
and i32 %n.rnd.up, -4, !dbg !31 + %0 = add i32 %n.vec, -4, !dbg !31 + %1 = lshr i32 %0, 2, !dbg !31 + %2 = add nuw nsw i32 %1, 1, !dbg !31 + %3 = call i32 @llvm.start.loop.iterations.i32(i32 %2), !dbg !31 + br label %vector.body, !dbg !31 + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv1 = phi float* [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ] + %vec.phi = phi <4 x float> [ , %vector.ph ], [ %10, %vector.body ] + %4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ] + %5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ] + %lsr.iv12 = bitcast float* %lsr.iv1 to <4 x float>* + %6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5) + %7 = sub i32 %5, 4 + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34 + %8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38 + %9 = and <4 x i1> %6, %8, !dbg !40 + %10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40 + %scevgep = getelementptr float, float* %lsr.iv1, i32 4 + %11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1) + %12 = icmp ne i32 %11, 0 + br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41 + + middle.block: ; preds = %vector.body + %13 = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> %10), !dbg !31 + br label %while.end, !dbg !45 + + while.end: ; preds = %middle.block, %entry + %maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29 + store float %maxValue.0.lcssa, float* %pResult, align 4, !dbg !45, !tbaa !34 + ret void, !dbg !46 + } + + ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2 + + ; Function Attrs: argmemonly nofree nosync nounwind 
readonly willreturn + declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #3 + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2 + + ; Function Attrs: noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.start.loop.iterations.i32(i32) #4 + + ; Function Attrs: noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4 + + ; Function Attrs: nounwind readnone + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #5 + + attributes #0 = { nofree norecurse nounwind optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-fp16fml,-hwdiv-arm,-i8mm,-sb,-sha2" } + attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } + attributes #2 = { nofree nosync nounwind readnone willreturn } + attributes #3 = { argmemonly nofree nosync nounwind readonly willreturn } + attributes #4 = { noduplicate nofree nosync nounwind willreturn } + attributes #5 = { nounwind readnone } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11} + !llvm.ident = !{!12} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "skip-vpt-debug.c", directory: 
"/home/vicspe01") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{i32 1, !"static_rwdata", i32 1} + !7 = !{i32 1, !"enumsize_buildattr", i32 2} + !8 = !{i32 1, !"armlib_unavailable", i32 0} + !9 = !{i32 1, !"branch-target-enforcement", i32 0} + !10 = !{i32 1, !"sign-return-address", i32 0} + !11 = !{i32 1, !"sign-return-address-all", i32 0} + !12 = !{!"Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]"} + !13 = distinct !DISubprogram(name: "arm_max_no_idx_f32", scope: !1, file: !1, line: 5, type: !14, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23) + !14 = !DISubroutineType(types: !15) + !15 = !{null, !16, !20, !22} + !16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 32) + !17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18) + !18 = !DIDerivedType(tag: DW_TAG_typedef, name: "float32_t", file: !1, line: 1, baseType: !19) + !19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) + !20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 2, baseType: !21) + !21 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) + !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 32) + !23 = !{!24, !25, !26, !27, !28} + !24 = !DILocalVariable(name: "pSrc", arg: 1, scope: !13, file: !1, line: 5, type: !16) + !25 = !DILocalVariable(name: "blockSize", arg: 2, scope: !13, file: !1, line: 5, type: !20) + !26 = !DILocalVariable(name: "pResult", arg: 3, scope: !13, file: !1, line: 6, type: !22) + !27 = !DILocalVariable(name: "maxValue", scope: !13, file: !1, line: 7, type: !18) + !28 = !DILocalVariable(name: "newVal", scope: !13, file: !1, line: 8, type: !18) + !29 = !DILocation(line: 0, scope: !13) + !30 = !DILocation(line: 10, column: 20, scope: !13) + !31 = !DILocation(line: 10, 
column: 3, scope: !13) + !32 = !DILocation(line: 11, column: 14, scope: !33) + !33 = distinct !DILexicalBlock(scope: !13, file: !1, line: 10, column: 26) + !34 = !{!35, !35, i64 0} + !35 = !{!"float", !36, i64 0} + !36 = !{!"omnipotent char", !37, i64 0} + !37 = !{!"Simple C/C++ TBAA"} + !38 = !DILocation(line: 12, column: 18, scope: !39) + !39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 12, column: 9) + !40 = !DILocation(line: 12, column: 9, scope: !33) + !41 = distinct !{!41, !31, !42, !43, !44} + !42 = !DILocation(line: 15, column: 3, scope: !13) + !43 = !{!"llvm.loop.mustprogress"} + !44 = !{!"llvm.loop.isvectorized", i32 1} + !45 = !DILocation(line: 16, column: 12, scope: !13) + !46 = !DILocation(line: 17, column: 1, scope: !13) + +... +--- +name: arm_max_no_idx_f32 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', 
debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: + - id: 0 + value: float 0x3810000000000000 + alignment: 4 + isTargetSpecific: false +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_max_no_idx_f32 + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.4(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: tCBZ renamable $r1, %bb.4, debug-location !31 + ; CHECK: bb.1.vector.ph: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1, debug-location !31 + ; CHECK: bb.2.vector.body (align 4): + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r2 + ; CHECK: DBG_VALUE float 
0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + ; CHECK: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40 + ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 + ; CHECK: bb.3.middle.block: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $q0, $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31 + ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit killed $q0, debug-location !31 + ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31 + ; CHECK: tB %bb.5, 14 /* CC::al */, $noreg + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x80000000) + ; CHECK: liveins: $r2 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + ; CHECK: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: 
bb.5.while.end: + ; CHECK: liveins: $r2, $s0 + ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 + ; CHECK: bb.6 (align 4): + ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 4 + bb.0.entry: + successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2, $r7, $lr + + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + tCBZ renamable $r1, %bb.4, debug-location !31 + + bb.1.vector.ph: + successors: %bb.2(0x80000000) + liveins: $r0, $r1, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg, debug-location !31 + renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $r12 = t2SUBri killed 
renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg, debug-location !31 + renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0 + renamable $lr = t2DoLoopStartTP killed renamable $r3, renamable $r1, debug-location !31 + + bb.2.vector.body (align 4): + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg + MVE_VPST 2, implicit $vpr, debug-location !32 + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, debug-location !40 + renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr + tB %bb.3, 14 /* CC::al */, $noreg + + bb.3.middle.block: + successors: %bb.5(0x80000000) + liveins: $q0, $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31 + renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed 
renamable $s1, implicit $q0, debug-location !31 + renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31 + tB %bb.5, 14 /* CC::al */, $noreg + + bb.4: + successors: %bb.5(0x80000000) + liveins: $r2 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 + DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 + renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + + bb.5.while.end: + liveins: $r2, $s0 + + DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 + DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 + + bb.6 (align 4): + CONSTPOOL_ENTRY 0, %const.0, 4 + +... From 0f3fec4618e40f54ec2a042cb603cdedd253312c Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 30 May 2021 18:02:14 +0100 Subject: [PATCH 312/318] [ARM] Guard against loop variant gather ptr operands This ensures that the operands of any gather/scatter instructions that we attempt to push out of the loop are invariant, preventing invalid IR from being generated. 
--- .../Target/ARM/MVEGatherScatterLowering.cpp | 3 +- .../Thumb2/mve-gather-optimisation-deep.ll | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 81f113b8302f..039f6f2053d8 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -960,7 +960,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, // Get the value that is added to/multiplied with the phi Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp); - if (IncrementPerRound->getType() != OffsSecondOperand->getType()) + if (IncrementPerRound->getType() != OffsSecondOperand->getType() || + !L->isLoopInvariant(OffsSecondOperand)) // Something has gone wrong, abort return false; diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll index 4c5bcd836c37..4e971542bf75 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll @@ -187,4 +187,48 @@ end: ret void; } +define arm_aapcs_vfpcc void @invariant_add(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +; CHECK-LABEL: @invariant_add( +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1) +; CHECK-NEXT: [[L3:%.*]] = getelementptr 
inbounds i32, i32* [[DST:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[L4:%.*]] = bitcast i32* [[L3]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP0]], <4 x i32>* [[L4]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] +; CHECK-NEXT: br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]] +; CHECK: end: +; CHECK-NEXT: ret void +; + +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] + %l0 = mul <4 x i32> %vec.ind, + %l1 = add <4 x i32> %l0, %vec.ind + %l2 = getelementptr inbounds i32, i32* %data, <4 x i32> %l1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %l2, i32 4, <4 x i1> , <4 x i32> undef) + %l3 = getelementptr inbounds i32, i32* %dst, i32 %index + %l4 = bitcast i32* %l3 to <4 x i32>* + store <4 x i32> %wide.masked.gather, <4 x i32>* %l4, align 4 + %index.next = add i32 %index, 4 + %vec.ind.next = add <4 x i32> %vec.ind, + %l5 = icmp eq i32 %index.next, %n.vec + br i1 %l5, label %end, label %vector.body + +end: + ret void; +} + + declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) From e2e2057132c1360d014235a087d4f678efc56420 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 26 Apr 2021 10:04:33 +0100 Subject: [PATCH 313/318] [ARM] Ensure loop invariant active.lane.mask operands CGP can move instructions like a ptrtoint into a loop, but the MVETailPredication when converting them will currently assume invariant trip counts. This tries to ensure the operands are loop invariant, and bails if not. 
Differential Revision: https://reviews.llvm.org/D100550 --- llvm/lib/Target/ARM/MVETailPredication.cpp | 4 + .../Thumb2/mve-tailpred-loopinvariant.ll | 145 ++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index b705208660df..cccac5595288 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -205,6 +205,10 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, EnableTailPredication == TailPredication::ForceEnabled; Value *ElemCount = ActiveLaneMask->getOperand(1); + bool Changed = false; + if (!L->makeLoopInvariant(ElemCount, Changed)) + return false; + auto *EC= SE->getSCEV(ElemCount); auto *TC = SE->getSCEV(TripCount); int VectorWidth = diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll new file mode 100644 index 000000000000..2bd3e51772bf --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +; This test has an instruction that gets sunk into the loop, that is a +; active.lane.mask operand. (%exitcount.ptrcnt.to.int = ptrtoint). We +; need to make sure it is loop invariant. 
+ +define i32 @a(i32* readnone %b, i8* %c) { +; CHECK-LABEL: a: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: it ls +; CHECK-NEXT: popls {r4, pc} +; CHECK-NEXT: .LBB0_1: @ %while.body.preheader +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movs r3, #1 +; CHECK-NEXT: add.w r2, r0, #15 +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: bic r2, r2, #15 +; CHECK-NEXT: subs r2, #16 +; CHECK-NEXT: add.w lr, r3, r2, lsr #4 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: adds r3, r1, r2 +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: vmov.8 q0[0], r3 +; CHECK-NEXT: adds r4, r3, #1 +; CHECK-NEXT: vmov.8 q0[1], r4 +; CHECK-NEXT: adds r4, r3, #2 +; CHECK-NEXT: vmov.8 q0[2], r4 +; CHECK-NEXT: adds r4, r3, #3 +; CHECK-NEXT: vmov.8 q0[3], r4 +; CHECK-NEXT: adds r4, r3, #4 +; CHECK-NEXT: vmov.8 q0[4], r4 +; CHECK-NEXT: adds r4, r3, #5 +; CHECK-NEXT: vmov.8 q0[5], r4 +; CHECK-NEXT: adds r4, r3, #6 +; CHECK-NEXT: vmov.8 q0[6], r4 +; CHECK-NEXT: adds r4, r3, #7 +; CHECK-NEXT: vmov.8 q0[7], r4 +; CHECK-NEXT: add.w r4, r3, #8 +; CHECK-NEXT: vmov.8 q0[8], r4 +; CHECK-NEXT: add.w r4, r3, #9 +; CHECK-NEXT: vmov.8 q0[9], r4 +; CHECK-NEXT: add.w r4, r3, #10 +; CHECK-NEXT: vmov.8 q0[10], r4 +; CHECK-NEXT: add.w r4, r3, #11 +; CHECK-NEXT: vmov.8 q0[11], r4 +; CHECK-NEXT: add.w r4, r3, #12 +; CHECK-NEXT: vmov.8 q0[12], r4 +; CHECK-NEXT: add.w r4, r3, #13 +; CHECK-NEXT: vmov.8 q0[13], r4 +; CHECK-NEXT: add.w r4, r3, #14 +; CHECK-NEXT: adds r2, #16 +; CHECK-NEXT: subs r0, #16 +; CHECK-NEXT: vmov.8 q0[14], r4 +; CHECK-NEXT: adds r3, #15 +; CHECK-NEXT: vmov.8 q0[15], r3 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrbt.8 q0, [r12], #16 +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %while.end +; CHECK-NEXT: pop {r4, pc} +entry: + %0 = bitcast i32* %b to i8* + %cmp3 = icmp ugt i8* %0, %c + br i1 %cmp3, label 
%while.body.preheader, label %while.end + +while.body.preheader: ; preds = %entry + %c5 = ptrtoint i8* %c to i32 + %1 = sub i32 0, %c5 + %uglygep = getelementptr i8, i8* %0, i32 %1 + %exitcount.ptrcnt.to.int = ptrtoint i8* %uglygep to i32 + %n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15 + %n.vec = and i32 %n.rnd.up, -16 + br label %vector.body + +vector.body: ; preds = %vector.body, %while.body.preheader + %index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ] + %next.gep = getelementptr i8, i8* %c, i32 %index + %2 = or i32 %index, 1 + %next.gep7 = getelementptr i8, i8* %c, i32 %2 + %3 = or i32 %index, 2 + %next.gep8 = getelementptr i8, i8* %c, i32 %3 + %4 = or i32 %index, 3 + %next.gep9 = getelementptr i8, i8* %c, i32 %4 + %5 = or i32 %index, 4 + %next.gep10 = getelementptr i8, i8* %c, i32 %5 + %6 = or i32 %index, 5 + %next.gep11 = getelementptr i8, i8* %c, i32 %6 + %7 = or i32 %index, 6 + %next.gep12 = getelementptr i8, i8* %c, i32 %7 + %8 = or i32 %index, 7 + %next.gep13 = getelementptr i8, i8* %c, i32 %8 + %9 = or i32 %index, 8 + %next.gep14 = getelementptr i8, i8* %c, i32 %9 + %10 = or i32 %index, 9 + %next.gep15 = getelementptr i8, i8* %c, i32 %10 + %11 = or i32 %index, 10 + %next.gep16 = getelementptr i8, i8* %c, i32 %11 + %12 = or i32 %index, 11 + %next.gep17 = getelementptr i8, i8* %c, i32 %12 + %13 = or i32 %index, 12 + %next.gep18 = getelementptr i8, i8* %c, i32 %13 + %14 = or i32 %index, 13 + %next.gep19 = getelementptr i8, i8* %c, i32 %14 + %15 = or i32 %index, 14 + %next.gep20 = getelementptr i8, i8* %c, i32 %15 + %16 = or i32 %index, 15 + %next.gep21 = getelementptr i8, i8* %c, i32 %16 + %17 = insertelement <16 x i8*> poison, i8* %next.gep, i32 0 + %18 = insertelement <16 x i8*> %17, i8* %next.gep7, i32 1 + %19 = insertelement <16 x i8*> %18, i8* %next.gep8, i32 2 + %20 = insertelement <16 x i8*> %19, i8* %next.gep9, i32 3 + %21 = insertelement <16 x i8*> %20, i8* %next.gep10, i32 4 + %22 = insertelement <16 x i8*> %21, i8* 
%next.gep11, i32 5 + %23 = insertelement <16 x i8*> %22, i8* %next.gep12, i32 6 + %24 = insertelement <16 x i8*> %23, i8* %next.gep13, i32 7 + %25 = insertelement <16 x i8*> %24, i8* %next.gep14, i32 8 + %26 = insertelement <16 x i8*> %25, i8* %next.gep15, i32 9 + %27 = insertelement <16 x i8*> %26, i8* %next.gep16, i32 10 + %28 = insertelement <16 x i8*> %27, i8* %next.gep17, i32 11 + %29 = insertelement <16 x i8*> %28, i8* %next.gep18, i32 12 + %30 = insertelement <16 x i8*> %29, i8* %next.gep19, i32 13 + %31 = insertelement <16 x i8*> %30, i8* %next.gep20, i32 14 + %32 = insertelement <16 x i8*> %31, i8* %next.gep21, i32 15 + %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int) + %33 = ptrtoint <16 x i8*> %32 to <16 x i32> + %34 = trunc <16 x i32> %33 to <16 x i8> + %35 = bitcast i8* %next.gep to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %34, <16 x i8>* %35, i32 1, <16 x i1> %active.lane.mask) + %index.next = add i32 %index, 16 + %36 = icmp eq i32 %index.next, %n.vec + br i1 %36, label %while.end, label %vector.body + +while.end: ; preds = %vector.body, %entry + ret i32 undef +} + +declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) From 1a8f0b969c4e77e32fe88b9e5de257fe96a3307d Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 22 May 2021 13:38:00 +0100 Subject: [PATCH 314/318] [ARM] Clean up some tests, removing dead instructions. 
NFC --- .../cond-vector-reduce-mve-codegen.ll | 82 ++------- .../LowOverheadLoops/extending-loads.ll | 44 ----- .../Thumb2/LowOverheadLoops/fast-fp-loops.ll | 22 --- .../LowOverheadLoops/mve-tail-data-types.ll | 121 -------------- .../CodeGen/Thumb2/LowOverheadLoops/nested.ll | 30 ---- .../Thumb2/LowOverheadLoops/reductions.ll | 10 -- .../LowOverheadLoops/tail-pred-basic.ll | 45 ----- .../LowOverheadLoops/tail-pred-const.ll | 40 ----- .../tail-pred-disabled-in-loloops.ll | 4 - .../tail-pred-intrinsic-add-sat.ll | 2 - .../tail-pred-intrinsic-fabs.ll | 1 - .../tail-pred-intrinsic-round.ll | 6 - .../tail-pred-intrinsic-sub-sat.ll | 2 - .../LowOverheadLoops/tail-pred-reduce.ll | 25 --- .../LowOverheadLoops/tail-pred-widen.ll | 28 +--- .../varying-outer-2d-reduction.ll | 1 - .../LowOverheadLoops/vector-arith-codegen.ll | 89 +--------- .../vector-reduce-mve-tail.ll | 9 - llvm/test/CodeGen/Thumb2/mve-fma-loops.ll | 156 ++---------------- .../CodeGen/Thumb2/mve-gather-increment.ll | 2 - .../Thumb2/mve-gather-optimisation-deep.ll | 29 ++-- .../Thumb2/mve-gather-scatter-optimisation.ll | 11 -- .../CodeGen/Thumb2/mve-vecreduce-loops.ll | 15 -- 23 files changed, 39 insertions(+), 735 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 29ecf00c556f..e36e219ebaf1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -48,22 +48,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, 
%vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -147,22 +138,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = 
getelementptr inbounds i32, i32* %b, i32 %index @@ -205,13 +187,12 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32 ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB2_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: sub.w lr, r4, #4 +; CHECK-NEXT: add.w lr, r12, #3 ; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: bic lr, lr, #3 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: sub.w lr, lr, #4 ; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -222,12 +203,11 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill ; CHECK-NEXT: vsub.i32 q1, q2, q1 -; CHECK-NEXT: adds r4, #4 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vcmpt.i32 eq, q1, zr ; CHECK-NEXT: vldrwt.u32 q1, [r3], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r2], #16 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB2_2 @@ -249,22 +229,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> 
zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -304,13 +275,12 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB3_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: sub.w lr, r4, #4 +; CHECK-NEXT: add.w lr, r12, #3 ; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: bic lr, lr, #3 +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: sub.w lr, lr, #4 ; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -326,9 +296,8 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* ; CHECK-NEXT: vcmpt.i32 ne, q1, zr ; CHECK-NEXT: vldrwe.u32 q1, [r3], #16 ; CHECK-NEXT: vldrwe.u32 q2, [r2], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -348,22 +317,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = 
%vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -402,11 +362,9 @@ define dso_local void @continue_on_zero(i32* noalias nocapture %arg, i32* noalia ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %bb3 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_2: @ %bb9 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [r0] @@ -423,21 +381,12 @@ bb: bb3: ; preds = %bb %tmp4 = add i32 %arg2, 3 %tmp5 = and i32 %tmp4, -4 - %tmp6 = add i32 %arg2, -1 - %tmp7 = insertelement <4 x i32> undef, i32 %tmp6, i32 0 - %tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer br label %bb9 bb9: ; preds = %bb9, %bb3 %tmp10 = phi i32 [ 0, %bb3 ], [ %tmp25, %bb9 ] - %tmp11 = insertelement <4 x i32> undef, i32 %tmp10, i32 0 - %tmp12 = shufflevector <4 x i32> %tmp11, <4 x i32> undef, <4 x i32> zeroinitializer - %tmp13 = add <4 x i32> %tmp12, %tmp14 = getelementptr inbounds i32, i32* %arg1, i32 %tmp10 - - ; %tmp15 = icmp ule <4 x i32> %tmp13, %tmp8 %tmp15 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp10, i32 %arg2) 
- %tmp16 = bitcast i32* %tmp14 to <4 x i32>* %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp16, i32 4, <4 x i1> %tmp15, <4 x i32> undef) %tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer @@ -464,7 +413,6 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %bb4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -472,7 +420,6 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vcmpt.s32 le, q0, r2 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q0, [r0], #16 @@ -486,23 +433,14 @@ bb: bb4: ; preds = %bb %tmp5 = add i32 %arg3, 3 %tmp6 = and i32 %tmp5, -4 - %tmp7 = add i32 %arg3, -1 - %tmp8 = insertelement <4 x i32> undef, i32 %tmp7, i32 0 - %tmp9 = shufflevector <4 x i32> %tmp8, <4 x i32> undef, <4 x i32> zeroinitializer %tmp10 = insertelement <4 x i32> undef, i32 %arg2, i32 0 %tmp11 = shufflevector <4 x i32> %tmp10, <4 x i32> undef, <4 x i32> zeroinitializer br label %bb12 bb12: ; preds = %bb12, %bb4 %tmp13 = phi i32 [ 0, %bb4 ], [ %tmp30, %bb12 ] - %tmp14 = insertelement <4 x i32> undef, i32 %tmp13, i32 0 - %tmp15 = shufflevector <4 x i32> %tmp14, <4 x i32> undef, <4 x i32> zeroinitializer - %tmp16 = add <4 x i32> %tmp15, %tmp17 = getelementptr inbounds i32, i32* %arg, i32 %tmp13 - - ; %tmp18 = icmp ule <4 x i32> %tmp16, %tmp9 %tmp18= call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %tmp13, i32 %arg3) - %tmp19 = bitcast i32* %tmp17 to <4 x i32>* %tmp20 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp19, i32 4, <4 x i1> %tmp18, <4 x i32> undef) %tmp21 = icmp ne <4 x i32> %tmp20, zeroinitializer diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll index 2627965913eb..01564487b576 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -9,11 +9,9 @@ define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* no ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.s16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 @@ -28,21 +26,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = or <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = sext <8 x i8> %wide.masked.load to <8 x i16> @@ -69,11 +58,9 @@ define dso_local arm_aapcs_vfpcc void @zext_i8(i16* noalias nocapture %a, i8* no ; CHECK-NEXT: it eq ; 
CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrh.u16 q1, [r0] ; CHECK-NEXT: vadd.i16 q0, q1, q0 @@ -88,21 +75,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = or <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %wide.masked.load to <8 x i16> @@ -129,11 +107,9 @@ define dso_local arm_aapcs_vfpcc void @sext_i16(i32* noalias nocapture %a, i16* ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.s32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 @@ -148,21 +124,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 
%n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -189,11 +156,9 @@ define dso_local arm_aapcs_vfpcc void @zext_i16(i32* noalias nocapture %a, i16* ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: vldrh.u32 q0, [r1], #8 ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vadd.i32 q0, q1, q0 @@ -208,21 +173,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - 
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 273e1d5dd360..03e56812e273 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -34,11 +34,9 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .LBB0_4: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r2], #16 ; CHECK-NEXT: vmul.f32 q0, q1, q0 @@ -122,21 +120,12 @@ for.body.preheader.new: ; preds = %for.body.preheader vector.ph: ; preds = %vector.memcheck %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert21 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat22 = shufflevector <4 x i32> %broadcast.splatinsert21, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = 
insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds float, float* %b, i32 %index - - ; %3 = icmp ule <4 x i32> %induction, %broadcast.splat22 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast float* %2 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %3, <4 x float> undef) %5 = getelementptr inbounds float, float* %c, i32 %index @@ -225,12 +214,10 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: add.w lr, r12, r3, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpstt @@ -262,22 +249,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, 
%broadcast.splat12 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %c, i32 %index diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index e2136c5c9483..77ae179851b3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -16,12 +16,10 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -40,9 +38,6 @@ vector.ph: ; preds = %entry %conv = zext i8 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -50,14 +45,8 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 
%index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -92,12 +81,10 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -116,9 +103,6 @@ vector.ph: ; preds = %entry %conv = sext i16 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -126,14 +110,8 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 
x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -168,12 +146,10 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -192,9 +168,6 @@ vector.ph: ; preds = %entry %conv = zext i8 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -202,14 +175,8 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> 
%broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -244,12 +211,10 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -268,9 +233,6 @@ vector.ph: ; preds = %entry %conv = sext i16 %a to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %conv, i32 0 %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -278,14 +240,8 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - -; %1 = 
icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> @@ -320,12 +276,10 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst @@ -343,9 +297,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %a, i32 0 %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body @@ -353,14 +304,8 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> 
@llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat12 @@ -413,11 +358,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB5_8 ; CHECK-NEXT: .LBB5_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -500,23 +443,14 @@ for.body.preheader.new: ; preds = %for.body.preheader vector.ph: ; preds = %for.body.lr.ph %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert22 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat23 = shufflevector <4 x i32> %broadcast.splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds i8, i8* %a, i32 %index - - ; %3 = icmp ule <4 x i32> %induction, %broadcast.splat20 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* %wide.masked.load = call <4 x i8> 
@llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -620,11 +554,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -640,23 +572,14 @@ vector.ph: ; preds = %entry %conv3 = sext i16 %c to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %a, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat13 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -711,11 +634,9 @@ define arm_aapcs_vfpcc void 
@test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB7_8 ; CHECK-NEXT: .LBB7_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -798,23 +719,14 @@ for.body.preheader.new: ; preds = %for.body.preheader vector.ph: ; preds = %for.body.lr.ph %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert22 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat23 = shufflevector <4 x i32> %broadcast.splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds i8, i8* %a, i32 %index - -; %3 = icmp ule <4 x i32> %induction, %broadcast.splat20 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> @@ -918,11 +830,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph -; CHECK-NEXT: movs r4, #0 
; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -938,23 +848,14 @@ vector.ph: ; preds = %entry %conv3 = sext i16 %c to i32 %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <4 x i32> %broadcast.splatinsert12, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %conv3, i32 0 %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %a, i32 %index - -; %1 = icmp ule <4 x i32> %induction, %broadcast.splat13 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> @@ -1009,11 +910,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB9_8 ; CHECK-NEXT: .LBB9_4: @ %vector.ph -; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r7, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; 
CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmlas.u32 q1, q0, r2 @@ -1095,23 +994,14 @@ for.body.preheader.new: ; preds = %for.body.preheader vector.ph: ; preds = %vector.memcheck %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert21 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat22 = shufflevector <4 x i32> %broadcast.splatinsert21, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert24 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat25 = shufflevector <4 x i32> %broadcast.splatinsert24, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %2 = getelementptr inbounds i32, i32* %a, i32 %index - -; %3 = icmp ule <4 x i32> %induction, %broadcast.splat22 %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i32* %2 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %3, <4 x i32> undef) %5 = getelementptr inbounds i32, i32* %b, i32 %index @@ -1202,11 +1092,9 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrb.u16 q0, [r1], #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 ; CHECK-NEXT: vmul.i16 q0, q1, q0 @@ -1221,21 +1109,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add 
i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - -; %1 = icmp ule <8 x i32> %induction, %broadcast.splat13 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %wide.masked.load to <8 x i16> diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll index c8d38032a6a4..6b71a070d651 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll @@ -9,9 +9,6 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4 ; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2 ; CHECK-NEXT: [[TT2:%.*]] 
= add nuw nsw i32 [[TT1]], 1 @@ -30,9 +27,6 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT14:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT15:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i16, i16* [[TT3]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 @@ -66,9 +60,6 @@ entry: for.cond1.preheader.us.preheader: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert28 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat29 = shufflevector <4 x i32> %broadcast.splatinsert28, <4 x i32> undef, <4 x i32> zeroinitializer %tt = add i32 %n.vec, -4 %tt1 = lshr i32 %tt, 2 %tt2 = add nuw nsw i32 %tt1, 1 @@ -88,14 +79,8 @@ vector.body: ; preds = %vector.body, %for.c %index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt14, %vector.body ] %tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ %tt15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tt6 = getelementptr 
inbounds i16, i16* %tt3, i32 %index - - ; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat29 %tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tt8 = bitcast i16* %tt6 to <4 x i16>* %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tt8, i32 2, <4 x i1> %tt7, <4 x i16> undef) %tt9 = sext <4 x i16> %wide.masked.load to <4 x i32> @@ -130,9 +115,6 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B ; CHECK: for.cond1.preheader.us.preheader: ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4 -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <4 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT28:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT27]], <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TT:%.*]] = add i32 [[N_VEC]], -4 ; CHECK-NEXT: [[TT1:%.*]] = lshr i32 [[TT]], 2 ; CHECK-NEXT: [[TT2:%.*]] = add nuw nsw i32 [[TT1]], 1 @@ -151,9 +133,6 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TT4]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TT5:%.*]] = phi i32 [ [[START]], [[FOR_COND1_PREHEADER_US]] ], [ [[TT13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[N]], [[FOR_COND1_PREHEADER_US]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TT6:%.*]] = getelementptr inbounds i32, i32* [[TT3]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> 
@llvm.arm.mve.vctp32(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2]] = sub i32 [[TMP0]], 4 @@ -185,9 +164,6 @@ entry: for.cond1.preheader.us.preheader: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert27 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat28 = shufflevector <4 x i32> %broadcast.splatinsert27, <4 x i32> undef, <4 x i32> zeroinitializer %tt = add i32 %n.vec, -4 %tt1 = lshr i32 %tt, 2 %tt2 = add nuw nsw i32 %tt1, 1 @@ -207,14 +183,8 @@ vector.body: ; preds = %vector.body, %for.c %index = phi i32 [ 0, %for.cond1.preheader.us ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %tt4, %for.cond1.preheader.us ], [ %tt12, %vector.body ] %tt5 = phi i32 [ %start, %for.cond1.preheader.us ], [ %tt13, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tt6 = getelementptr inbounds i32, i32* %tt3, i32 %index - - ; %tt7 = icmp ule <4 x i32> %induction, %broadcast.splat28 %tt7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tt8 = bitcast i32* %tt6 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tt8, i32 4, <4 x i1> %tt7, <4 x i32> undef) %tt9 = getelementptr inbounds i32, i32* %B, i32 %index diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 778b50150128..7469a01500d9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -30,7 +30,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, 
%vector.ph @@ -100,7 +99,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -172,7 +170,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -242,7 +239,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -314,7 +310,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -384,7 +379,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -481,7 +475,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -510,7 +503,6 @@ middle.block: ; preds = %vector.body vector.ph47: ; preds = %middle.block %n.rnd.up48 = add i32 %N, 3 %n.vec50 = and i32 %n.rnd.up48, -4 - %trip.count.minus.154 = add i32 %N, -1 %i11 = insertelement <4 x i32> , i32 %i10, i32 0 br label %vector.body46 @@ -594,7 +586,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -719,7 +710,6 @@ lor.end: ; preds = %entry, %lor.rhs vector.ph: ; preds = %lor.end %n.rnd.up = add i32 %4, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = 
add i32 %4, -1 %5 = insertelement <4 x i32> , i32 %0, i32 0 br label %vector.body diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll index 1492a01a272e..d10cbffe2dd2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-basic.ll @@ -22,23 +22,14 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <16 x i32> %broadcast.splatinsert10, <16 x i32> undef, <16 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer - %induction = or <16 x i32> %broadcast.splat, %tmp = getelementptr inbounds i8, i8* %a, i32 %index - -; %tmp1 = icmp ule <16 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i8* %tmp to <16 x i8>* %wide.masked.load = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %tmp2, i32 4, <16 x i1> %active.lane.mask, <16 x i8> undef) %tmp3 = getelementptr inbounds i8, i8* %b, i32 %index @@ -79,23 +70,14 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> 
%broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - -; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %active.lane.mask, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -135,20 +117,13 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - ; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %tmp2 = 
bitcast i32* %tmp to <4 x i32>* %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -190,20 +165,13 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -262,10 +230,7 @@ vector.body: ; preds = %vector.body, %vecto %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wrong = icmp ult <4 x i32> %induction, %broadcast.splat11 
%tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -321,10 +286,7 @@ vector.body: ; preds = %vector.body, %vecto %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr inbounds i32, i32* %a, i32 %index - -; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wrong = icmp ult <4 x i32> %induction, %broadcast.splat11 %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) @@ -370,7 +332,6 @@ entry: vector.ph: - %trip.count.minus.1 = add i32 %N, -1 %scevgep = getelementptr i32, i32* %A, i32 8 %scevgep30 = getelementptr i32, i32* %C, i32 8 %scevgep37 = getelementptr i32, i32* %B, i32 8 @@ -459,9 +420,7 @@ vector.body: ; preds = %vector.body, %vecto %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 42) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load @@ -495,7 +454,6 @@ entry: br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -509,9 +467,7 @@ vector.body: ; preds = 
%vector.body, %vecto %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %index) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load @@ -546,7 +502,6 @@ entry: br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll index 4682f1d36f31..1c173e9dfd1d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -10,22 +10,17 @@ define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture rea ; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i32* [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV11:%.*]] = phi i32* [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32* [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 32003, [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[LSR_IV1416:%.*]] = bitcast i32* [[LSR_IV14]] to <4 x i32>* ; CHECK-NEXT: [[LSR_IV1113:%.*]] = bitcast i32* 
[[LSR_IV11]] to <4 x i32>* ; CHECK-NEXT: [[LSR_IV10:%.*]] = bitcast i32* [[LSR_IV]] to <4 x i32>* -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.vctp32(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3]] = sub i32 [[TMP1]], 4 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[LSR_IV10]], i32 4, <4 x i1> [[TMP2]], <4 x i32> undef) ; CHECK-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[LSR_IV1113]], i32 4, <4 x i1> [[TMP2]], <4 x i32> undef) ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP4]], <4 x i32>* [[LSR_IV1416]], i32 4, <4 x i1> [[TMP2]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[SCEVGEP]] = getelementptr i32, i32* [[LSR_IV]], i32 4 ; CHECK-NEXT: [[SCEVGEP12]] = getelementptr i32, i32* [[LSR_IV11]], i32 4 ; CHECK-NEXT: [[SCEVGEP15]] = getelementptr i32, i32* [[LSR_IV14]], i32 4 @@ -48,13 +43,7 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - - ; %1 = icmp ult <4 x i32> %induction, %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) 
%wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -244,11 +233,7 @@ vector.body: %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, - -; Non-uniform constant vector here. This can't be represented with -; @llvm.get.active.lane.mask, but let's keep this test as a sanity check: %1 = icmp ult <4 x i32> %induction, - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -285,13 +270,8 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - ; BTC = UINT_MAX, and scalar trip count BTC + 1 would overflow: %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 4294967295) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -328,12 +308,7 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* 
%lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -371,13 +346,8 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - ; The induction variable %N is not an IV: %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %N, i32 32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -414,12 +384,7 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 
32003) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) %2 = add nsw <4 x i32> %wide.masked.load9, %wide.masked.load @@ -460,9 +425,6 @@ vector.body: %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1113 = bitcast i32* %lsr.iv11 to <4 x i32>* %lsr.iv10 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 32003) %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv10, i32 4, <4 x i1> %1, <4 x i32> undef) %wide.masked.load9 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1113, i32 4, <4 x i1> %1, <4 x i32> undef) @@ -514,10 +476,8 @@ vector.body: ; preds = %vector.body, %vecto %lsr.iv3840 = bitcast i32* %lsr.iv38 to <4 x i32>* %lsr.iv3335 = bitcast i32* %lsr.iv33 to <4 x i32>* %lsr.iv2830 = bitcast i32* %lsr.iv28 to <4 x i32>* - ; It's using %j.025, the induction variable from its outer loop: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %j.025, i32 4096) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv3840, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load27 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv3335, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %1 = add nsw <4 x i32> %wide.masked.load27, %wide.masked.load diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll index 
a2361f518636..73865945cdc3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll @@ -82,7 +82,6 @@ entry: br i1 %cmp8, label %vector.ph, label %for.cond.cleanup vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -92,13 +91,10 @@ vector.body: ; preds = %vector.body, %vecto %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %8, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* - %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %7 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll index c0b2a036f371..14f1d0c00204 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll @@ -27,7 +27,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -77,7 +76,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - 
%trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll index 5ad6d9112308..66216022d647 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll @@ -26,7 +26,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll index bd927fdcf859..024857b65802 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -26,7 +26,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -72,7 +71,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -118,7 +116,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -164,7 +161,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -210,7 +206,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add 
i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -260,7 +255,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll index 98d48d49539c..0e51661e8f58 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -27,7 +27,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -77,7 +76,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %blockSize, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %blockSize, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll index ef79f27ce5dc..01bbd3ac28e2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-reduce.ll @@ -18,8 +18,6 @@ vector.ph: %tmp = add i32 %N, -1 %n.rnd.up = add i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 %1 = lshr i32 %0, 3 %2 = add i32 %1, 1 @@ -30,14 +28,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph], [ %index.next, 
%vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp8, %vector.body ] %3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = getelementptr inbounds i16, i16* %B, i32 %index @@ -87,8 +79,6 @@ vector.ph: %tmp = add i32 %N, -1 %n.rnd.up = add nuw nsw i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert3 = insertelement <8 x i16> undef, i16 %B, i32 0 %broadcast.splat4 = shufflevector <8 x i16> %broadcast.splatinsert3, <8 x i16> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 @@ -101,14 +91,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph], [ %tmp6, %vector.body ] %3 = phi i32 [ %start, %vector.ph], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> 
@llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4 @@ -151,8 +135,6 @@ entry: %tmp = add i32 %N, -1 %n.rnd.up = add nuw nsw i32 %tmp, 8 %n.vec = and i32 %n.rnd.up, -8 - %broadcast.splatinsert1 = insertelement <8 x i32> undef, i32 %tmp, i32 0 - %broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert3 = insertelement <8 x i16> undef, i16 %B, i32 0 %broadcast.splat4 = shufflevector <8 x i16> %broadcast.splatinsert3, <8 x i16> undef, <8 x i32> zeroinitializer %0 = add i32 %n.vec, -8 @@ -165,14 +147,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %entry], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %entry], [ %tmp6, %vector.body ] %3 = phi i32 [ %start, %entry ], [ %4, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp2 = getelementptr inbounds i16, i16* %A, i32 %index - - ; %tmp3 = icmp ule <8 x i32> %induction, %broadcast.splat2 %tmp3 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp4 = bitcast i16* %tmp2 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp4, i32 4, <8 x i1> %tmp3, <8 x i16> undef) %tmp5 = add <8 x i16> %vec.phi, %broadcast.splat4 @@ -227,7 +203,6 @@ for.body: br i1 %cmp433, label %vector.ph, label %for.end vector.ph: ; preds = %for.body - %trip.count.minus.1 = add i32 %8, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %7) br label %vector.body diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll index 939d3cc5e558..c9b2905755ed 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -14,23 +14,14 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - - ; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 - %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - + %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %tmp1, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -72,8 +63,6 @@ entry: vector.ph: ; preds = %entry %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <8 x i32> %broadcast.splatinsert10, <8 x i32> undef, <8 x i32> zeroinitializer %broadcast.splatinsert10.store = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 
%broadcast.splat11.store = shufflevector <4 x i32> %broadcast.splatinsert10.store, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) @@ -83,14 +72,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %store.idx = phi i32 [ 0, %vector.ph ], [ %store.idx.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %tmp = getelementptr inbounds i16, i16* %a, i32 %index - - ; %tmp1 = icmp ule <8 x i32> %induction, %broadcast.splat11 %tmp1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i16* %tmp to <8 x i16>* %wide.masked.load = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp2, i32 4, <8 x i1> %tmp1, <8 x i16> undef) %tmp3 = getelementptr inbounds i16, i16* %b, i32 %index @@ -136,23 +119,14 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp13) br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %tmp14 = phi i32 [ %start, %vector.ph ], [ %tmp15, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %tmp = getelementptr 
inbounds i32, i32* %a, i32 %index - - ; %tmp1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %tmp1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %tmp2 = bitcast i32* %tmp to <4 x i32>* %wide.masked.load = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %tmp2, i32 4, <4 x i1> %tmp1, <4 x i32> undef) %tmp3 = getelementptr inbounds i32, i32* %b, i32 %index diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll index 1ea183d4a5ff..af5c76fd4477 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -183,7 +183,6 @@ for.body: ; preds = %for.end, %for.body. br i1 %cmp433, label %vector.ph, label %for.end vector.ph: ; preds = %for.body - %trip.count.minus.1 = add i32 %i8, -1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %i7) br label %vector.body diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index 6b4113ea4bb5..02f65d240c06 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -11,27 +11,25 @@ define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* no ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: adds r3, r2, #3 -; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: sub.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vpstt -; CHECK-NEXT: 
vldrwt.u32 q0, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 -; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: vmul.i32 q0, q2, q0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vmul.i32 q1, q2, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} entry: @@ -41,22 +39,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat12 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = getelementptr inbounds i32, i32* %b, i32 %index @@ -93,7 +82,6 @@ define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: 
@ =>This Inner Loop Header: Depth=1 @@ -101,7 +89,6 @@ define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB1_2 @@ -116,22 +103,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi @@ -164,7 +142,6 @@ define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: subs r1, #4 ; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -172,7 +149,6 @@ define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: vmov 
q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: le lr, .LBB2_2 @@ -187,22 +163,13 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %a, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat10 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi @@ -228,11 +195,9 @@ define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 @@ -246,23 +211,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = 
add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat11 @@ -285,11 +241,9 @@ define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r0], #16 @@ -303,23 +257,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert8 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> 
zeroinitializer %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %c, i32 0 %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, %0 = getelementptr inbounds i32, i32* %b, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat9 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %broadcast.splat11 @@ -342,11 +287,9 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(i8* noalias nocaptur ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB5_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #16 ; CHECK-NEXT: vldrb.u8 q0, [r1], #16 ; CHECK-NEXT: vldrb.u8 q1, [r2], #16 ; CHECK-NEXT: vmul.i8 q0, q1, q0 @@ -361,21 +304,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <16 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <16 x i32> %broadcast.splatinsert12, <16 x i32> undef, <16 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <16 
x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer - %induction = add <16 x i32> %broadcast.splat, %0 = getelementptr inbounds i8, i8* %b, i32 %index - - ; %1 = icmp ule <16 x i32> %induction, %broadcast.splat13 %1 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <16 x i8>* %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> undef) %3 = getelementptr inbounds i8, i8* %c, i32 %index @@ -402,11 +336,9 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(i16* noalias nocapt ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} ; CHECK-NEXT: .LBB6_1: @ %vector.ph -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vldrh.u16 q1, [r2], #16 ; CHECK-NEXT: vmul.i16 q0, q1, q0 @@ -421,21 +353,12 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %N, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert12 = insertelement <8 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat13 = shufflevector <8 x i32> %broadcast.splatinsert12, <8 x i32> undef, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <8 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer - %induction = add <8 x i32> %broadcast.splat, %0 = getelementptr inbounds i16, i16* %b, i32 %index - - ; %1 = icmp ule <8 x i32> %induction, %broadcast.splat13 %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = 
bitcast i16* %0 to <8 x i16>* %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %2, i32 2, <8 x i1> %1, <8 x i16> undef) %3 = getelementptr inbounds i16, i16* %c, i32 %index diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll index e8da32611be2..ec6a7554b3e6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -30,9 +30,6 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.ph vector.ph: ; preds = %entry - %trip.count.minus.1 = add i32 %N, -1 - %broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat12 = shufflevector <4 x i32> %broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer %start = call i32 @llvm.start.loop.iterations.i32(i32 %5) br label %vector.body @@ -44,13 +41,7 @@ vector.body: ; preds = %vector.body, %vecto %6 = phi i32 [ %start, %vector.ph ], [ %10, %vector.body ] %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = add <4 x i32> %broadcast.splat, - - ; %7 = icmp ule <4 x i32> %induction, %broadcast.splat12 %7 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %7, <4 x i32> undef) %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %7, <4 x i32> undef) %8 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll index 
7609c16ea84c..a34a278103a0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -11,11 +11,9 @@ define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -30,23 +28,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -74,11 +63,9 @@ define arm_aapcs_vfpcc void 
@fmas2(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -93,23 +80,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -138,11 +116,9 @@ define arm_aapcs_vfpcc void @fma1(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; 
CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -157,23 +133,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -201,11 +168,9 @@ define arm_aapcs_vfpcc void @fma2(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; 
CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -220,23 +185,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat13 = shufflevector <4 x float> %broadcast.splatinsert12, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = fmul fast <4 x float> %wide.masked.load, %broadcast.splat13 @@ -265,12 +221,10 @@ define arm_aapcs_vfpcc void @fmss1(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: eor r12, r12, #-2147483648 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vfmas.f32 q1, q0, r12 @@ -286,23 +240,14 @@ vector.ph: ; preds = %entry %fneg = 
fneg fast float %a %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %fneg, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -329,14 +274,12 @@ define arm_aapcs_vfpcc void @fmss2(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB5_1: @ %vector.ph -; CHECK-NEXT: vmov lr, s0 -; CHECK-NEXT: vdup.32 q0, lr +; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vdup.32 q0, r12 ; CHECK-NEXT: vneg.f32 q0, q0 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB5_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -352,23 +295,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 
%n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -398,11 +332,9 @@ define arm_aapcs_vfpcc void @fmss3(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB6_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -418,23 +350,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - 
%broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -464,11 +387,9 @@ define arm_aapcs_vfpcc void @fmss4(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: .LBB7_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB7_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 @@ -484,23 +405,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, 
i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -529,12 +441,10 @@ define arm_aapcs_vfpcc void @fms1(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: eor r12, r12, #-2147483648 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vfma.f32 q1, q0, r12 @@ -550,23 +460,14 @@ vector.ph: ; preds = %entry %fneg = fneg fast float %a %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %fneg, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> 
zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -595,11 +496,9 @@ define arm_aapcs_vfpcc void @fms2(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: .LBB9_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vdup.32 q0, r4 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vfms.f32 q2, q1, q0 @@ -614,23 +513,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - 
%broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -659,16 +549,14 @@ define arm_aapcs_vfpcc void @fms3(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB10_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0], #16 -; CHECK-NEXT: vldrw.u32 q1, [r1], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vneg.f32 q1, q1 -; CHECK-NEXT: vfma.f32 q1, q0, r12 -; CHECK-NEXT: vstrw.32 q1, [r2], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vneg.f32 q0, q0 +; CHECK-NEXT: vfma.f32 q0, q1, r12 +; CHECK-NEXT: vstrw.32 q0, [r2], #16 ; CHECK-NEXT: letp lr, .LBB10_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -679,23 +567,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer %broadcast.splatinsert13 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x 
float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = getelementptr inbounds float, float* %y, i32 %index @@ -724,16 +603,14 @@ define arm_aapcs_vfpcc void @fms4(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB11_1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 -; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB11_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0], #16 -; CHECK-NEXT: vldrw.u32 q1, [r1], #16 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vneg.f32 q1, q1 -; CHECK-NEXT: vfma.f32 q1, q0, r12 -; CHECK-NEXT: vstrw.32 q1, [r2], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vneg.f32 q0, q0 +; CHECK-NEXT: vfma.f32 q0, q1, r12 +; CHECK-NEXT: vstrw.32 q0, [r2], #16 ; CHECK-NEXT: letp lr, .LBB11_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -744,23 +621,14 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 - %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %trip.count.minus.1, i32 0 - %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 
x i32> zeroinitializer %broadcast.splatinsert12 = insertelement <4 x float> undef, float %a, i32 0 %broadcast.splat13 = shufflevector <4 x float> %broadcast.splatinsert12, <4 x float> undef, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 - %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer - %induction = or <4 x i32> %broadcast.splat, %0 = getelementptr inbounds float, float* %x, i32 %index - - ; %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast float* %0 to <4 x float>* %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) %3 = fmul fast <4 x float> %wide.masked.load, %broadcast.splat13 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index c4f68959ecf4..a2d1f2a9db68 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -244,7 +244,6 @@ define arm_aapcs_vfpcc void @gather_pre_inc(i32* noalias nocapture readonly %dat ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 ; CHECK-NEXT: .long 0 @ 0x0 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -288,7 +287,6 @@ define arm_aapcs_vfpcc void @gather_post_inc(i32* noalias nocapture readonly %da ; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 vector.ph41: ; preds = %for.body6.preheader - %ind.end47 = shl i32 %n.vec43, 1 br label %vector.body39 vector.body39: ; preds = %vector.body39, %vector.ph41 diff --git 
a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll index 4e971542bf75..1617ce36d4ca 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll @@ -1,18 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py - - -; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -S -o 2>/dev/null - | FileCheck %s +; RUN: opt --arm-mve-gather-scatter-lowering -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -S -o - | FileCheck %s define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 1 ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> , ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[PUSHEDOUTADD]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY_END]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 50 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 48 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOWER_BLOCK:%.*]], label [[END:%.*]] ; CHECK: lower.block: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[VEC_IND]], i32 32, i32 2, i32 1) @@ -23,20 +20,19 @@ define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture reado ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: br label [[VECTOR_BODY_END]] ; CHECK: vector.body.end: -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] 
; CHECK-NEXT: br i1 [[TMP4]], label [[END]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body.end ] %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body.end ] - %0 = icmp eq i32 %index, 50 + %0 = icmp eq i32 %index, 48 br i1 %0, label %lower.block, label %end lower.block: ; preds = %vector.body @@ -61,7 +57,6 @@ end: define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_block( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 1 ; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> , ; CHECK-NEXT: [[PRODUCT:%.*]] = mul <4 x i32> , ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], @@ -69,7 +64,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture reado ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[PUSHEDOUTADD]], [[VECTOR_PH]] ], [ [[INCREMENTPUSHEDOUTMUL:%.*]], [[VECTOR_BODY_END]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 50 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX]], 48 ; CHECK-NEXT: br i1 [[TMP0]], label [[LOWER_BLOCK:%.*]], label [[END:%.*]] ; CHECK: lower.block: ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* [[DATA:%.*]], <4 x i32> [[VEC_IND]], i32 32, i32 2, i32 1) @@ -80,20 +75,19 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture reado ; CHECK-NEXT: br label [[VECTOR_BODY_END]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INCREMENTPUSHEDOUTMUL]] = add <4 x i32> [[VEC_IND]], [[PRODUCT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 
[[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP4]], label [[END]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body.end ] %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body.end ] - %0 = icmp eq i32 %index, 50 + %0 = icmp eq i32 %index, 48 br i1 %0, label %lower.block, label %end lower.block: ; preds = %vector.body @@ -120,7 +114,6 @@ end: define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_loop( ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[N_VEC:%.*]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ] @@ -138,19 +131,18 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readon ; CHECK-NEXT: br label [[VECTOR_2_BODY_END:%.*]] ; CHECK: vector.2.body.end: ; CHECK-NEXT: [[INDEX_2_NEXT:%.*]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_2_NEXT]], 15 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_2_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY_END]], label [[VECTOR_2_BODY]] ; CHECK: vector.body.end: ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]] ; CHECK-NEXT: br i1 [[TMP6]], label [[END:%.*]], label [[VECTOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; vector.ph: - %ind.end = shl i32 %n.vec, 2 br label %vector.body vector.body: ; preds = %vector.body, 
%vector.ph @@ -162,7 +154,6 @@ vector.2.ph: br label %vector.2.body vector.2.body: ; preds = %vector.body - %index.2 = phi i32 [ 0, %vector.2.ph ], [ %index.2.next, %vector.2.body.end ] %0 = mul <4 x i32> %vec.ind, %1 = add <4 x i32> %0, %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 @@ -174,7 +165,7 @@ vector.2.body: ; preds = %vector.body vector.2.body.end: ; preds = %lower.block %index.2.next = add i32 %index, 4 - %5 = icmp eq i32 %index.2.next, 15 + %5 = icmp eq i32 %index.2.next, 16 br i1 %5, label %vector.body.end, label %vector.2.body vector.body.end: ; preds = %lower.block diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index cfed9ccaebae..15c5291ca36d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -39,7 +39,6 @@ define arm_aapcs_vfpcc void @push_out_mul_gather(i32* noalias nocapture readonly ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -83,7 +82,6 @@ define arm_aapcs_vfpcc void @push_out_add_gather(i32* noalias nocapture readonly ; CHECK-NEXT: .long 16 @ 0x10 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -127,7 +125,6 @@ define arm_aapcs_vfpcc void @push_out_mul_add_gather(i32* noalias nocapture read ; CHECK-NEXT: .long 0 @ 0x0 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -173,7 +170,6 @@ define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonl <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph 
@@ -215,7 +211,6 @@ define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonl <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -259,7 +254,6 @@ define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture i32* noalias nocapture %dst, i32 %n.vec) { vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -301,7 +295,6 @@ define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture reado ; CHECK-NEXT: .long 16 @ 0x10 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -358,7 +351,6 @@ define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %d ; CHECK-NEXT: .long 6 @ 0x6 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -411,7 +403,6 @@ define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %d ; CHECK-NEXT: .long 6 @ 0x6 vector.ph: ; preds = %for.body.preheader - %ind.end = shl i32 %n.vec, 1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -528,7 +519,6 @@ for.cond8.preheader.us.us.preheader.preheader: ; preds = %entry %2 = add nuw i32 %1, 1 %min.iters.check = icmp ult i32 %0, 6 %n.vec = and i32 %2, -4 - %ind.end = shl i32 %n.vec, 1 %broadcast.splatinsert86 = insertelement <4 x i32> undef, i32 %m, i32 0 %broadcast.splat87 = shufflevector <4 x i32> %broadcast.splatinsert86, <4 x i32> undef, <4 x i32> zeroinitializer %cmp.n = icmp eq i32 %2, %n.vec @@ -978,7 +968,6 @@ for.body10.i: ; preds = %for.cond.cleanup20. 
br i1 0, label %for.cond.cleanup20.i, label %for.cond22.preheader.lr.ph.i for.cond22.preheader.lr.ph.i: ; preds = %for.body10.i - %ind.end = add nsw i32 0, %n.vec %.splatinsert = insertelement <4 x i32> undef, i32 0, i32 0 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %.splat, diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 728328ac9cba..8ae094fb66e6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -1711,7 +1711,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1762,7 +1761,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1816,7 +1814,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1868,7 +1865,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1924,7 +1920,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -1976,7 +1971,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2032,7 +2026,6 @@ entry: vector.ph: ; preds 
= %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2083,7 +2076,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2137,7 +2129,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2189,7 +2180,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2245,7 +2235,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2296,7 +2285,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 15 %n.vec = and i32 %n.rnd.up, -16 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2352,7 +2340,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2406,7 +2393,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 3 %n.vec = and i32 %n.rnd.up, -4 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -2465,7 +2451,6 @@ entry: vector.ph: ; preds = %entry %n.rnd.up = add i32 %n, 7 %n.vec = and i32 %n.rnd.up, -8 - %trip.count.minus.1 = add i32 %n, -1 br label %vector.body vector.body: ; preds = %vector.body, %vector.ph From 
f83afe6ae9613512bd8f30090d8c287292b55dcd Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 10 Jun 2021 20:18:12 +0100 Subject: [PATCH 315/318] [ARM] Ensure instructions are simplified prior to GatherScatter lowering. Surprisingly, not all instructions are always simplified after unrolling and before MVE gather/scatter lowering. Notably, dead gather operations can be left around, which causes the gather/scatter lowering pass to crash if there are multiple gathers, some of which are dead. This patch ensures they are simplified before we modify anything, which can change some of the existing tests, including making them no longer test what they originally tested. This uses a combination of disabling the gather/scatter lowering pass and adjusting the tests to keep them as before. Differential Revision: https://reviews.llvm.org/D103150 --- .../Target/ARM/MVEGatherScatterLowering.cpp | 2 + .../Thumb2/LowOverheadLoops/remat-vctp.ll | 2 +- llvm/test/CodeGen/Thumb2/lsll0.ll | 2 +- .../Thumb2/mve-gather-scatter-optimisation.ll | 74 +++++++++++-------- llvm/test/CodeGen/Thumb2/mve-gather-unused.ll | 38 ++++++++++ llvm/test/CodeGen/Thumb2/mve-phireg.ll | 2 +- llvm/test/CodeGen/Thumb2/mve-pred-xor.ll | 4 +- llvm/test/CodeGen/Thumb2/mve-selectcc.ll | 2 +- llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll | 22 ++---- 9 files changed, 96 insertions(+), 52 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-gather-unused.ll diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 039f6f2053d8..195622cfd586 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -1166,6 +1166,8 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) { bool Changed = false; for (BasicBlock &BB : F) { + SimplifyInstructionsInBlock(&BB); + for (Instruction &I : BB) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); if (II && II->getIntrinsicID() == Intrinsic::masked_gather && diff --git
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll index f334a5950acb..e809eea1e0c2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -enable-arm-maskedgatscat=false %s -o - | FileCheck %s define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { ; CHECK-LABEL: remat_vctp: diff --git a/llvm/test/CodeGen/Thumb2/lsll0.ll b/llvm/test/CodeGen/Thumb2/lsll0.ll index 30d2edecbd28..4e9735be077a 100644 --- a/llvm/test/CodeGen/Thumb2/lsll0.ll +++ b/llvm/test/CodeGen/Thumb2/lsll0.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s define void @_Z4loopPxS_iS_i(i64* %d) { ; CHECK-LABEL: _Z4loopPxS_iS_i: diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index 15c5291ca36d..672fa16fc4c1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -321,26 +321,29 @@ end: ret void; } -define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { ; CHECK-LABEL: non_gatscat_use1: ; CHECK: @ %bb.0: @ %vector.ph -; 
CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: adr r3, .LCPI7_0 -; CHECK-NEXT: vmov.i32 q0, #0x8 -; CHECK-NEXT: vldrw.u32 q2, [r3] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: adr.w r12, .LCPI7_0 +; CHECK-NEXT: vmov.i32 q0, #0x9 +; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: vmov.i32 q1, #0xc +; CHECK-NEXT: vmov.i32 q2, #0x8 ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vadd.i32 q3, q2, q0 -; CHECK-NEXT: vmlas.u32 q2, q1, r0 -; CHECK-NEXT: vldrw.u32 q4, [q2, #24] +; CHECK-NEXT: vadd.i32 q4, q3, q2 +; CHECK-NEXT: vmul.i32 q5, q3, q0 +; CHECK-NEXT: vmlas.u32 q3, q1, r0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vstrb.8 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q6, [q3, #24] +; CHECK-NEXT: vmov q3, q4 +; CHECK-NEXT: vstrw.32 q5, [r3] +; CHECK-NEXT: vstrb.8 q6, [r1], #16 ; CHECK-NEXT: bne .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %end -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: @@ -364,6 +367,7 @@ vector.body: ; preds = %vector.body, %vecto %4 = bitcast i32* %3 to <4 x i32>* store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 %non_gatscat_use = mul <4 x i32> %0, + store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, %5 = icmp eq i32 %index.next, %n.vec @@ -373,26 +377,31 @@ end: ret void; } -define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { ; CHECK-LABEL: non_gatscat_use2: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: adr r3, .LCPI8_0 -; 
CHECK-NEXT: vmov.i32 q0, #0x8 -; CHECK-NEXT: vldrw.u32 q2, [r3] -; CHECK-NEXT: vmov.i32 q1, #0xc +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: adr.w r12, .LCPI8_0 +; CHECK-NEXT: vmov.i32 q0, #0x12 +; CHECK-NEXT: vldrw.u32 q4, [r12] +; CHECK-NEXT: vmov.i32 q1, #0x9 +; CHECK-NEXT: vmov.i32 q2, #0x8 +; CHECK-NEXT: vmov.i32 q3, #0xc ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vadd.i32 q3, q2, q0 -; CHECK-NEXT: vmlas.u32 q2, q1, r0 -; CHECK-NEXT: vldrw.u32 q4, [q2, #24] +; CHECK-NEXT: vadd.i32 q5, q4, q2 +; CHECK-NEXT: vmul.i32 q6, q4, q1 +; CHECK-NEXT: vmlas.u32 q4, q3, r0 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q2, q3 -; CHECK-NEXT: vstrb.8 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q7, [q4, #24] +; CHECK-NEXT: vadd.i32 q4, q6, q0 +; CHECK-NEXT: vstrw.32 q4, [r3] +; CHECK-NEXT: vmov q4, q5 +; CHECK-NEXT: vstrb.8 q7, [r1], #16 ; CHECK-NEXT: bne .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %end -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: @@ -416,6 +425,7 @@ vector.body: ; preds = %vector.body, %vecto %4 = bitcast i32* %3 to <4 x i32>* store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 %non_gatscat_use = mul <4 x i32> %1, + store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, %5 = icmp eq i32 %index.next, %n.vec @@ -844,12 +854,12 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly ; CHECK-NEXT: add.w r8, r7, #10 ; CHECK-NEXT: adr r7, .LCPI11_0 ; CHECK-NEXT: ldr r1, [sp, #96] -; CHECK-NEXT: vdup.32 q1, r2 -; CHECK-NEXT: vldrw.u32 q0, [r7] +; CHECK-NEXT: vdup.32 q0, r2 +; CHECK-NEXT: vldrw.u32 q1, [r7] ; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: mov.w r9, #6 ; CHECK-NEXT: movs r6, #11 -; CHECK-NEXT: vshl.i32 q1, 
q1, #2 +; CHECK-NEXT: vshl.i32 q0, q0, #2 ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: .LBB11_1: @ %for.body10.i ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -884,10 +894,10 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly ; CHECK-NEXT: mul r4, r11, r6 ; CHECK-NEXT: vdup.32 q3, r5 ; CHECK-NEXT: vdup.32 q2, r7 -; CHECK-NEXT: vadd.i32 q4, q0, r4 +; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: vmla.u32 q3, q4, r2 ; CHECK-NEXT: adds r4, #113 -; CHECK-NEXT: vadd.i32 q4, q0, r4 +; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: mov r4, r8 ; CHECK-NEXT: vmla.u32 q2, q4, r2 ; CHECK-NEXT: .LBB11_5: @ %vector.body @@ -897,8 +907,8 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly ; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=5 ; CHECK-NEXT: vldrb.s32 q6, [r0, q2] -; CHECK-NEXT: vadd.i32 q5, q2, q1 -; CHECK-NEXT: vadd.i32 q4, q3, q1 +; CHECK-NEXT: vadd.i32 q5, q2, q0 +; CHECK-NEXT: vadd.i32 q4, q3, q0 ; CHECK-NEXT: subs r4, #4 ; CHECK-NEXT: vadd.i32 q2, q6, r2 ; CHECK-NEXT: vldrb.s32 q6, [r1, q3] diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll b/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll new file mode 100644 index 000000000000..b8d732b709cb --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-gather-unused.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s + +; This file has some unused gathers, making sure that they do not cause +; problems as the function gets simplified. 
+ +define arm_aapcs_vfpcc void @unused1(<4 x i32*> %offs) { +; CHECK-LABEL: unused1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + ret void +} + +define arm_aapcs_vfpcc void @unused2(<4 x i32*> %offs) { +; CHECK-LABEL: unused2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %gather2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + ret void +} + +define arm_aapcs_vfpcc void @unused2_used(<4 x i32*> %offs) { +; CHECK-LABEL: unused2_used: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %gather1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %gather2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> , <4 x i32> undef) + %unused = add <4 x i32> %gather1, %gather2 + ret void +} + + +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll index 252f9c6439ec..d910d1929315 100644 --- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll +++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s ; verify-machineinstrs previously caught the incorrect use of QPR in the stack reloads. 
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index 6c13200a2d55..5d947e86d183 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -170,8 +170,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpugez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpugez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.i32 ne, q0, zr -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vcmp.i32 eq, q0, zr +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: bx lr entry: %c1 = icmp eq <4 x i32> %a, zeroinitializer diff --git a/llvm/test/CodeGen/Thumb2/mve-selectcc.ll b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll index b4f5d8d8fa3f..9e2dc568a086 100644 --- a/llvm/test/CodeGen/Thumb2/mve-selectcc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK define arm_aapcs_vfpcc <4 x i32> @test_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) { ; CHECK-LABEL: test_v4i32: diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index 6722aa706769..fee5fd9af694 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -70,8 +70,6 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) { ; CHECK-LABEL: vqdmulh_i16_c: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q2, q0 ; CHECK-NEXT: vmov.u16 r0, q0[2] ; CHECK-NEXT: vmov.u16 r1, q0[0] @@ -87,35 +85,32 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) { ; CHECK-NEXT: vmov q3[3], 
q3[1], r1, r0 ; CHECK-NEXT: vmov.u16 r1, q2[4] ; CHECK-NEXT: vmullb.s16 q0, q3, q0 -; CHECK-NEXT: vmov.i32 q3, #0x7fff ; CHECK-NEXT: vshl.i32 q0, q0, #10 ; CHECK-NEXT: vshr.s32 q0, q0, #10 -; CHECK-NEXT: vshr.s32 q0, q0, #15 -; CHECK-NEXT: vmin.s32 q4, q0, q3 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vshr.s32 q3, q0, #15 +; CHECK-NEXT: vmov r0, s12 ; CHECK-NEXT: vmov.16 q0[0], r0 -; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: vmov r0, s13 ; CHECK-NEXT: vmov.16 q0[1], r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s14 ; CHECK-NEXT: vmov.16 q0[2], r0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s15 ; CHECK-NEXT: vmov.16 q0[3], r0 ; CHECK-NEXT: vmov.u16 r0, q2[6] -; CHECK-NEXT: vmov q4[2], q4[0], r1, r0 +; CHECK-NEXT: vmov q3[2], q3[0], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q2[7] ; CHECK-NEXT: vmov.u16 r1, q2[5] -; CHECK-NEXT: vmov q4[3], q4[1], r1, r0 +; CHECK-NEXT: vmov q3[3], q3[1], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q1[6] ; CHECK-NEXT: vmov.u16 r1, q1[4] ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0 ; CHECK-NEXT: vmov.u16 r0, q1[7] ; CHECK-NEXT: vmov.u16 r1, q1[5] ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0 -; CHECK-NEXT: vmullb.s16 q1, q2, q4 +; CHECK-NEXT: vmullb.s16 q1, q2, q3 ; CHECK-NEXT: vshl.i32 q1, q1, #10 ; CHECK-NEXT: vshr.s32 q1, q1, #10 ; CHECK-NEXT: vshr.s32 q1, q1, #15 -; CHECK-NEXT: vmin.s32 q1, q1, q3 ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vmov.16 q0[4], r0 ; CHECK-NEXT: vmov r0, s5 @@ -124,7 +119,6 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) { ; CHECK-NEXT: vmov.16 q0[6], r0 ; CHECK-NEXT: vmov r0, s7 ; CHECK-NEXT: vmov.16 q0[7], r0 -; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %l2 = sext <8 x i16> %s0 to <8 x i22> From d29ae443aa4028ca9cc274cd1496f7d80f34a38a Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 10 Jun 2021 21:53:04 +0100 Subject: [PATCH 316/318] [ARM] Fix Changed status in MVEGatherScatterLoweringPass. 
Now that we are calling SimplifyInstructionsInBlock, make sure we update Changed when it reports alterations. --- llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 195622cfd586..56823735e2d9 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -1166,7 +1166,7 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) { bool Changed = false; for (BasicBlock &BB : F) { - SimplifyInstructionsInBlock(&BB); + Changed |= SimplifyInstructionsInBlock(&BB); for (Instruction &I : BB) { IntrinsicInst *II = dyn_cast(&I); From b7c7b42db1d16c4cab595bf53c62a70a6a505e0e Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 29 Apr 2021 07:44:04 +0100 Subject: [PATCH 317/318] [ARM] Use just ARM::t2B in ARMBlockPlacementPass The ARMConstantIsland pass will convert any t2B to tB if they are within range after it has added or moved any constant pools. They don't need to be deliberately converted beforehand, and it doesn't deal with needing to convert tB to t2B very well. 
--- llvm/lib/Target/ARM/ARMBlockPlacement.cpp | 7 ++----- llvm/test/CodeGen/Thumb2/block-placement.mir | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp index 581b4b9857af..9ba16003a97a 100644 --- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -145,8 +145,7 @@ bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { It++) { MachineBasicBlock *MBB = &*It; for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEnd && - Terminator.getOpcode() != ARM::t2LoopEndDec) + if (Terminator.getOpcode() != ARM::t2LoopEndDec) continue; MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); // The LE will become forwards branching if it branches to LoopExit @@ -204,10 +203,8 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, if (!Terminator.isUnconditionalBranch()) { // The BB doesn't have an unconditional branch so it relied on // fall-through. Fix by adding an unconditional branch to the moved BB. - unsigned BrOpc = - BBUtils->isBBInRange(&Terminator, To, 254) ? 
ARM::tB : ARM::t2B; MachineInstrBuilder MIB = - BuildMI(From, Terminator.getDebugLoc(), TII->get(BrOpc)); + BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B)); MIB.addMBB(To); MIB.addImm(ARMCC::CondCodes::AL); MIB.addReg(ARM::NoRegister); diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir index ed4a0a6b493d..c38223baba76 100644 --- a/llvm/test/CodeGen/Thumb2/block-placement.mir +++ b/llvm/test/CodeGen/Thumb2/block-placement.mir @@ -48,7 +48,7 @@ body: | ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: t2WhileLoopStart killed renamable $r0, %bb.1, implicit-def dead $cpsr - ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.3: @@ -145,7 +145,7 @@ body: | ; CHECK: $lr = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2WhileLoopStart killed renamable $lr, %bb.1, implicit-def dead $cpsr - ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg + ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr From fed41342a82f5a3a9201819a82bf7a48313e296b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 28 Jun 2021 09:23:38 -0700 Subject: [PATCH 318/318] Revert "Revert "[Coverage] Fix branch coverage merging in FunctionCoverageSummary::get() for instantiation"" This reverts commit 33d312b2d731507327252fd597bac1b738870330. The original patch was correct, so we need to restore it in the release branch. 
--- llvm/test/tools/llvm-cov/branch-templates.cpp | 16 +++++++++++++++- llvm/tools/llvm-cov/CoverageSummaryInfo.cpp | 6 +----- llvm/tools/llvm-cov/CoverageSummaryInfo.h | 5 +++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/llvm/test/tools/llvm-cov/branch-templates.cpp b/llvm/test/tools/llvm-cov/branch-templates.cpp index 750dc7bd58f2..4797428f8835 100644 --- a/llvm/test/tools/llvm-cov/branch-templates.cpp +++ b/llvm/test/tools/llvm-cov/branch-templates.cpp @@ -1,9 +1,9 @@ // RUN: llvm-profdata merge %S/Inputs/branch-templates.proftext -o %t.profdata // RUN: llvm-cov show --show-expansions --show-branches=count %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s // RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -show-functions -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORT +// RUN: llvm-cov report --show-branch-summary %S/Inputs/branch-templates.o32l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s -check-prefix=REPORTFILE #include - template void unused(T x) { return; @@ -45,3 +45,17 @@ int main() { // REPORT-NEXT: _Z4funcIfEiT_ 5 2 60.00% 7 3 57.14% 2 1 50.00% // REPORT-NEXT: --- // REPORT-NEXT: TOTAL 22 7 68.18% 31 11 64.52% 12 6 50.00% + +// Make sure the covered branch tally for the function instantiation group is +// merged to reflect maximum branch coverage of a single instantiation, just +// like what is done for lines and regions. Also, the total branch tally +// summary for an instantiation group should agree with the total number of +// branches in the definition (In this case, 2 and 6 for func<>() and main(), +// respectively). This is returned by: FunctionCoverageSummary::get(const +// InstantiationGroup &Group, ...) 
+ +// REPORTFILE: Filename Regions Missed Regions Cover Functions Missed Functions Executed Lines Missed Lines Cover Branches Missed Branches Cover +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: branch-templates.cpp 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% +// REPORTFILE-NEXT: --- +// REPORTFILE-NEXT: TOTAL 12 3 75.00% 2 0 100.00% 17 4 76.47% 8 4 50.00% diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp index 4a0a86168908..10e059adeb7d 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp @@ -100,11 +100,7 @@ FunctionCoverageSummary::get(const InstantiationGroup &Group, for (const auto &FCS : Summaries.drop_front()) { Summary.RegionCoverage.merge(FCS.RegionCoverage); Summary.LineCoverage.merge(FCS.LineCoverage); - - // Sum branch coverage across instantiation groups for the summary rather - // than "merge" the maximum count. This is a clearer view into whether all - // created branches are covered. - Summary.BranchCoverage += FCS.BranchCoverage; + Summary.BranchCoverage.merge(FCS.BranchCoverage); } return Summary; } diff --git a/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/llvm/tools/llvm-cov/CoverageSummaryInfo.h index 4bc1c24a079f..62e7cad1012b 100644 --- a/llvm/tools/llvm-cov/CoverageSummaryInfo.h +++ b/llvm/tools/llvm-cov/CoverageSummaryInfo.h @@ -123,6 +123,11 @@ class BranchCoverageInfo { return *this; } + void merge(const BranchCoverageInfo &RHS) { + Covered = std::max(Covered, RHS.Covered); + NumBranches = std::max(NumBranches, RHS.NumBranches); + } + size_t getCovered() const { return Covered; } size_t getNumBranches() const { return NumBranches; }