diff --git a/.circleci/config.yml b/.circleci/config.yml index e55a64a1b4d..539e53df7bb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -105,7 +105,7 @@ jobs: environment: - PARALLELISM: 4 - CI_OS: linux - - LLVM_MAJOR: 14 + - LLVM_MAJOR: 15 - HOST_LDC_VERSION: 1.24.0 - EXTRA_CMAKE_FLAGS: "-DMULTILIB=ON -DRT_SUPPORT_SANITIZERS=ON -DBUILD_LTO_LIBS=ON" Ubuntu-20.04-sharedLibsOnly-gdmd: @@ -116,7 +116,7 @@ jobs: environment: - PARALLELISM: 4 - CI_OS: linux - - LLVM_MAJOR: 14 + - LLVM_MAJOR: 15 - EXTRA_APT_PACKAGES: gdmd - EXTRA_CMAKE_FLAGS: "-DBUILD_SHARED_LIBS=ON -DBUILD_LTO_LIBS=ON -DD_COMPILER=gdmd -DLDC_LINK_MANUALLY=ON" diff --git a/.github/workflows/supported_llvm_versions.yml b/.github/workflows/supported_llvm_versions.yml index 1cd0b8ed86d..5467403eb38 100644 --- a/.github/workflows/supported_llvm_versions.yml +++ b/.github/workflows/supported_llvm_versions.yml @@ -36,11 +36,6 @@ jobs: host_dc: dmd-beta llvm_version: 15.0.6 cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLIB_SUFFIX=64 -DLDC_LINK_MANUALLY=ON - - job_name: macOS 11, LLVM 14, latest DMD beta - os: macos-11 - host_dc: dmd-beta - llvm_version: 14.0.6 - cmake_flags: -DBUILD_SHARED_LIBS=ON -DRT_SUPPORT_SANITIZERS=ON -DLDC_LINK_MANUALLY=ON -DCMAKE_CXX_COMPILER=/usr/bin/c++ -DCMAKE_C_COMPILER=/usr/bin/cc name: ${{ matrix.job_name }} runs-on: ${{ matrix.os }} env: diff --git a/CHANGELOG.md b/CHANGELOG.md index a3bd7af37d1..fc8d6ece7af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ #### Big news #### Platform support -- Supports LLVM 14 - 18. Support for LLVM 11, 12 and 13 was dropped. +- Supports LLVM 15 - 18. Support for LLVM 11 - 14 was dropped. The CLI options `-passmanager` and `-opaque-pointers` were removed. #### Bug fixes diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d34aad4df2..449de4b6fc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ endfunction() # Locate LLVM. # -find_package(LLVM 14.0 REQUIRED +find_package(LLVM 15.0 REQUIRED all-targets analysis asmparser asmprinter bitreader bitwriter codegen core debuginfodwarf debuginfomsf debuginfopdb demangle instcombine ipo instrumentation irreader libdriver linker lto mc @@ -823,13 +823,8 @@ if (LDC_INSTALL_LLVM_RUNTIME_LIBS) copy_compilerrt_lib("darwin/libclang_rt.xray-fdr_osx.a" "libldc_rt.xray-fdr.a" FALSE) copy_compilerrt_lib("darwin/libclang_rt.xray-profiling_osx.a" "libldc_rt.xray-profiling.a" FALSE) elseif(UNIX) - if(LDC_LLVM_VER LESS 1500) - set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "linux") - set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "x86_64") - else() - set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "x86_64-unknown-linux-gnu") - set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "") - endif() + set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT "x86_64-unknown-linux-gnu") + set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT "") set(LDC_INSTALL_LLVM_RUNTIME_LIBS_OS "${LDC_INSTALL_LLVM_RUNTIME_LIBS_OS_DEFAULT}" CACHE STRING "Non-Mac Posix: OS used as directory name for the compiler-rt source libraries, e.g., 'freebsd'.") set(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH "${LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH_DEFAULT}" CACHE STRING "Non-Mac Posix: architecture used as libname suffix for the compiler-rt source libraries, e.g., 'aarch64'.") if(LDC_INSTALL_LLVM_RUNTIME_LIBS_ARCH STREQUAL "") diff --git a/cmake/Modules/FindLLVM.cmake b/cmake/Modules/FindLLVM.cmake index 061617ba13b..9dfc883a31c 100644 --- a/cmake/Modules/FindLLVM.cmake +++ b/cmake/Modules/FindLLVM.cmake @@ -36,7 +36,6 @@ set(llvm_config_names llvm-config-18.1 llvm-config181 llvm-config-18 llvm-config-17.0 llvm-config170 llvm-config-17 llvm-config-16.0 llvm-config160 llvm-config-16 llvm-config-15.0 llvm-config150 llvm-config-15 - llvm-config-14.0 llvm-config140 llvm-config-14 llvm-config) find_program(LLVM_CONFIG NAMES ${llvm_config_names} @@ -49,12 +48,10 @@ if(APPLE) NAMES ${llvm_config_names} PATHS /opt/local/libexec/llvm-18/bin /opt/local/libexec/llvm-17/bin /opt/local/libexec/llvm-16/bin /opt/local/libexec/llvm-15/bin - /opt/local/libexec/llvm-14/bin /opt/local/libexec/llvm-13/bin - /opt/local/libexec/llvm-12/bin /opt/local/libexec/llvm-11/bin /opt/local/libexec/llvm/bin /usr/local/opt/llvm@18/bin /usr/local/opt/llvm@17/bin /usr/local/opt/llvm@16/bin /usr/local/opt/llvm@15/bin - /usr/local/opt/llvm@14/bin /usr/local/opt/llvm/bin + /usr/local/opt/llvm/bin NO_DEFAULT_PATH) endif() diff --git a/driver/main.cpp b/driver/main.cpp index 5e01d314a03..3f77e912d7f 100644 --- a/driver/main.cpp +++ b/driver/main.cpp @@ -561,10 +561,8 @@ void parseCommandLine(Strings &sourceFiles) { // enforce opaque IR pointers #if LDC_LLVM_VER >= 1700 // supports opaque IR pointers only -#elif LDC_LLVM_VER >= 1500 +#else getGlobalContext().setOpaquePointers(true); -#else // LLVM 14 - getGlobalContext().enableOpaquePointers(); #endif } diff --git a/gen/abi/x86.cpp b/gen/abi/x86.cpp index b5251a1ee2a..5965a7f38d2 100644 --- a/gen/abi/x86.cpp +++ b/gen/abi/x86.cpp @@ -264,13 +264,7 @@ struct X86TargetABI : TargetABI { // Keep alignment for LLVM 13+, to prevent invalid `movaps` etc., // but limit to 4 (required according to runnable/ldc_cabi1.d). auto align4 = llvm::Align(4); - if (arg->attrs.getAlignment(). -#if LDC_LLVM_VER >= 1500 - value_or -#else - getValueOr -#endif - (align4) > align4) + if (arg->attrs.getAlignment().value_or(align4) > align4) arg->attrs.addAlignmentAttr(align4); } } diff --git a/gen/functions.cpp b/gen/functions.cpp index 5b75c7ee38e..68a0cd44be9 100644 --- a/gen/functions.cpp +++ b/gen/functions.cpp @@ -1139,11 +1139,7 @@ void DtoDefineFunction(FuncDeclaration *fd, bool linkageAvailableExternally) { // function attributes if (gABI->needsUnwindTables()) { -#if LDC_LLVM_VER >= 1500 func->setUWTableKind(llvm::UWTableKind::Default); -#else - func->addFnAttr(LLAttribute::UWTable); -#endif } if (opts::isAnySanitizerEnabled() && !opts::functionIsInSanitizerBlacklist(fd)) { diff --git a/gen/modules.cpp b/gen/modules.cpp index cab2c97bdd1..9e747f21eae 100644 --- a/gen/modules.cpp +++ b/gen/modules.cpp @@ -299,11 +299,7 @@ void addCoverageAnalysis(Module *m) { ctor->setCallingConv(gABI->callingConv(LINK::d)); // Set function attributes. See functions.cpp:DtoDefineFunction() if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) { -#if LDC_LLVM_VER >= 1500 ctor->setUWTableKind(llvm::UWTableKind::Default); -#else - ctor->addFnAttr(LLAttribute::UWTable); -#endif } llvm::BasicBlock *bb = llvm::BasicBlock::Create(gIR->context(), "", ctor); @@ -404,11 +400,7 @@ void registerModuleInfo(Module *m) { } void addModuleFlags(llvm::Module &m) { -#if LDC_LLVM_VER >= 1500 const auto ModuleMinFlag = llvm::Module::Min; -#else - const auto ModuleMinFlag = llvm::Module::Warning; // Fallback value -#endif if (opts::fCFProtection == opts::CFProtectionType::Return || opts::fCFProtection == opts::CFProtectionType::Full) { diff --git a/gen/naked.cpp b/gen/naked.cpp index ff179192da2..e5ea338262f 100644 --- a/gen/naked.cpp +++ b/gen/naked.cpp @@ -507,20 +507,11 @@ llvm::CallInst *DtoInlineAsmExpr(const Loc &loc, llvm::StringRef code, llvm::FunctionType *FT = llvm::FunctionType::get(returnType, operandTypes, false); -#if LDC_LLVM_VER < 1500 - // make sure the constraints are valid - if (!llvm::InlineAsm::Verify(FT, constraints)) { - error(loc, "inline asm constraints are invalid"); - fatal(); - } -#else if (auto err = llvm::InlineAsm::verify(FT, constraints)) { error(loc, "inline asm constraints are invalid"); llvm::errs() << err; fatal(); } -#endif - // build asm call bool sideeffect = true; diff --git a/gen/runtime.cpp b/gen/runtime.cpp index 3f13351fd4a..48c2865fe0e 100644 --- a/gen/runtime.cpp +++ b/gen/runtime.cpp @@ -273,11 +273,7 @@ struct LazyFunctionDeclarer { // FIXME: Move to better place (abi-x86-64.cpp?) // NOTE: There are several occurances if this line. if (global.params.targetTriple->getArch() == llvm::Triple::x86_64) { -#if LDC_LLVM_VER >= 1500 fn->setUWTableKind(llvm::UWTableKind::Default); -#else - fn->addFnAttr(LLAttribute::UWTable); -#endif } fn->setCallingConv(gABI->callingConv(dty, false)); diff --git a/runtime/druntime/src/ldc/intrinsics.di b/runtime/druntime/src/ldc/intrinsics.di index 99c3d3fc820..485ffc8c473 100644 --- a/runtime/druntime/src/ldc/intrinsics.di +++ b/runtime/druntime/src/ldc/intrinsics.di @@ -19,8 +19,7 @@ else static assert(false, "This module is only valid for LDC"); } - version (LDC_LLVM_1400) enum LLVM_version = 1400; -else version (LDC_LLVM_1500) enum LLVM_version = 1500; + version (LDC_LLVM_1500) enum LLVM_version = 1500; else version (LDC_LLVM_1600) enum LLVM_version = 1600; else version (LDC_LLVM_1700) enum LLVM_version = 1700; else version (LDC_LLVM_1800) enum LLVM_version = 1800; diff --git a/tests/PGO/sample_based.d b/tests/PGO/sample_based.d index 599a8c4df78..700a93fbd83 100644 --- a/tests/PGO/sample_based.d +++ b/tests/PGO/sample_based.d @@ -1,7 +1,5 @@ // Test basic use of sample-based PGO profile -// REQUIRES: atleast_llvm1500 - // RUN: split-file %s %t // RUN: %ldc -O2 -c -gline-tables-only -output-ll -of=%t.ll -fprofile-sample-use=%t/pgo-sample.prof %t/testcase.d && FileCheck %s < %t.ll diff --git a/tests/codegen/gh3692.d b/tests/codegen/gh3692.d index 8ba5b19369f..f75a5cfc1e5 100644 --- a/tests/codegen/gh3692.d +++ b/tests/codegen/gh3692.d @@ -1,48 +1,44 @@ // https://github.com/ldc-developers/ldc/issues/3692 // REQUIRES: target_X86 -// REQUIRES: atmost_llvm1409 // RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s // RUN: FileCheck %s < %t.ll // D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment -// CHECK: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg) +// CHECK-LABEL: define void @_D6gh36924takeFG3iZv({ i64, i32 } %a_arg) void take(int[3] a) { // the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment) - // CHECK-NEXT: %.BaseBitcastABIRewrite_param_storage = alloca { i64, i32 }, align 8 - // CHECK-NEXT: store { i64, i32 } %a_arg, { i64, i32 }* %.BaseBitcastABIRewrite_param_storage - // CHECK-NEXT: %a = bitcast { i64, i32 }* %.BaseBitcastABIRewrite_param_storage to [3 x i32]* + // CHECK-NEXT: = alloca { i64, i32 }, align 8 } -// CHECK: define void @_D6gh36924passFZv() +// CHECK-LABEL: define void @_D6gh36924passFZv() void pass() { // CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4 // we need an extra padded alloca with proper alignment // CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8 - // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, { i64, i32 }* %.BaseBitcastABIRewrite_padded_arg_storage + // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage take([1, 2, 3]); } // D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only -// CHECK: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg) +// CHECK-LABEL: define void @_D6gh36925take4FG4iZv({ i64, i64 } %a_arg) void take4(int[4] a) { // the alloca should have 8-bytes alignment, even though a.alignof == 4 // CHECK-NEXT: %a = alloca [4 x i32], align 8 - // CHECK-NEXT: %1 = bitcast [4 x i32]* %a to { i64, i64 }* - // CHECK-NEXT: store { i64, i64 } %a_arg, { i64, i64 }* %1 + // CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} % } -// CHECK: define void @_D6gh36925pass4FZv() +// CHECK-LABEL: define void @_D6gh36925pass4FZv() void pass4() { // CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4 // we need an extra alloca with 8-bytes alignment // CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8 - // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, { i64, i64 }* %.BaseBitcastABIRewrite_padded_arg_storage + // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage take4([1, 2, 3, 4]); } diff --git a/tests/codegen/gh3692_llvm15.d b/tests/codegen/gh3692_llvm15.d deleted file mode 100644 index 718635f8af2..00000000000 --- a/tests/codegen/gh3692_llvm15.d +++ /dev/null @@ -1,45 +0,0 @@ -// https://github.com/ldc-developers/ldc/issues/3692 - -// REQUIRES: target_X86 -// REQUIRES: atleast_llvm1500 -// RUN: %ldc -mtriple=x86_64-linux-gnu -output-ll -of=%t.ll %s -// RUN: FileCheck %s < %t.ll - - -// D `int[3]` rewritten to LL `{ i64, i32 }` for SysV ABI - mismatching size and alignment -// CHECK-LABEL: define void @_D13gh3692_llvm154takeFG3iZv({ i64, i32 } %a_arg) -void take(int[3] a) -{ - // the `{ i64, i32 }` size is 16 bytes, so we need a padded alloca (with 8-bytes alignment) - // CHECK-NEXT: = alloca { i64, i32 }, align 8 -} - -// CHECK-LABEL: define void @_D13gh3692_llvm154passFZv() -void pass() -{ - // CHECK-NEXT: %arrayliteral = alloca [3 x i32], align 4 - // we need an extra padded alloca with proper alignment - // CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i32 }, align 8 - // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i32 }, {{\{ i64, i32 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage - take([1, 2, 3]); -} - - -// D `int[4]` rewritten to LL `{ i64, i64 }` for SysV ABI - mismatching alignment only -// CHECK-LABEL: define void @_D13gh3692_llvm155take4FG4iZv({ i64, i64 } %a_arg) -void take4(int[4] a) -{ - // the alloca should have 8-bytes alignment, even though a.alignof == 4 - // CHECK-NEXT: %a = alloca [4 x i32], align 8 - // CHECK: store { i64, i64 } %a_arg, {{\{ i64, i64 \}\*|ptr}} % -} - -// CHECK-LABEL: define void @_D13gh3692_llvm155pass4FZv() -void pass4() -{ - // CHECK-NEXT: %arrayliteral = alloca [4 x i32], align 4 - // we need an extra alloca with 8-bytes alignment - // CHECK-NEXT: %.BaseBitcastABIRewrite_padded_arg_storage = alloca { i64, i64 }, align 8 - // CHECK: %.BaseBitcastABIRewrite_arg = load { i64, i64 }, {{\{ i64, i64 \}\*|ptr}} %.BaseBitcastABIRewrite_padded_arg_storage - take4([1, 2, 3, 4]); -} diff --git a/tools/ldc-profdata/llvm-profdata-14.0.cpp b/tools/ldc-profdata/llvm-profdata-14.0.cpp deleted file mode 100644 index 6000460d3c2..00000000000 --- a/tools/ldc-profdata/llvm-profdata-14.0.cpp +++ /dev/null @@ -1,2669 +0,0 @@ -//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// llvm-profdata merges .profdata files. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/Object/Binary.h" -#include "llvm/ProfileData/InstrProfCorrelator.h" -#include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/InstrProfWriter.h" -#include "llvm/ProfileData/ProfileCommon.h" -#include "llvm/ProfileData/RawMemProfReader.h" -#include "llvm/ProfileData/SampleProfReader.h" -#include "llvm/ProfileData/SampleProfWriter.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Discriminator.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/ThreadPool.h" -#include "llvm/Support/Threading.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; - -enum ProfileFormat { - PF_None = 0, - PF_Text, - PF_Compact_Binary, - PF_Ext_Binary, - PF_GCC, - PF_Binary -}; - -static void warn(Twine Message, std::string Whence = "", - std::string Hint = "") { - WithColor::warning(); - if (!Whence.empty()) - errs() << Whence << ": "; - errs() << Message << "\n"; - if (!Hint.empty()) - WithColor::note() << Hint << "\n"; -} - -static void warn(Error E, StringRef Whence = "") { - if (E.isA()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { - warn(IPE.message(), std::string(Whence), std::string("")); - }); - } -} - -static void exitWithError(Twine Message, std::string Whence = "", - std::string Hint = "") { - WithColor::error(); - if (!Whence.empty()) - errs() << Whence << ": "; - errs() << Message << "\n"; - if (!Hint.empty()) - WithColor::note() << Hint << "\n"; - ::exit(1); -} - -static void exitWithError(Error E, StringRef Whence = "") { - if (E.isA()) { - handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { - instrprof_error instrError = IPE.get(); - StringRef Hint = ""; - if (instrError == instrprof_error::unrecognized_format) { - // Hint in case user missed specifying the profile type. - Hint = "Perhaps you forgot to use the --sample or --memory option?"; - } - exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); - }); - } - - exitWithError(toString(std::move(E)), std::string(Whence)); -} - -static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { - exitWithError(EC.message(), std::string(Whence)); -} - -namespace { -enum ProfileKinds { instr, sample, memory }; -enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; -} - -static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, - StringRef Whence = "") { - if (FailMode == failIfAnyAreInvalid) - exitWithErrorCode(EC, Whence); - else - warn(EC.message(), std::string(Whence)); -} - -static void handleMergeWriterError(Error E, StringRef WhenceFile = "", - StringRef WhenceFunction = "", - bool ShowHint = true) { - if (!WhenceFile.empty()) - errs() << WhenceFile << ": "; - if (!WhenceFunction.empty()) - errs() << WhenceFunction << ": "; - - auto IPE = instrprof_error::success; - E = handleErrors(std::move(E), - [&IPE](std::unique_ptr E) -> Error { - IPE = E->get(); - return Error(std::move(E)); - }); - errs() << toString(std::move(E)) << "\n"; - - if (ShowHint) { - StringRef Hint = ""; - if (IPE != instrprof_error::success) { - switch (IPE) { - case instrprof_error::hash_mismatch: - case instrprof_error::count_mismatch: - case instrprof_error::value_site_count_mismatch: - Hint = "Make sure that all profile data to be merged is generated " - "from the same binary."; - break; - default: - break; - } - } - - if (!Hint.empty()) - errs() << Hint << "\n"; - } -} - -namespace { -/// A remapper from original symbol names to new symbol names based on a file -/// containing a list of mappings from old name to new name. -class SymbolRemapper { - std::unique_ptr File; - DenseMap RemappingTable; - -public: - /// Build a SymbolRemapper from a file containing a list of old/new symbols. - static std::unique_ptr create(StringRef InputFile) { - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); - if (!BufOrError) - exitWithErrorCode(BufOrError.getError(), InputFile); - - auto Remapper = std::make_unique(); - Remapper->File = std::move(BufOrError.get()); - - for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#'); - !LineIt.is_at_eof(); ++LineIt) { - std::pair Parts = LineIt->split(' '); - if (Parts.first.empty() || Parts.second.empty() || - Parts.second.count(' ')) { - exitWithError("unexpected line in remapping file", - (InputFile + ":" + Twine(LineIt.line_number())).str(), - "expected 'old_symbol new_symbol'"); - } - Remapper->RemappingTable.insert(Parts); - } - return Remapper; - } - - /// Attempt to map the given old symbol into a new symbol. - /// - /// \return The new symbol, or \p Name if no such symbol was found. - StringRef operator()(StringRef Name) { - StringRef New = RemappingTable.lookup(Name); - return New.empty() ? Name : New; - } -}; -} - -struct WeightedFile { - std::string Filename; - uint64_t Weight; -}; -typedef SmallVector WeightedFileVector; - -/// Keep track of merged data and reported errors. -struct WriterContext { - std::mutex Lock; - InstrProfWriter Writer; - std::vector> Errors; - std::mutex &ErrLock; - SmallSet &WriterErrorCodes; - - WriterContext(bool IsSparse, std::mutex &ErrLock, - SmallSet &WriterErrorCodes) - : Writer(IsSparse), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) { - } -}; - -/// Computer the overlap b/w profile BaseFilename and TestFileName, -/// and store the program level result to Overlap. -static void overlapInput(const std::string &BaseFilename, - const std::string &TestFilename, WriterContext *WC, - OverlapStats &Overlap, - const OverlapFuncFilters &FuncFilter, - raw_fd_ostream &OS, bool IsCS) { - auto ReaderOrErr = InstrProfReader::create(TestFilename); - if (Error E = ReaderOrErr.takeError()) { - // Skip the empty profiles by returning sliently. - instrprof_error IPE = InstrProfError::take(std::move(E)); - if (IPE != instrprof_error::empty_raw_profile) - WC->Errors.emplace_back(make_error(IPE), TestFilename); - return; - } - - auto Reader = std::move(ReaderOrErr.get()); - for (auto &I : *Reader) { - OverlapStats FuncOverlap(OverlapStats::FunctionLevel); - FuncOverlap.setFuncInfo(I.Name, I.Hash); - - WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter); - FuncOverlap.dump(OS); - } -} - -/// Load an input into a writer context. -static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, - const InstrProfCorrelator *Correlator, - WriterContext *WC) { - std::unique_lock CtxGuard{WC->Lock}; - - // Copy the filename, because llvm::ThreadPool copied the input "const - // WeightedFile &" by value, making a reference to the filename within it - // invalid outside of this packaged task. - std::string Filename = Input.Filename; - - auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator); - if (Error E = ReaderOrErr.takeError()) { - // Skip the empty profiles by returning sliently. - instrprof_error IPE = InstrProfError::take(std::move(E)); - if (IPE != instrprof_error::empty_raw_profile) - WC->Errors.emplace_back(make_error(IPE), Filename); - return; - } - - auto Reader = std::move(ReaderOrErr.get()); - if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) { - consumeError(std::move(E)); - WC->Errors.emplace_back( - make_error( - "Merge IR generated profile with Clang generated profile.", - std::error_code()), - Filename); - return; - } - - for (auto &I : *Reader) { - if (Remapper) - I.Name = (*Remapper)(I.Name); - const StringRef FuncName = I.Name; - bool Reported = false; - WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) { - if (Reported) { - consumeError(std::move(E)); - return; - } - Reported = true; - // Only show hint the first time an error occurs. - instrprof_error IPE = InstrProfError::take(std::move(E)); - std::unique_lock ErrGuard{WC->ErrLock}; - bool firstTime = WC->WriterErrorCodes.insert(IPE).second; - handleMergeWriterError(make_error(IPE), Input.Filename, - FuncName, firstTime); - }); - } - if (Reader->hasError()) - if (Error E = Reader->getError()) - WC->Errors.emplace_back(std::move(E), Filename); -} - -/// Merge the \p Src writer context into \p Dst. -static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { - for (auto &ErrorPair : Src->Errors) - Dst->Errors.push_back(std::move(ErrorPair)); - Src->Errors.clear(); - - Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) { - instrprof_error IPE = InstrProfError::take(std::move(E)); - std::unique_lock ErrGuard{Dst->ErrLock}; - bool firstTime = Dst->WriterErrorCodes.insert(IPE).second; - if (firstTime) - warn(toString(make_error(IPE))); - }); -} - -static void writeInstrProfile(StringRef OutputFilename, - ProfileFormat OutputFormat, - InstrProfWriter &Writer) { - std::error_code EC; - raw_fd_ostream Output(OutputFilename.data(), EC, - OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF - : sys::fs::OF_None); - if (EC) - exitWithErrorCode(EC, OutputFilename); - - if (OutputFormat == PF_Text) { - if (Error E = Writer.writeText(Output)) - warn(std::move(E)); - } else { - if (Output.is_displayed()) - exitWithError("cannot write a non-text format profile to the terminal"); - if (Error E = Writer.write(Output)) - warn(std::move(E)); - } -} - -static void mergeInstrProfile(const WeightedFileVector &Inputs, - StringRef DebugInfoFilename, - SymbolRemapper *Remapper, - StringRef OutputFilename, - ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads, FailureMode FailMode) { - if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && - OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) - exitWithError("unknown format is specified"); - - std::unique_ptr Correlator; - if (!DebugInfoFilename.empty()) { - if (auto Err = - InstrProfCorrelator::get(DebugInfoFilename).moveInto(Correlator)) - exitWithError(std::move(Err), DebugInfoFilename); - if (auto Err = Correlator->correlateProfileData()) - exitWithError(std::move(Err), DebugInfoFilename); - } - - std::mutex ErrorLock; - SmallSet WriterErrorCodes; - - // If NumThreads is not specified, auto-detect a good default. - if (NumThreads == 0) - NumThreads = std::min(hardware_concurrency().compute_thread_count(), - unsigned((Inputs.size() + 1) / 2)); - // FIXME: There's a bug here, where setting NumThreads = Inputs.size() fails - // the merge_empty_profile.test because the InstrProfWriter.ProfileKind isn't - // merged, thus the emitted file ends up with a PF_Unknown kind. - - // Initialize the writer contexts. - SmallVector, 4> Contexts; - for (unsigned I = 0; I < NumThreads; ++I) - Contexts.emplace_back(std::make_unique( - OutputSparse, ErrorLock, WriterErrorCodes)); - - if (NumThreads == 1) { - for (const auto &Input : Inputs) - loadInput(Input, Remapper, Correlator.get(), Contexts[0].get()); - } else { - ThreadPool Pool(hardware_concurrency(NumThreads)); - - // Load the inputs in parallel (N/NumThreads serial steps). - unsigned Ctx = 0; - for (const auto &Input : Inputs) { - Pool.async(loadInput, Input, Remapper, Correlator.get(), - Contexts[Ctx].get()); - Ctx = (Ctx + 1) % NumThreads; - } - Pool.wait(); - - // Merge the writer contexts together (~ lg(NumThreads) serial steps). - unsigned Mid = Contexts.size() / 2; - unsigned End = Contexts.size(); - assert(Mid > 0 && "Expected more than one context"); - do { - for (unsigned I = 0; I < Mid; ++I) - Pool.async(mergeWriterContexts, Contexts[I].get(), - Contexts[I + Mid].get()); - Pool.wait(); - if (End & 1) { - Pool.async(mergeWriterContexts, Contexts[0].get(), - Contexts[End - 1].get()); - Pool.wait(); - } - End = Mid; - Mid /= 2; - } while (Mid > 0); - } - - // Handle deferred errors encountered during merging. If the number of errors - // is equal to the number of inputs the merge failed. - unsigned NumErrors = 0; - for (std::unique_ptr &WC : Contexts) { - for (auto &ErrorPair : WC->Errors) { - ++NumErrors; - warn(toString(std::move(ErrorPair.first)), ErrorPair.second); - } - } - if (NumErrors == Inputs.size() || - (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) - exitWithError("no profile can be merged"); - - writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); -} - -/// The profile entry for a function in instrumentation profile. -struct InstrProfileEntry { - uint64_t MaxCount = 0; - float ZeroCounterRatio = 0.0; - InstrProfRecord *ProfRecord; - InstrProfileEntry(InstrProfRecord *Record); - InstrProfileEntry() = default; -}; - -InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) { - ProfRecord = Record; - uint64_t CntNum = Record->Counts.size(); - uint64_t ZeroCntNum = 0; - for (size_t I = 0; I < CntNum; ++I) { - MaxCount = std::max(MaxCount, Record->Counts[I]); - ZeroCntNum += !Record->Counts[I]; - } - ZeroCounterRatio = (float)ZeroCntNum / CntNum; -} - -/// Either set all the counters in the instr profile entry \p IFE to -1 -/// in order to drop the profile or scale up the counters in \p IFP to -/// be above hot threshold. We use the ratio of zero counters in the -/// profile of a function to decide the profile is helpful or harmful -/// for performance, and to choose whether to scale up or drop it. -static void updateInstrProfileEntry(InstrProfileEntry &IFE, - uint64_t HotInstrThreshold, - float ZeroCounterThreshold) { - InstrProfRecord *ProfRecord = IFE.ProfRecord; - if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { - // If all or most of the counters of the function are zero, the - // profile is unaccountable and shuld be dropped. Reset all the - // counters to be -1 and PGO profile-use will drop the profile. - // All counters being -1 also implies that the function is hot so - // PGO profile-use will also set the entry count metadata to be - // above hot threshold. - for (size_t I = 0; I < ProfRecord->Counts.size(); ++I) - ProfRecord->Counts[I] = -1; - return; - } - - // Scale up the MaxCount to be multiple times above hot threshold. - const unsigned MultiplyFactor = 3; - uint64_t Numerator = HotInstrThreshold * MultiplyFactor; - uint64_t Denominator = IFE.MaxCount; - ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { - warn(toString(make_error(E))); - }); -} - -const uint64_t ColdPercentileIdx = 15; -const uint64_t HotPercentileIdx = 11; - -using sampleprof::FSDiscriminatorPass; - -// Internal options to set FSDiscriminatorPass. Used in merge and show -// commands. -static cl::opt FSDiscriminatorPassOption( - "fs-discriminator-pass", cl::init(PassLast), cl::Hidden, - cl::desc("Zero out the discriminator bits for the FS discrimiantor " - "pass beyond this value. The enum values are defined in " - "Support/Discriminator.h"), - cl::values(clEnumVal(Base, "Use base discriminators only"), - clEnumVal(Pass1, "Use base and pass 1 discriminators"), - clEnumVal(Pass2, "Use base and pass 1-2 discriminators"), - clEnumVal(Pass3, "Use base and pass 1-3 discriminators"), - clEnumVal(PassLast, "Use all discriminator bits (default)"))); - -static unsigned getDiscriminatorMask() { - return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue())); -} - -/// Adjust the instr profile in \p WC based on the sample profile in -/// \p Reader. -static void -adjustInstrProfile(std::unique_ptr &WC, - std::unique_ptr &Reader, - unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, - unsigned InstrProfColdThreshold) { - // Function to its entry in instr profile. - StringMap InstrProfileMap; - InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); - for (auto &PD : WC->Writer.getProfileData()) { - // Populate IPBuilder. - for (const auto &PDV : PD.getValue()) { - InstrProfRecord Record = PDV.second; - IPBuilder.addRecord(Record); - } - - // If a function has multiple entries in instr profile, skip it. - if (PD.getValue().size() != 1) - continue; - - // Initialize InstrProfileMap. - InstrProfRecord *R = &PD.getValue().begin()->second; - InstrProfileMap[PD.getKey()] = InstrProfileEntry(R); - } - - ProfileSummary InstrPS = *IPBuilder.getSummary(); - ProfileSummary SamplePS = Reader->getSummary(); - - // Compute cold thresholds for instr profile and sample profile. - uint64_t ColdSampleThreshold = - ProfileSummaryBuilder::getEntryForPercentile( - SamplePS.getDetailedSummary(), - ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) - .MinCount; - uint64_t HotInstrThreshold = - ProfileSummaryBuilder::getEntryForPercentile( - InstrPS.getDetailedSummary(), - ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) - .MinCount; - uint64_t ColdInstrThreshold = - InstrProfColdThreshold - ? InstrProfColdThreshold - : ProfileSummaryBuilder::getEntryForPercentile( - InstrPS.getDetailedSummary(), - ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) - .MinCount; - - // Find hot/warm functions in sample profile which is cold in instr profile - // and adjust the profiles of those functions in the instr profile. - for (const auto &PD : Reader->getProfiles()) { - auto &FContext = PD.first; - const sampleprof::FunctionSamples &FS = PD.second; - auto It = InstrProfileMap.find(FContext.toString()); - if (FS.getHeadSamples() > ColdSampleThreshold && - It != InstrProfileMap.end() && - It->second.MaxCount <= ColdInstrThreshold && - FS.getBodySamples().size() >= SupplMinSizeThreshold) { - updateInstrProfileEntry(It->second, HotInstrThreshold, - ZeroCounterThreshold); - } - } -} - -/// The main function to supplement instr profile with sample profile. -/// \Inputs contains the instr profile. \p SampleFilename specifies the -/// sample profile. \p OutputFilename specifies the output profile name. -/// \p OutputFormat specifies the output profile format. \p OutputSparse -/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold -/// specifies the minimal size for the functions whose profile will be -/// adjusted. \p ZeroCounterThreshold is the threshold to check whether -/// a function contains too many zero counters and whether its profile -/// should be dropped. \p InstrProfColdThreshold is the user specified -/// cold threshold which will override the cold threshold got from the -/// instr profile summary. -static void supplementInstrProfile( - const WeightedFileVector &Inputs, StringRef SampleFilename, - StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, - unsigned InstrProfColdThreshold) { - if (OutputFilename.compare("-") == 0) - exitWithError("cannot write indexed profdata format to stdout"); - if (Inputs.size() != 1) - exitWithError("expect one input to be an instr profile"); - if (Inputs[0].Weight != 1) - exitWithError("expect instr profile doesn't have weight"); - - StringRef InstrFilename = Inputs[0].Filename; - - // Read sample profile. - LLVMContext Context; - auto ReaderOrErr = sampleprof::SampleProfileReader::create( - SampleFilename.str(), Context, FSDiscriminatorPassOption); - if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, SampleFilename); - auto Reader = std::move(ReaderOrErr.get()); - if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, SampleFilename); - - // Read instr profile. - std::mutex ErrorLock; - SmallSet WriterErrorCodes; - auto WC = std::make_unique(OutputSparse, ErrorLock, - WriterErrorCodes); - loadInput(Inputs[0], nullptr, nullptr, WC.get()); - if (WC->Errors.size() > 0) - exitWithError(std::move(WC->Errors[0].first), InstrFilename); - - adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold, - InstrProfColdThreshold); - writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); -} - -/// Make a copy of the given function samples with all symbol names remapped -/// by the provided symbol remapper. -static sampleprof::FunctionSamples -remapSamples(const sampleprof::FunctionSamples &Samples, - SymbolRemapper &Remapper, sampleprof_error &Error) { - sampleprof::FunctionSamples Result; - Result.setName(Remapper(Samples.getName())); - Result.addTotalSamples(Samples.getTotalSamples()); - Result.addHeadSamples(Samples.getHeadSamples()); - for (const auto &BodySample : Samples.getBodySamples()) { - uint32_t MaskedDiscriminator = - BodySample.first.Discriminator & getDiscriminatorMask(); - Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator, - BodySample.second.getSamples()); - for (const auto &Target : BodySample.second.getCallTargets()) { - Result.addCalledTargetSamples(BodySample.first.LineOffset, - MaskedDiscriminator, - Remapper(Target.first()), Target.second); - } - } - for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { - sampleprof::FunctionSamplesMap &Target = - Result.functionSamplesAt(CallsiteSamples.first); - for (const auto &Callsite : CallsiteSamples.second) { - sampleprof::FunctionSamples Remapped = - remapSamples(Callsite.second, Remapper, Error); - MergeResult(Error, - Target[std::string(Remapped.getName())].merge(Remapped)); - } - } - return Result; -} - -static sampleprof::SampleProfileFormat FormatMap[] = { - sampleprof::SPF_None, - sampleprof::SPF_Text, - sampleprof::SPF_Compact_Binary, - sampleprof::SPF_Ext_Binary, - sampleprof::SPF_GCC, - sampleprof::SPF_Binary}; - -static std::unique_ptr -getInputFileBuf(const StringRef &InputFile) { - if (InputFile == "") - return {}; - - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); - if (!BufOrError) - exitWithErrorCode(BufOrError.getError(), InputFile); - - return std::move(*BufOrError); -} - -static void populateProfileSymbolList(MemoryBuffer *Buffer, - sampleprof::ProfileSymbolList &PSL) { - if (!Buffer) - return; - - SmallVector SymbolVec; - StringRef Data = Buffer->getBuffer(); - Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - - for (StringRef SymbolStr : SymbolVec) - PSL.add(SymbolStr.trim()); -} - -static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, - ProfileFormat OutputFormat, - MemoryBuffer *Buffer, - sampleprof::ProfileSymbolList &WriterList, - bool CompressAllSections, bool UseMD5, - bool GenPartialProfile) { - populateProfileSymbolList(Buffer, WriterList); - if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) - warn("Profile Symbol list is not empty but the output format is not " - "ExtBinary format. The list will be lost in the output. "); - - Writer.setProfileSymbolList(&WriterList); - - if (CompressAllSections) { - if (OutputFormat != PF_Ext_Binary) - warn("-compress-all-section is ignored. Specify -extbinary to enable it"); - else - Writer.setToCompressAllSections(); - } - if (UseMD5) { - if (OutputFormat != PF_Ext_Binary) - warn("-use-md5 is ignored. Specify -extbinary to enable it"); - else - Writer.setUseMD5(); - } - if (GenPartialProfile) { - if (OutputFormat != PF_Ext_Binary) - warn("-gen-partial-profile is ignored. Specify -extbinary to enable it"); - else - Writer.setPartialProfile(); - } -} - -static void -mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, - StringRef OutputFilename, ProfileFormat OutputFormat, - StringRef ProfileSymbolListFile, bool CompressAllSections, - bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, - bool SampleMergeColdContext, bool SampleTrimColdContext, - bool SampleColdContextFrameDepth, FailureMode FailMode) { - using namespace sampleprof; - SampleProfileMap ProfileMap; - SmallVector, 5> Readers; - LLVMContext Context; - sampleprof::ProfileSymbolList WriterList; - Optional ProfileIsProbeBased; - Optional ProfileIsCSFlat; - for (const auto &Input : Inputs) { - auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, - FSDiscriminatorPassOption); - if (std::error_code EC = ReaderOrErr.getError()) { - warnOrExitGivenError(FailMode, EC, Input.Filename); - continue; - } - - // We need to keep the readers around until after all the files are - // read so that we do not lose the function names stored in each - // reader's memory. The function names are needed to write out the - // merged profile map. - Readers.push_back(std::move(ReaderOrErr.get())); - const auto Reader = Readers.back().get(); - if (std::error_code EC = Reader->read()) { - warnOrExitGivenError(FailMode, EC, Input.Filename); - Readers.pop_back(); - continue; - } - - SampleProfileMap &Profiles = Reader->getProfiles(); - if (ProfileIsProbeBased.hasValue() && - ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased) - exitWithError( - "cannot merge probe-based profile with non-probe-based profile"); - ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased; - if (ProfileIsCSFlat.hasValue() && - ProfileIsCSFlat != FunctionSamples::ProfileIsCSFlat) - exitWithError("cannot merge CS profile with non-CS profile"); - ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat; - for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end(); - I != E; ++I) { - sampleprof_error Result = sampleprof_error::success; - FunctionSamples Remapped = - Remapper ? remapSamples(I->second, *Remapper, Result) - : FunctionSamples(); - FunctionSamples &Samples = Remapper ? Remapped : I->second; - SampleContext FContext = Samples.getContext(); - MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight)); - if (Result != sampleprof_error::success) { - std::error_code EC = make_error_code(Result); - handleMergeWriterError(errorCodeToError(EC), Input.Filename, - FContext.toString()); - } - } - - std::unique_ptr ReaderList = - Reader->getProfileSymbolList(); - if (ReaderList) - WriterList.merge(*ReaderList); - } - - if (ProfileIsCSFlat && (SampleMergeColdContext || SampleTrimColdContext)) { - // Use threshold calculated from profile summary unless specified. - SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - auto Summary = Builder.computeSummaryForProfiles(ProfileMap); - uint64_t SampleProfColdThreshold = - ProfileSummaryBuilder::getColdCountThreshold( - (Summary->getDetailedSummary())); - - // Trim and merge cold context profile using cold threshold above; - SampleContextTrimmer(ProfileMap) - .trimAndMergeColdContextProfiles( - SampleProfColdThreshold, SampleTrimColdContext, - SampleMergeColdContext, SampleColdContextFrameDepth, false); - } - - if (ProfileIsCSFlat && GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); - ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = false; - } - - auto WriterOrErr = - SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); - if (std::error_code EC = WriterOrErr.getError()) - exitWithErrorCode(EC, OutputFilename); - - auto Writer = std::move(WriterOrErr.get()); - // WriterList will have StringRef refering to string in Buffer. - // Make sure Buffer lives as long as WriterList. - auto Buffer = getInputFileBuf(ProfileSymbolListFile); - handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, - CompressAllSections, UseMD5, GenPartialProfile); - if (std::error_code EC = Writer->write(ProfileMap)) - exitWithErrorCode(std::move(EC)); -} - -static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { - StringRef WeightStr, FileName; - std::tie(WeightStr, FileName) = WeightedFilename.split(','); - - uint64_t Weight; - if (WeightStr.getAsInteger(10, Weight) || Weight < 1) - exitWithError("input weight must be a positive integer"); - - return {std::string(FileName), Weight}; -} - -static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { - StringRef Filename = WF.Filename; - uint64_t Weight = WF.Weight; - - // If it's STDIN just pass it on. - if (Filename == "-") { - WNI.push_back({std::string(Filename), Weight}); - return; - } - - llvm::sys::fs::file_status Status; - llvm::sys::fs::status(Filename, Status); - if (!llvm::sys::fs::exists(Status)) - exitWithErrorCode(make_error_code(errc::no_such_file_or_directory), - Filename); - // If it's a source file, collect it. - if (llvm::sys::fs::is_regular_file(Status)) { - WNI.push_back({std::string(Filename), Weight}); - return; - } - - if (llvm::sys::fs::is_directory(Status)) { - std::error_code EC; - for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E; - F != E && !EC; F.increment(EC)) { - if (llvm::sys::fs::is_regular_file(F->path())) { - addWeightedInput(WNI, {F->path(), Weight}); - } - } - if (EC) - exitWithErrorCode(EC, Filename); - } -} - -static void parseInputFilenamesFile(MemoryBuffer *Buffer, - WeightedFileVector &WFV) { - if (!Buffer) - return; - - SmallVector Entries; - StringRef Data = Buffer->getBuffer(); - Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - for (const StringRef &FileWeightEntry : Entries) { - StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r"); - // Skip comments. - if (SanitizedEntry.startswith("#")) - continue; - // If there's no comma, it's an unweighted profile. - else if (!SanitizedEntry.contains(',')) - addWeightedInput(WFV, {std::string(SanitizedEntry), 1}); - else - addWeightedInput(WFV, parseWeightedFile(SanitizedEntry)); - } -} - -static int merge_main(int argc, const char *argv[]) { - cl::list InputFilenames(cl::Positional, - cl::desc("")); - cl::list WeightedInputFilenames("weighted-input", - cl::desc(",")); - cl::opt InputFilenamesFile( - "input-files", cl::init(""), - cl::desc("Path to file containing newline-separated " - "[,] entries")); - cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"), - cl::aliasopt(InputFilenamesFile)); - cl::opt DumpInputFileList( - "dump-input-file-list", cl::init(false), cl::Hidden, - cl::desc("Dump the list of input files and their weights, then exit")); - cl::opt RemappingFile("remapping-file", cl::value_desc("file"), - cl::desc("Symbol remapping file")); - cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"), - cl::aliasopt(RemappingFile)); - cl::opt OutputFilename("output", cl::value_desc("output"), - cl::init("-"), cl::desc("Output file")); - cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), - cl::aliasopt(OutputFilename)); - cl::opt ProfileKind( - cl::desc("Profile kind:"), cl::init(instr), - cl::values(clEnumVal(instr, "Instrumentation profile (default)"), - clEnumVal(sample, "Sample profile"))); - cl::opt OutputFormat( - cl::desc("Format of output profile"), cl::init(PF_Binary), - cl::values( - clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), - clEnumValN(PF_Compact_Binary, "compbinary", - "Compact binary encoding"), - clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"), - clEnumValN(PF_Text, "text", "Text encoding"), - clEnumValN(PF_GCC, "gcc", - "GCC encoding (only meaningful for -sample)"))); - cl::opt FailureMode( - "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), - cl::values(clEnumValN(failIfAnyAreInvalid, "any", - "Fail if any profile is invalid."), - clEnumValN(failIfAllAreInvalid, "all", - "Fail only if all profiles are invalid."))); - cl::opt OutputSparse("sparse", cl::init(false), - cl::desc("Generate a sparse profile (only meaningful for -instr)")); - cl::opt NumThreads( - "num-threads", cl::init(0), - cl::desc("Number of merge threads to use (default: autodetect)")); - cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), - cl::aliasopt(NumThreads)); - cl::opt ProfileSymbolListFile( - "prof-sym-list", cl::init(""), - cl::desc("Path to file containing the list of function symbols " - "used to populate profile symbol list")); - cl::opt CompressAllSections( - "compress-all-sections", cl::init(false), cl::Hidden, - cl::desc("Compress all sections when writing the profile (only " - "meaningful for -extbinary)")); - cl::opt UseMD5( - "use-md5", cl::init(false), cl::Hidden, - cl::desc("Choose to use MD5 to represent string in name table (only " - "meaningful for -extbinary)")); - cl::opt SampleMergeColdContext( - "sample-merge-cold-context", cl::init(false), cl::Hidden, - cl::desc( - "Merge context sample profiles whose count is below cold threshold")); - cl::opt SampleTrimColdContext( - "sample-trim-cold-context", cl::init(false), cl::Hidden, - cl::desc( - "Trim context sample profiles whose count is below cold threshold")); - cl::opt SampleColdContextFrameDepth( - "sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore, - cl::desc("Keep the last K frames while merging cold profile. 1 means the " - "context-less base profile")); - cl::opt GenPartialProfile( - "gen-partial-profile", cl::init(false), cl::Hidden, - cl::desc("Generate a partial profile (only meaningful for -extbinary)")); - cl::opt SupplInstrWithSample( - "supplement-instr-with-sample", cl::init(""), cl::Hidden, - cl::desc("Supplement an instr profile with sample profile, to correct " - "the profile unrepresentativeness issue. The sample " - "profile is the input of the flag. Output will be in instr " - "format (The flag only works with -instr)")); - cl::opt ZeroCounterThreshold( - "zero-counter-threshold", cl::init(0.7), cl::Hidden, - cl::desc("For the function which is cold in instr profile but hot in " - "sample profile, if the ratio of the number of zero counters " - "divided by the the total number of counters is above the " - "threshold, the profile of the function will be regarded as " - "being harmful for performance and will be dropped.")); - cl::opt SupplMinSizeThreshold( - "suppl-min-size-threshold", cl::init(10), cl::Hidden, - cl::desc("If the size of a function is smaller than the threshold, " - "assume it can be inlined by PGO early inliner and it won't " - "be adjusted based on sample profile.")); - cl::opt InstrProfColdThreshold( - "instr-prof-cold-threshold", cl::init(0), cl::Hidden, - cl::desc("User specified cold threshold for instr profile which will " - "override the cold threshold got from profile summary. ")); - cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); - cl::opt DebugInfoFilename( - "debug-info", cl::init(""), - cl::desc("Use the provided debug info to correlate the raw profile.")); - - cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); - - WeightedFileVector WeightedInputs; - for (StringRef Filename : InputFilenames) - addWeightedInput(WeightedInputs, {std::string(Filename), 1}); - for (StringRef WeightedFilename : WeightedInputFilenames) - addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename)); - - // Make sure that the file buffer stays alive for the duration of the - // weighted input vector's lifetime. - auto Buffer = getInputFileBuf(InputFilenamesFile); - parseInputFilenamesFile(Buffer.get(), WeightedInputs); - - if (WeightedInputs.empty()) - exitWithError("no input files specified. See " + - sys::path::filename(argv[0]) + " -help"); - - if (DumpInputFileList) { - for (auto &WF : WeightedInputs) - outs() << WF.Weight << "," << WF.Filename << "\n"; - return 0; - } - - std::unique_ptr Remapper; - if (!RemappingFile.empty()) - Remapper = SymbolRemapper::create(RemappingFile); - - if (!SupplInstrWithSample.empty()) { - if (ProfileKind != instr) - exitWithError( - "-supplement-instr-with-sample can only work with -instr. "); - - supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename, - OutputFormat, OutputSparse, SupplMinSizeThreshold, - ZeroCounterThreshold, InstrProfColdThreshold); - return 0; - } - - if (ProfileKind == instr) - mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(), - OutputFilename, OutputFormat, OutputSparse, NumThreads, - FailureMode); - else - mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat, ProfileSymbolListFile, CompressAllSections, - UseMD5, GenPartialProfile, GenCSNestedProfile, - SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode); - return 0; -} - -/// Computer the overlap b/w profile BaseFilename and profile TestFilename. -static void overlapInstrProfile(const std::string &BaseFilename, - const std::string &TestFilename, - const OverlapFuncFilters &FuncFilter, - raw_fd_ostream &OS, bool IsCS) { - std::mutex ErrorLock; - SmallSet WriterErrorCodes; - WriterContext Context(false, ErrorLock, WriterErrorCodes); - WeightedFile WeightedInput{BaseFilename, 1}; - OverlapStats Overlap; - Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS); - if (E) - exitWithError(std::move(E), "error in getting profile count sums"); - if (Overlap.Base.CountSum < 1.0f) { - OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n"; - exit(0); - } - if (Overlap.Test.CountSum < 1.0f) { - OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n"; - exit(0); - } - loadInput(WeightedInput, nullptr, nullptr, &Context); - overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS, - IsCS); - Overlap.dump(OS); -} - -namespace { -struct SampleOverlapStats { - SampleContext BaseName; - SampleContext TestName; - // Number of overlap units - uint64_t OverlapCount; - // Total samples of overlap units - uint64_t OverlapSample; - // Number of and total samples of units that only present in base or test - // profile - uint64_t BaseUniqueCount; - uint64_t BaseUniqueSample; - uint64_t TestUniqueCount; - uint64_t TestUniqueSample; - // Number of units and total samples in base or test profile - uint64_t BaseCount; - uint64_t BaseSample; - uint64_t TestCount; - uint64_t TestSample; - // Number of and total samples of units that present in at least one profile - uint64_t UnionCount; - uint64_t UnionSample; - // Weighted similarity - double Similarity; - // For SampleOverlapStats instances representing functions, weights of the - // function in base and test profiles - double BaseWeight; - double TestWeight; - - SampleOverlapStats() - : OverlapCount(0), OverlapSample(0), BaseUniqueCount(0), - BaseUniqueSample(0), TestUniqueCount(0), TestUniqueSample(0), - BaseCount(0), BaseSample(0), TestCount(0), TestSample(0), UnionCount(0), - UnionSample(0), Similarity(0.0), BaseWeight(0.0), TestWeight(0.0) {} -}; -} // end anonymous namespace - -namespace { -struct FuncSampleStats { - uint64_t SampleSum; - uint64_t MaxSample; - uint64_t HotBlockCount; - FuncSampleStats() : SampleSum(0), MaxSample(0), HotBlockCount(0) {} - FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample, - uint64_t HotBlockCount) - : SampleSum(SampleSum), MaxSample(MaxSample), - HotBlockCount(HotBlockCount) {} -}; -} // end anonymous namespace - -namespace { -enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None }; - -// Class for updating merging steps for two sorted maps. The class should be -// instantiated with a map iterator type. -template class MatchStep { -public: - MatchStep() = delete; - - MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd) - : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter), - SecondEnd(SecondEnd), Status(MS_None) {} - - bool areBothFinished() const { - return (FirstIter == FirstEnd && SecondIter == SecondEnd); - } - - bool isFirstFinished() const { return FirstIter == FirstEnd; } - - bool isSecondFinished() const { return SecondIter == SecondEnd; } - - /// Advance one step based on the previous match status unless the previous - /// status is MS_None. Then update Status based on the comparison between two - /// container iterators at the current step. If the previous status is - /// MS_None, it means two iterators are at the beginning and no comparison has - /// been made, so we simply update Status without advancing the iterators. - void updateOneStep(); - - T getFirstIter() const { return FirstIter; } - - T getSecondIter() const { return SecondIter; } - - MatchStatus getMatchStatus() const { return Status; } - -private: - // Current iterator and end iterator of the first container. - T FirstIter; - T FirstEnd; - // Current iterator and end iterator of the second container. - T SecondIter; - T SecondEnd; - // Match status of the current step. - MatchStatus Status; -}; -} // end anonymous namespace - -template void MatchStep::updateOneStep() { - switch (Status) { - case MS_Match: - ++FirstIter; - ++SecondIter; - break; - case MS_FirstUnique: - ++FirstIter; - break; - case MS_SecondUnique: - ++SecondIter; - break; - case MS_None: - break; - } - - // Update Status according to iterators at the current step. - if (areBothFinished()) - return; - if (FirstIter != FirstEnd && - (SecondIter == SecondEnd || FirstIter->first < SecondIter->first)) - Status = MS_FirstUnique; - else if (SecondIter != SecondEnd && - (FirstIter == FirstEnd || SecondIter->first < FirstIter->first)) - Status = MS_SecondUnique; - else - Status = MS_Match; -} - -// Return the sum of line/block samples, the max line/block sample, and the -// number of line/block samples above the given threshold in a function -// including its inlinees. -static void getFuncSampleStats(const sampleprof::FunctionSamples &Func, - FuncSampleStats &FuncStats, - uint64_t HotThreshold) { - for (const auto &L : Func.getBodySamples()) { - uint64_t Sample = L.second.getSamples(); - FuncStats.SampleSum += Sample; - FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample); - if (Sample >= HotThreshold) - ++FuncStats.HotBlockCount; - } - - for (const auto &C : Func.getCallsiteSamples()) { - for (const auto &F : C.second) - getFuncSampleStats(F.second, FuncStats, HotThreshold); - } -} - -/// Predicate that determines if a function is hot with a given threshold. We -/// keep it separate from its callsites for possible extension in the future. -static bool isFunctionHot(const FuncSampleStats &FuncStats, - uint64_t HotThreshold) { - // We intentionally compare the maximum sample count in a function with the - // HotThreshold to get an approximate determination on hot functions. - return (FuncStats.MaxSample >= HotThreshold); -} - -namespace { -class SampleOverlapAggregator { -public: - SampleOverlapAggregator(const std::string &BaseFilename, - const std::string &TestFilename, - double LowSimilarityThreshold, double Epsilon, - const OverlapFuncFilters &FuncFilter) - : BaseFilename(BaseFilename), TestFilename(TestFilename), - LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon), - FuncFilter(FuncFilter) {} - - /// Detect 0-sample input profile and report to output stream. This interface - /// should be called after loadProfiles(). - bool detectZeroSampleProfile(raw_fd_ostream &OS) const; - - /// Write out function-level similarity statistics for functions specified by - /// options --function, --value-cutoff, and --similarity-cutoff. - void dumpFuncSimilarity(raw_fd_ostream &OS) const; - - /// Write out program-level similarity and overlap statistics. - void dumpProgramSummary(raw_fd_ostream &OS) const; - - /// Write out hot-function and hot-block statistics for base_profile, - /// test_profile, and their overlap. For both cases, the overlap HO is - /// calculated as follows: - /// Given the number of functions (or blocks) that are hot in both profiles - /// HCommon and the number of functions (or blocks) that are hot in at - /// least one profile HUnion, HO = HCommon / HUnion. - void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const; - - /// This function tries matching functions in base and test profiles. For each - /// pair of matched functions, it aggregates the function-level - /// similarity into a profile-level similarity. It also dump function-level - /// similarity information of functions specified by --function, - /// --value-cutoff, and --similarity-cutoff options. The program-level - /// similarity PS is computed as follows: - /// Given function-level similarity FS(A) for all function A, the - /// weight of function A in base profile WB(A), and the weight of function - /// A in test profile WT(A), compute PS(base_profile, test_profile) = - /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0 - /// meaning no-overlap. - void computeSampleProfileOverlap(raw_fd_ostream &OS); - - /// Initialize ProfOverlap with the sum of samples in base and test - /// profiles. This function also computes and keeps the sum of samples and - /// max sample counts of each function in BaseStats and TestStats for later - /// use to avoid re-computations. - void initializeSampleProfileOverlap(); - - /// Load profiles specified by BaseFilename and TestFilename. - std::error_code loadProfiles(); - - using FuncSampleStatsMap = - std::unordered_map; - -private: - SampleOverlapStats ProfOverlap; - SampleOverlapStats HotFuncOverlap; - SampleOverlapStats HotBlockOverlap; - std::string BaseFilename; - std::string TestFilename; - std::unique_ptr BaseReader; - std::unique_ptr TestReader; - // BaseStats and TestStats hold FuncSampleStats for each function, with - // function name as the key. - FuncSampleStatsMap BaseStats; - FuncSampleStatsMap TestStats; - // Low similarity threshold in floating point number - double LowSimilarityThreshold; - // Block samples above BaseHotThreshold or TestHotThreshold are considered hot - // for tracking hot blocks. - uint64_t BaseHotThreshold; - uint64_t TestHotThreshold; - // A small threshold used to round the results of floating point accumulations - // to resolve imprecision. - const double Epsilon; - std::multimap> - FuncSimilarityDump; - // FuncFilter carries specifications in options --value-cutoff and - // --function. - OverlapFuncFilters FuncFilter; - // Column offsets for printing the function-level details table. - static const unsigned int TestWeightCol = 15; - static const unsigned int SimilarityCol = 30; - static const unsigned int OverlapCol = 43; - static const unsigned int BaseUniqueCol = 53; - static const unsigned int TestUniqueCol = 67; - static const unsigned int BaseSampleCol = 81; - static const unsigned int TestSampleCol = 96; - static const unsigned int FuncNameCol = 111; - - /// Return a similarity of two line/block sample counters in the same - /// function in base and test profiles. The line/block-similarity BS(i) is - /// computed as follows: - /// For an offsets i, given the sample count at i in base profile BB(i), - /// the sample count at i in test profile BT(i), the sum of sample counts - /// in this function in base profile SB, and the sum of sample counts in - /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB - - /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap. - double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample, - const SampleOverlapStats &FuncOverlap) const; - - void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample, - uint64_t HotBlockCount); - - void getHotFunctions(const FuncSampleStatsMap &ProfStats, - FuncSampleStatsMap &HotFunc, - uint64_t HotThreshold) const; - - void computeHotFuncOverlap(); - - /// This function updates statistics in FuncOverlap, HotBlockOverlap, and - /// Difference for two sample units in a matched function according to the - /// given match status. - void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample, - uint64_t HotBlockCount, - SampleOverlapStats &FuncOverlap, - double &Difference, MatchStatus Status); - - /// This function updates statistics in FuncOverlap, HotBlockOverlap, and - /// Difference for unmatched callees that only present in one profile in a - /// matched caller function. - void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func, - SampleOverlapStats &FuncOverlap, - double &Difference, MatchStatus Status); - - /// This function updates sample overlap statistics of an overlap function in - /// base and test profile. It also calculates a function-internal similarity - /// FIS as follows: - /// For offsets i that have samples in at least one profile in this - /// function A, given BS(i) returned by computeBlockSimilarity(), compute - /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with - /// 0.0 meaning no overlap. - double computeSampleFunctionInternalOverlap( - const sampleprof::FunctionSamples &BaseFunc, - const sampleprof::FunctionSamples &TestFunc, - SampleOverlapStats &FuncOverlap); - - /// Function-level similarity (FS) is a weighted value over function internal - /// similarity (FIS). This function computes a function's FS from its FIS by - /// applying the weight. - double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample, - uint64_t TestFuncSample) const; - - /// The function-level similarity FS(A) for a function A is computed as - /// follows: - /// Compute a function-internal similarity FIS(A) by - /// computeSampleFunctionInternalOverlap(). Then, with the weight of - /// function A in base profile WB(A), and the weight of function A in test - /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A))) - /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap. - double - computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc, - const sampleprof::FunctionSamples *TestFunc, - SampleOverlapStats *FuncOverlap, - uint64_t BaseFuncSample, - uint64_t TestFuncSample); - - /// Profile-level similarity (PS) is a weighted aggregate over function-level - /// similarities (FS). This method weights the FS value by the function - /// weights in the base and test profiles for the aggregation. - double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample, - uint64_t TestFuncSample) const; -}; -} // end anonymous namespace - -bool SampleOverlapAggregator::detectZeroSampleProfile( - raw_fd_ostream &OS) const { - bool HaveZeroSample = false; - if (ProfOverlap.BaseSample == 0) { - OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n"; - HaveZeroSample = true; - } - if (ProfOverlap.TestSample == 0) { - OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n"; - HaveZeroSample = true; - } - return HaveZeroSample; -} - -double SampleOverlapAggregator::computeBlockSimilarity( - uint64_t BaseSample, uint64_t TestSample, - const SampleOverlapStats &FuncOverlap) const { - double BaseFrac = 0.0; - double TestFrac = 0.0; - if (FuncOverlap.BaseSample > 0) - BaseFrac = static_cast(BaseSample) / FuncOverlap.BaseSample; - if (FuncOverlap.TestSample > 0) - TestFrac = static_cast(TestSample) / FuncOverlap.TestSample; - return 1.0 - std::fabs(BaseFrac - TestFrac); -} - -void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample, - uint64_t TestSample, - uint64_t HotBlockCount) { - bool IsBaseHot = (BaseSample >= BaseHotThreshold); - bool IsTestHot = (TestSample >= TestHotThreshold); - if (!IsBaseHot && !IsTestHot) - return; - - HotBlockOverlap.UnionCount += HotBlockCount; - if (IsBaseHot) - HotBlockOverlap.BaseCount += HotBlockCount; - if (IsTestHot) - HotBlockOverlap.TestCount += HotBlockCount; - if (IsBaseHot && IsTestHot) - HotBlockOverlap.OverlapCount += HotBlockCount; -} - -void SampleOverlapAggregator::getHotFunctions( - const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc, - uint64_t HotThreshold) const { - for (const auto &F : ProfStats) { - if (isFunctionHot(F.second, HotThreshold)) - HotFunc.emplace(F.first, F.second); - } -} - -void SampleOverlapAggregator::computeHotFuncOverlap() { - FuncSampleStatsMap BaseHotFunc; - getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold); - HotFuncOverlap.BaseCount = BaseHotFunc.size(); - - FuncSampleStatsMap TestHotFunc; - getHotFunctions(TestStats, TestHotFunc, TestHotThreshold); - HotFuncOverlap.TestCount = TestHotFunc.size(); - HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount; - - for (const auto &F : BaseHotFunc) { - if (TestHotFunc.count(F.first)) - ++HotFuncOverlap.OverlapCount; - else - ++HotFuncOverlap.UnionCount; - } -} - -void SampleOverlapAggregator::updateOverlapStatsForFunction( - uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount, - SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) { - assert(Status != MS_None && - "Match status should be updated before updating overlap statistics"); - if (Status == MS_FirstUnique) { - TestSample = 0; - FuncOverlap.BaseUniqueSample += BaseSample; - } else if (Status == MS_SecondUnique) { - BaseSample = 0; - FuncOverlap.TestUniqueSample += TestSample; - } else { - ++FuncOverlap.OverlapCount; - } - - FuncOverlap.UnionSample += std::max(BaseSample, TestSample); - FuncOverlap.OverlapSample += std::min(BaseSample, TestSample); - Difference += - 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap); - updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount); -} - -void SampleOverlapAggregator::updateForUnmatchedCallee( - const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap, - double &Difference, MatchStatus Status) { - assert((Status == MS_FirstUnique || Status == MS_SecondUnique) && - "Status must be either of the two unmatched cases"); - FuncSampleStats FuncStats; - if (Status == MS_FirstUnique) { - getFuncSampleStats(Func, FuncStats, BaseHotThreshold); - updateOverlapStatsForFunction(FuncStats.SampleSum, 0, - FuncStats.HotBlockCount, FuncOverlap, - Difference, Status); - } else { - getFuncSampleStats(Func, FuncStats, TestHotThreshold); - updateOverlapStatsForFunction(0, FuncStats.SampleSum, - FuncStats.HotBlockCount, FuncOverlap, - Difference, Status); - } -} - -double SampleOverlapAggregator::computeSampleFunctionInternalOverlap( - const sampleprof::FunctionSamples &BaseFunc, - const sampleprof::FunctionSamples &TestFunc, - SampleOverlapStats &FuncOverlap) { - - using namespace sampleprof; - - double Difference = 0; - - // Accumulate Difference for regular line/block samples in the function. - // We match them through sort-merge join algorithm because - // FunctionSamples::getBodySamples() returns a map of sample counters ordered - // by their offsets. - MatchStep BlockIterStep( - BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(), - TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend()); - BlockIterStep.updateOneStep(); - while (!BlockIterStep.areBothFinished()) { - uint64_t BaseSample = - BlockIterStep.isFirstFinished() - ? 0 - : BlockIterStep.getFirstIter()->second.getSamples(); - uint64_t TestSample = - BlockIterStep.isSecondFinished() - ? 0 - : BlockIterStep.getSecondIter()->second.getSamples(); - updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap, - Difference, BlockIterStep.getMatchStatus()); - - BlockIterStep.updateOneStep(); - } - - // Accumulate Difference for callsite lines in the function. We match - // them through sort-merge algorithm because - // FunctionSamples::getCallsiteSamples() returns a map of callsite records - // ordered by their offsets. - MatchStep CallsiteIterStep( - BaseFunc.getCallsiteSamples().cbegin(), - BaseFunc.getCallsiteSamples().cend(), - TestFunc.getCallsiteSamples().cbegin(), - TestFunc.getCallsiteSamples().cend()); - CallsiteIterStep.updateOneStep(); - while (!CallsiteIterStep.areBothFinished()) { - MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus(); - assert(CallsiteStepStatus != MS_None && - "Match status should be updated before entering loop body"); - - if (CallsiteStepStatus != MS_Match) { - auto Callsite = (CallsiteStepStatus == MS_FirstUnique) - ? CallsiteIterStep.getFirstIter() - : CallsiteIterStep.getSecondIter(); - for (const auto &F : Callsite->second) - updateForUnmatchedCallee(F.second, FuncOverlap, Difference, - CallsiteStepStatus); - } else { - // There may be multiple inlinees at the same offset, so we need to try - // matching all of them. This match is implemented through sort-merge - // algorithm because callsite records at the same offset are ordered by - // function names. - MatchStep CalleeIterStep( - CallsiteIterStep.getFirstIter()->second.cbegin(), - CallsiteIterStep.getFirstIter()->second.cend(), - CallsiteIterStep.getSecondIter()->second.cbegin(), - CallsiteIterStep.getSecondIter()->second.cend()); - CalleeIterStep.updateOneStep(); - while (!CalleeIterStep.areBothFinished()) { - MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus(); - if (CalleeStepStatus != MS_Match) { - auto Callee = (CalleeStepStatus == MS_FirstUnique) - ? CalleeIterStep.getFirstIter() - : CalleeIterStep.getSecondIter(); - updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference, - CalleeStepStatus); - } else { - // An inlined function can contain other inlinees inside, so compute - // the Difference recursively. - Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap( - CalleeIterStep.getFirstIter()->second, - CalleeIterStep.getSecondIter()->second, - FuncOverlap); - } - CalleeIterStep.updateOneStep(); - } - } - CallsiteIterStep.updateOneStep(); - } - - // Difference reflects the total differences of line/block samples in this - // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to - // reflect the similarity between function profiles in [0.0f to 1.0f]. - return (2.0 - Difference) / 2; -} - -double SampleOverlapAggregator::weightForFuncSimilarity( - double FuncInternalSimilarity, uint64_t BaseFuncSample, - uint64_t TestFuncSample) const { - // Compute the weight as the distance between the function weights in two - // profiles. - double BaseFrac = 0.0; - double TestFrac = 0.0; - assert(ProfOverlap.BaseSample > 0 && - "Total samples in base profile should be greater than 0"); - BaseFrac = static_cast(BaseFuncSample) / ProfOverlap.BaseSample; - assert(ProfOverlap.TestSample > 0 && - "Total samples in test profile should be greater than 0"); - TestFrac = static_cast(TestFuncSample) / ProfOverlap.TestSample; - double WeightDistance = std::fabs(BaseFrac - TestFrac); - - // Take WeightDistance into the similarity. - return FuncInternalSimilarity * (1 - WeightDistance); -} - -double -SampleOverlapAggregator::weightByImportance(double FuncSimilarity, - uint64_t BaseFuncSample, - uint64_t TestFuncSample) const { - - double BaseFrac = 0.0; - double TestFrac = 0.0; - assert(ProfOverlap.BaseSample > 0 && - "Total samples in base profile should be greater than 0"); - BaseFrac = static_cast(BaseFuncSample) / ProfOverlap.BaseSample / 2.0; - assert(ProfOverlap.TestSample > 0 && - "Total samples in test profile should be greater than 0"); - TestFrac = static_cast(TestFuncSample) / ProfOverlap.TestSample / 2.0; - return FuncSimilarity * (BaseFrac + TestFrac); -} - -double SampleOverlapAggregator::computeSampleFunctionOverlap( - const sampleprof::FunctionSamples *BaseFunc, - const sampleprof::FunctionSamples *TestFunc, - SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample, - uint64_t TestFuncSample) { - // Default function internal similarity before weighted, meaning two functions - // has no overlap. - const double DefaultFuncInternalSimilarity = 0; - double FuncSimilarity; - double FuncInternalSimilarity; - - // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap. - // In this case, we use DefaultFuncInternalSimilarity as the function internal - // similarity. - if (!BaseFunc || !TestFunc) { - FuncInternalSimilarity = DefaultFuncInternalSimilarity; - } else { - assert(FuncOverlap != nullptr && - "FuncOverlap should be provided in this case"); - FuncInternalSimilarity = computeSampleFunctionInternalOverlap( - *BaseFunc, *TestFunc, *FuncOverlap); - // Now, FuncInternalSimilarity may be a little less than 0 due to - // imprecision of floating point accumulations. Make it zero if the - // difference is below Epsilon. - FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon) - ? 0 - : FuncInternalSimilarity; - } - FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity, - BaseFuncSample, TestFuncSample); - return FuncSimilarity; -} - -void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) { - using namespace sampleprof; - - std::unordered_map - BaseFuncProf; - const auto &BaseProfiles = BaseReader->getProfiles(); - for (const auto &BaseFunc : BaseProfiles) { - BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second)); - } - ProfOverlap.UnionCount = BaseFuncProf.size(); - - const auto &TestProfiles = TestReader->getProfiles(); - for (const auto &TestFunc : TestProfiles) { - SampleOverlapStats FuncOverlap; - FuncOverlap.TestName = TestFunc.second.getContext(); - assert(TestStats.count(FuncOverlap.TestName) && - "TestStats should have records for all functions in test profile " - "except inlinees"); - FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum; - - bool Matched = false; - const auto Match = BaseFuncProf.find(FuncOverlap.TestName); - if (Match == BaseFuncProf.end()) { - const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName]; - ++ProfOverlap.TestUniqueCount; - ProfOverlap.TestUniqueSample += FuncStats.SampleSum; - FuncOverlap.TestUniqueSample = FuncStats.SampleSum; - - updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount); - - double FuncSimilarity = computeSampleFunctionOverlap( - nullptr, nullptr, nullptr, 0, FuncStats.SampleSum); - ProfOverlap.Similarity += - weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum); - - ++ProfOverlap.UnionCount; - ProfOverlap.UnionSample += FuncStats.SampleSum; - } else { - ++ProfOverlap.OverlapCount; - - // Two functions match with each other. Compute function-level overlap and - // aggregate them into profile-level overlap. - FuncOverlap.BaseName = Match->second->getContext(); - assert(BaseStats.count(FuncOverlap.BaseName) && - "BaseStats should have records for all functions in base profile " - "except inlinees"); - FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum; - - FuncOverlap.Similarity = computeSampleFunctionOverlap( - Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample, - FuncOverlap.TestSample); - ProfOverlap.Similarity += - weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample, - FuncOverlap.TestSample); - ProfOverlap.OverlapSample += FuncOverlap.OverlapSample; - ProfOverlap.UnionSample += FuncOverlap.UnionSample; - - // Accumulate the percentage of base unique and test unique samples into - // ProfOverlap. - ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample; - ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample; - - // Remove matched base functions for later reporting functions not found - // in test profile. - BaseFuncProf.erase(Match); - Matched = true; - } - - // Print function-level similarity information if specified by options. - assert(TestStats.count(FuncOverlap.TestName) && - "TestStats should have records for all functions in test profile " - "except inlinees"); - if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff || - (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) || - (Matched && !FuncFilter.NameFilter.empty() && - FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) != - std::string::npos)) { - assert(ProfOverlap.BaseSample > 0 && - "Total samples in base profile should be greater than 0"); - FuncOverlap.BaseWeight = - static_cast(FuncOverlap.BaseSample) / ProfOverlap.BaseSample; - assert(ProfOverlap.TestSample > 0 && - "Total samples in test profile should be greater than 0"); - FuncOverlap.TestWeight = - static_cast(FuncOverlap.TestSample) / ProfOverlap.TestSample; - FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap); - } - } - - // Traverse through functions in base profile but not in test profile. - for (const auto &F : BaseFuncProf) { - assert(BaseStats.count(F.second->getContext()) && - "BaseStats should have records for all functions in base profile " - "except inlinees"); - const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()]; - ++ProfOverlap.BaseUniqueCount; - ProfOverlap.BaseUniqueSample += FuncStats.SampleSum; - - updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount); - - double FuncSimilarity = computeSampleFunctionOverlap( - nullptr, nullptr, nullptr, FuncStats.SampleSum, 0); - ProfOverlap.Similarity += - weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0); - - ProfOverlap.UnionSample += FuncStats.SampleSum; - } - - // Now, ProfSimilarity may be a little greater than 1 due to imprecision - // of floating point accumulations. Make it 1.0 if the difference is below - // Epsilon. - ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon) - ? 1 - : ProfOverlap.Similarity; - - computeHotFuncOverlap(); -} - -void SampleOverlapAggregator::initializeSampleProfileOverlap() { - const auto &BaseProf = BaseReader->getProfiles(); - for (const auto &I : BaseProf) { - ++ProfOverlap.BaseCount; - FuncSampleStats FuncStats; - getFuncSampleStats(I.second, FuncStats, BaseHotThreshold); - ProfOverlap.BaseSample += FuncStats.SampleSum; - BaseStats.emplace(I.second.getContext(), FuncStats); - } - - const auto &TestProf = TestReader->getProfiles(); - for (const auto &I : TestProf) { - ++ProfOverlap.TestCount; - FuncSampleStats FuncStats; - getFuncSampleStats(I.second, FuncStats, TestHotThreshold); - ProfOverlap.TestSample += FuncStats.SampleSum; - TestStats.emplace(I.second.getContext(), FuncStats); - } - - ProfOverlap.BaseName = StringRef(BaseFilename); - ProfOverlap.TestName = StringRef(TestFilename); -} - -void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const { - using namespace sampleprof; - - if (FuncSimilarityDump.empty()) - return; - - formatted_raw_ostream FOS(OS); - FOS << "Function-level details:\n"; - FOS << "Base weight"; - FOS.PadToColumn(TestWeightCol); - FOS << "Test weight"; - FOS.PadToColumn(SimilarityCol); - FOS << "Similarity"; - FOS.PadToColumn(OverlapCol); - FOS << "Overlap"; - FOS.PadToColumn(BaseUniqueCol); - FOS << "Base unique"; - FOS.PadToColumn(TestUniqueCol); - FOS << "Test unique"; - FOS.PadToColumn(BaseSampleCol); - FOS << "Base samples"; - FOS.PadToColumn(TestSampleCol); - FOS << "Test samples"; - FOS.PadToColumn(FuncNameCol); - FOS << "Function name\n"; - for (const auto &F : FuncSimilarityDump) { - double OverlapPercent = - F.second.UnionSample > 0 - ? static_cast(F.second.OverlapSample) / F.second.UnionSample - : 0; - double BaseUniquePercent = - F.second.BaseSample > 0 - ? static_cast(F.second.BaseUniqueSample) / - F.second.BaseSample - : 0; - double TestUniquePercent = - F.second.TestSample > 0 - ? static_cast(F.second.TestUniqueSample) / - F.second.TestSample - : 0; - - FOS << format("%.2f%%", F.second.BaseWeight * 100); - FOS.PadToColumn(TestWeightCol); - FOS << format("%.2f%%", F.second.TestWeight * 100); - FOS.PadToColumn(SimilarityCol); - FOS << format("%.2f%%", F.second.Similarity * 100); - FOS.PadToColumn(OverlapCol); - FOS << format("%.2f%%", OverlapPercent * 100); - FOS.PadToColumn(BaseUniqueCol); - FOS << format("%.2f%%", BaseUniquePercent * 100); - FOS.PadToColumn(TestUniqueCol); - FOS << format("%.2f%%", TestUniquePercent * 100); - FOS.PadToColumn(BaseSampleCol); - FOS << F.second.BaseSample; - FOS.PadToColumn(TestSampleCol); - FOS << F.second.TestSample; - FOS.PadToColumn(FuncNameCol); - FOS << F.second.TestName.toString() << "\n"; - } -} - -void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const { - OS << "Profile overlap infomation for base_profile: " - << ProfOverlap.BaseName.toString() - << " and test_profile: " << ProfOverlap.TestName.toString() - << "\nProgram level:\n"; - - OS << " Whole program profile similarity: " - << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n"; - - assert(ProfOverlap.UnionSample > 0 && - "Total samples in two profile should be greater than 0"); - double OverlapPercent = - static_cast(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample; - assert(ProfOverlap.BaseSample > 0 && - "Total samples in base profile should be greater than 0"); - double BaseUniquePercent = static_cast(ProfOverlap.BaseUniqueSample) / - ProfOverlap.BaseSample; - assert(ProfOverlap.TestSample > 0 && - "Total samples in test profile should be greater than 0"); - double TestUniquePercent = static_cast(ProfOverlap.TestUniqueSample) / - ProfOverlap.TestSample; - - OS << " Whole program sample overlap: " - << format("%.3f%%", OverlapPercent * 100) << "\n"; - OS << " percentage of samples unique in base profile: " - << format("%.3f%%", BaseUniquePercent * 100) << "\n"; - OS << " percentage of samples unique in test profile: " - << format("%.3f%%", TestUniquePercent * 100) << "\n"; - OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n" - << " total samples in test profile: " << ProfOverlap.TestSample << "\n"; - - assert(ProfOverlap.UnionCount > 0 && - "There should be at least one function in two input profiles"); - double FuncOverlapPercent = - static_cast(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount; - OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100) - << "\n"; - OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n"; - OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount - << "\n"; - OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount - << "\n"; -} - -void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap( - raw_fd_ostream &OS) const { - assert(HotFuncOverlap.UnionCount > 0 && - "There should be at least one hot function in two input profiles"); - OS << " Hot-function overlap: " - << format("%.3f%%", static_cast(HotFuncOverlap.OverlapCount) / - HotFuncOverlap.UnionCount * 100) - << "\n"; - OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n"; - OS << " hot functions unique in base profile: " - << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n"; - OS << " hot functions unique in test profile: " - << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n"; - - assert(HotBlockOverlap.UnionCount > 0 && - "There should be at least one hot block in two input profiles"); - OS << " Hot-block overlap: " - << format("%.3f%%", static_cast(HotBlockOverlap.OverlapCount) / - HotBlockOverlap.UnionCount * 100) - << "\n"; - OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n"; - OS << " hot blocks unique in base profile: " - << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n"; - OS << " hot blocks unique in test profile: " - << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n"; -} - -std::error_code SampleOverlapAggregator::loadProfiles() { - using namespace sampleprof; - - LLVMContext Context; - auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, - FSDiscriminatorPassOption); - if (std::error_code EC = BaseReaderOrErr.getError()) - exitWithErrorCode(EC, BaseFilename); - - auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, - FSDiscriminatorPassOption); - if (std::error_code EC = TestReaderOrErr.getError()) - exitWithErrorCode(EC, TestFilename); - - BaseReader = std::move(BaseReaderOrErr.get()); - TestReader = std::move(TestReaderOrErr.get()); - - if (std::error_code EC = BaseReader->read()) - exitWithErrorCode(EC, BaseFilename); - if (std::error_code EC = TestReader->read()) - exitWithErrorCode(EC, TestFilename); - if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased()) - exitWithError( - "cannot compare probe-based profile with non-probe-based profile"); - if (BaseReader->profileIsCSFlat() != TestReader->profileIsCSFlat()) - exitWithError("cannot compare CS profile with non-CS profile"); - - // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in - // profile summary. - ProfileSummary &BasePS = BaseReader->getSummary(); - ProfileSummary &TestPS = TestReader->getSummary(); - BaseHotThreshold = - ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary()); - TestHotThreshold = - ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary()); - - return std::error_code(); -} - -void overlapSampleProfile(const std::string &BaseFilename, - const std::string &TestFilename, - const OverlapFuncFilters &FuncFilter, - uint64_t SimilarityCutoff, raw_fd_ostream &OS) { - using namespace sampleprof; - - // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics - // report 2--3 places after decimal point in percentage numbers. - SampleOverlapAggregator OverlapAggr( - BaseFilename, TestFilename, - static_cast(SimilarityCutoff) / 1000000, 0.000005, FuncFilter); - if (std::error_code EC = OverlapAggr.loadProfiles()) - exitWithErrorCode(EC); - - OverlapAggr.initializeSampleProfileOverlap(); - if (OverlapAggr.detectZeroSampleProfile(OS)) - return; - - OverlapAggr.computeSampleProfileOverlap(OS); - - OverlapAggr.dumpProgramSummary(OS); - OverlapAggr.dumpHotFuncAndBlockOverlap(OS); - OverlapAggr.dumpFuncSimilarity(OS); -} - -static int overlap_main(int argc, const char *argv[]) { - cl::opt BaseFilename(cl::Positional, cl::Required, - cl::desc("")); - cl::opt TestFilename(cl::Positional, cl::Required, - cl::desc("")); - cl::opt Output("output", cl::value_desc("output"), cl::init("-"), - cl::desc("Output file")); - cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output)); - cl::opt IsCS( - "cs", cl::init(false), - cl::desc("For context sensitive PGO counts. Does not work with CSSPGO.")); - cl::opt ValueCutoff( - "value-cutoff", cl::init(-1), - cl::desc( - "Function level overlap information for every function (with calling " - "context for csspgo) in test " - "profile with max count value greater then the parameter value")); - cl::opt FuncNameFilter( - "function", - cl::desc("Function level overlap information for matching functions. For " - "CSSPGO this takes a a function name with calling context")); - cl::opt SimilarityCutoff( - "similarity-cutoff", cl::init(0), - cl::desc("For sample profiles, list function names (with calling context " - "for csspgo) for overlapped functions " - "with similarities below the cutoff (percentage times 10000).")); - cl::opt ProfileKind( - cl::desc("Profile kind:"), cl::init(instr), - cl::values(clEnumVal(instr, "Instrumentation profile (default)"), - clEnumVal(sample, "Sample profile"))); - cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); - - std::error_code EC; - raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF); - if (EC) - exitWithErrorCode(EC, Output); - - if (ProfileKind == instr) - overlapInstrProfile(BaseFilename, TestFilename, - OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS, - IsCS); - else - overlapSampleProfile(BaseFilename, TestFilename, - OverlapFuncFilters{ValueCutoff, FuncNameFilter}, - SimilarityCutoff, OS); - - return 0; -} - -namespace { -struct ValueSitesStats { - ValueSitesStats() - : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0), - TotalNumValues(0) {} - uint64_t TotalNumValueSites; - uint64_t TotalNumValueSitesWithValueProfile; - uint64_t TotalNumValues; - std::vector ValueSitesHistogram; -}; -} // namespace - -static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK, - ValueSitesStats &Stats, raw_fd_ostream &OS, - InstrProfSymtab *Symtab) { - uint32_t NS = Func.getNumValueSites(VK); - Stats.TotalNumValueSites += NS; - for (size_t I = 0; I < NS; ++I) { - uint32_t NV = Func.getNumValueDataForSite(VK, I); - std::unique_ptr VD = Func.getValueForSite(VK, I); - Stats.TotalNumValues += NV; - if (NV) { - Stats.TotalNumValueSitesWithValueProfile++; - if (NV > Stats.ValueSitesHistogram.size()) - Stats.ValueSitesHistogram.resize(NV, 0); - Stats.ValueSitesHistogram[NV - 1]++; - } - - uint64_t SiteSum = 0; - for (uint32_t V = 0; V < NV; V++) - SiteSum += VD[V].Count; - if (SiteSum == 0) - SiteSum = 1; - - for (uint32_t V = 0; V < NV; V++) { - OS << "\t[ " << format("%2u", I) << ", "; - if (Symtab == nullptr) - OS << format("%4" PRIu64, VD[V].Value); - else - OS << Symtab->getFuncName(VD[V].Value); - OS << ", " << format("%10" PRId64, VD[V].Count) << " ] (" - << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n"; - } - } -} - -static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK, - ValueSitesStats &Stats) { - OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n"; - OS << " Total number of sites with values: " - << Stats.TotalNumValueSitesWithValueProfile << "\n"; - OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n"; - - OS << " Value sites histogram:\n\tNumTargets, SiteCount\n"; - for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) { - if (Stats.ValueSitesHistogram[I] > 0) - OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n"; - } -} - -static int showInstrProfile(const std::string &Filename, bool ShowCounts, - uint32_t TopN, bool ShowIndirectCallTargets, - bool ShowMemOPSizes, bool ShowDetailedSummary, - std::vector DetailedSummaryCutoffs, - bool ShowAllFunctions, bool ShowCS, - uint64_t ValueCutoff, bool OnlyListBelow, - const std::string &ShowFunction, bool TextFormat, - bool ShowBinaryIds, bool ShowCovered, - raw_fd_ostream &OS) { - auto ReaderOrErr = InstrProfReader::create(Filename); - std::vector Cutoffs = std::move(DetailedSummaryCutoffs); - if (ShowDetailedSummary && Cutoffs.empty()) { - Cutoffs = {800000, 900000, 950000, 990000, 999000, 999900, 999990}; - } - InstrProfSummaryBuilder Builder(std::move(Cutoffs)); - if (Error E = ReaderOrErr.takeError()) - exitWithError(std::move(E), Filename); - - auto Reader = std::move(ReaderOrErr.get()); - bool IsIRInstr = Reader->isIRLevelProfile(); - size_t ShownFunctions = 0; - size_t BelowCutoffFunctions = 0; - int NumVPKind = IPVK_Last - IPVK_First + 1; - std::vector VPStats(NumVPKind); - - auto MinCmp = [](const std::pair &v1, - const std::pair &v2) { - return v1.second > v2.second; - }; - - std::priority_queue, - std::vector>, - decltype(MinCmp)> - HottestFuncs(MinCmp); - - if (!TextFormat && OnlyListBelow) { - OS << "The list of functions with the maximum counter less than " - << ValueCutoff << ":\n"; - } - - // Add marker so that IR-level instrumentation round-trips properly. - if (TextFormat && IsIRInstr) - OS << ":ir\n"; - - for (const auto &Func : *Reader) { - if (Reader->isIRLevelProfile()) { - bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); - if (FuncIsCS != ShowCS) - continue; - } - bool Show = ShowAllFunctions || - (!ShowFunction.empty() && Func.Name.contains(ShowFunction)); - - bool doTextFormatDump = (Show && TextFormat); - - if (doTextFormatDump) { - InstrProfSymtab &Symtab = Reader->getSymtab(); - InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab, - OS); - continue; - } - - assert(Func.Counts.size() > 0 && "function missing entry counter"); - Builder.addRecord(Func); - - if (ShowCovered) { - if (std::any_of(Func.Counts.begin(), Func.Counts.end(), - [](uint64_t C) { return C; })) - OS << Func.Name << "\n"; - continue; - } - - uint64_t FuncMax = 0; - uint64_t FuncSum = 0; - for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { - if (Func.Counts[I] == (uint64_t)-1) - continue; - FuncMax = std::max(FuncMax, Func.Counts[I]); - FuncSum += Func.Counts[I]; - } - - if (FuncMax < ValueCutoff) { - ++BelowCutoffFunctions; - if (OnlyListBelow) { - OS << " " << Func.Name << ": (Max = " << FuncMax - << " Sum = " << FuncSum << ")\n"; - } - continue; - } else if (OnlyListBelow) - continue; - - if (TopN) { - if (HottestFuncs.size() == TopN) { - if (HottestFuncs.top().second < FuncMax) { - HottestFuncs.pop(); - HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); - } - } else - HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax)); - } - - if (Show) { - if (!ShownFunctions) - OS << "Counters:\n"; - - ++ShownFunctions; - - OS << " " << Func.Name << ":\n" - << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n" - << " Counters: " << Func.Counts.size() << "\n"; - if (!IsIRInstr) - OS << " Function count: " << Func.Counts[0] << "\n"; - - if (ShowIndirectCallTargets) - OS << " Indirect Call Site Count: " - << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; - - uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); - if (ShowMemOPSizes && NumMemOPCalls > 0) - OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls - << "\n"; - - if (ShowCounts) { - OS << " Block counts: ["; - size_t Start = (IsIRInstr ? 0 : 1); - for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) { - OS << (I == Start ? "" : ", ") << Func.Counts[I]; - } - OS << "]\n"; - } - - if (ShowIndirectCallTargets) { - OS << " Indirect Target Results:\n"; - traverseAllValueSites(Func, IPVK_IndirectCallTarget, - VPStats[IPVK_IndirectCallTarget], OS, - &(Reader->getSymtab())); - } - - if (ShowMemOPSizes && NumMemOPCalls > 0) { - OS << " Memory Intrinsic Size Results:\n"; - traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, - nullptr); - } - } - } - if (Reader->hasError()) - exitWithError(Reader->getError(), Filename); - - if (TextFormat || ShowCovered) - return 0; - std::unique_ptr PS(Builder.getSummary()); - bool IsIR = Reader->isIRLevelProfile(); - OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end"); - if (IsIR) - OS << " entry_first = " << Reader->instrEntryBBEnabled(); - OS << "\n"; - if (ShowAllFunctions || !ShowFunction.empty()) - OS << "Functions shown: " << ShownFunctions << "\n"; - OS << "Total functions: " << PS->getNumFunctions() << "\n"; - if (ValueCutoff > 0) { - OS << "Number of functions with maximum count (< " << ValueCutoff - << "): " << BelowCutoffFunctions << "\n"; - OS << "Number of functions with maximum count (>= " << ValueCutoff - << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n"; - } - OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n"; - OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n"; - - if (TopN) { - std::vector> SortedHottestFuncs; - while (!HottestFuncs.empty()) { - SortedHottestFuncs.emplace_back(HottestFuncs.top()); - HottestFuncs.pop(); - } - OS << "Top " << TopN - << " functions with the largest internal block counts: \n"; - for (auto &hotfunc : llvm::reverse(SortedHottestFuncs)) - OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n"; - } - - if (ShownFunctions && ShowIndirectCallTargets) { - OS << "Statistics for indirect call sites profile:\n"; - showValueSitesStats(OS, IPVK_IndirectCallTarget, - VPStats[IPVK_IndirectCallTarget]); - } - - if (ShownFunctions && ShowMemOPSizes) { - OS << "Statistics for memory intrinsic calls sizes profile:\n"; - showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); - } - - if (ShowDetailedSummary) { - OS << "Total number of blocks: " << PS->getNumCounts() << "\n"; - OS << "Total count: " << PS->getTotalCount() << "\n"; - PS->printDetailedSummary(OS); - } - - if (ShowBinaryIds) - if (Error E = Reader->printBinaryIds(OS)) - exitWithError(std::move(E), Filename); - - return 0; -} - -static void showSectionInfo(sampleprof::SampleProfileReader *Reader, - raw_fd_ostream &OS) { - if (!Reader->dumpSectionInfo(OS)) { - WithColor::warning() << "-show-sec-info-only is only supported for " - << "sample profile in extbinary format and is " - << "ignored for other formats.\n"; - return; - } -} - -namespace { -struct HotFuncInfo { - std::string FuncName; - uint64_t TotalCount; - double TotalCountPercent; - uint64_t MaxCount; - uint64_t EntryCount; - - HotFuncInfo() - : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {} - - HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES) - : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP), - MaxCount(MS), EntryCount(ES) {} -}; -} // namespace - -// Print out detailed information about hot functions in PrintValues vector. -// Users specify titles and offset of every columns through ColumnTitle and -// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same -// and at least 4. Besides, users can optionally give a HotFuncMetric string to -// print out or let it be an empty string. -static void dumpHotFunctionList(const std::vector &ColumnTitle, - const std::vector &ColumnOffset, - const std::vector &PrintValues, - uint64_t HotFuncCount, uint64_t TotalFuncCount, - uint64_t HotProfCount, uint64_t TotalProfCount, - const std::string &HotFuncMetric, - uint32_t TopNFunctions, raw_fd_ostream &OS) { - assert(ColumnOffset.size() == ColumnTitle.size() && - "ColumnOffset and ColumnTitle should have the same size"); - assert(ColumnTitle.size() >= 4 && - "ColumnTitle should have at least 4 elements"); - assert(TotalFuncCount > 0 && - "There should be at least one function in the profile"); - double TotalProfPercent = 0; - if (TotalProfCount > 0) - TotalProfPercent = static_cast(HotProfCount) / TotalProfCount * 100; - - formatted_raw_ostream FOS(OS); - FOS << HotFuncCount << " out of " << TotalFuncCount - << " functions with profile (" - << format("%.2f%%", - (static_cast(HotFuncCount) / TotalFuncCount * 100)) - << ") are considered hot functions"; - if (!HotFuncMetric.empty()) - FOS << " (" << HotFuncMetric << ")"; - FOS << ".\n"; - FOS << HotProfCount << " out of " << TotalProfCount << " profile counts (" - << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n"; - - for (size_t I = 0; I < ColumnTitle.size(); ++I) { - FOS.PadToColumn(ColumnOffset[I]); - FOS << ColumnTitle[I]; - } - FOS << "\n"; - - uint32_t Count = 0; - for (const auto &R : PrintValues) { - if (TopNFunctions && (Count++ == TopNFunctions)) - break; - FOS.PadToColumn(ColumnOffset[0]); - FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")"; - FOS.PadToColumn(ColumnOffset[1]); - FOS << R.MaxCount; - FOS.PadToColumn(ColumnOffset[2]); - FOS << R.EntryCount; - FOS.PadToColumn(ColumnOffset[3]); - FOS << R.FuncName << "\n"; - } -} - -static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, - ProfileSummary &PS, uint32_t TopN, - raw_fd_ostream &OS) { - using namespace sampleprof; - - const uint32_t HotFuncCutoff = 990000; - auto &SummaryVector = PS.getDetailedSummary(); - uint64_t MinCountThreshold = 0; - for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) { - if (SummaryEntry.Cutoff == HotFuncCutoff) { - MinCountThreshold = SummaryEntry.MinCount; - break; - } - } - - // Traverse all functions in the profile and keep only hot functions. - // The following loop also calculates the sum of total samples of all - // functions. - std::multimap, - std::greater> - HotFunc; - uint64_t ProfileTotalSample = 0; - uint64_t HotFuncSample = 0; - uint64_t HotFuncCount = 0; - - for (const auto &I : Profiles) { - FuncSampleStats FuncStats; - const FunctionSamples &FuncProf = I.second; - ProfileTotalSample += FuncProf.getTotalSamples(); - getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold); - - if (isFunctionHot(FuncStats, MinCountThreshold)) { - HotFunc.emplace(FuncProf.getTotalSamples(), - std::make_pair(&(I.second), FuncStats.MaxSample)); - HotFuncSample += FuncProf.getTotalSamples(); - ++HotFuncCount; - } - } - - std::vector ColumnTitle{"Total sample (%)", "Max sample", - "Entry sample", "Function name"}; - std::vector ColumnOffset{0, 24, 42, 58}; - std::string Metric = - std::string("max sample >= ") + std::to_string(MinCountThreshold); - std::vector PrintValues; - for (const auto &FuncPair : HotFunc) { - const FunctionSamples &Func = *FuncPair.second.first; - double TotalSamplePercent = - (ProfileTotalSample > 0) - ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample - : 0; - PrintValues.emplace_back(HotFuncInfo( - Func.getContext().toString(), Func.getTotalSamples(), - TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); - } - dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, - Profiles.size(), HotFuncSample, ProfileTotalSample, - Metric, TopN, OS); - - return 0; -} - -static int showSampleProfile(const std::string &Filename, bool ShowCounts, - uint32_t TopN, bool ShowAllFunctions, - bool ShowDetailedSummary, - const std::string &ShowFunction, - bool ShowProfileSymbolList, - bool ShowSectionInfoOnly, bool ShowHotFuncList, - raw_fd_ostream &OS) { - using namespace sampleprof; - LLVMContext Context; - auto ReaderOrErr = - SampleProfileReader::create(Filename, Context, FSDiscriminatorPassOption); - if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, Filename); - - auto Reader = std::move(ReaderOrErr.get()); - if (ShowSectionInfoOnly) { - showSectionInfo(Reader.get(), OS); - return 0; - } - - if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, Filename); - - if (ShowAllFunctions || ShowFunction.empty()) - Reader->dump(OS); - else - // TODO: parse context string to support filtering by contexts. - Reader->dumpFunctionProfile(StringRef(ShowFunction), OS); - - if (ShowProfileSymbolList) { - std::unique_ptr ReaderList = - Reader->getProfileSymbolList(); - ReaderList->dump(OS); - } - - if (ShowDetailedSummary) { - auto &PS = Reader->getSummary(); - PS.printSummary(OS); - PS.printDetailedSummary(OS); - } - - if (ShowHotFuncList || TopN) - showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS); - - return 0; -} - -static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) { - auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename); - if (Error E = ReaderOr.takeError()) - exitWithError(std::move(E), Filename); - - std::unique_ptr Reader( - ReaderOr.get().release()); - Reader->printSummaries(OS); - return 0; -} - -static int showDebugInfoCorrelation(const std::string &Filename, - bool ShowDetailedSummary, - bool ShowProfileSymbolList, - raw_fd_ostream &OS) { - std::unique_ptr Correlator; - if (auto Err = InstrProfCorrelator::get(Filename).moveInto(Correlator)) - exitWithError(std::move(Err), Filename); - if (auto Err = Correlator->correlateProfileData()) - exitWithError(std::move(Err), Filename); - - InstrProfSymtab Symtab; - if (auto Err = Symtab.create( - StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize()))) - exitWithError(std::move(Err), Filename); - - if (ShowProfileSymbolList) - Symtab.dumpNames(OS); - // TODO: Read "Profile Data Type" from debug info to compute and show how many - // counters the section holds. - if (ShowDetailedSummary) - OS << "Counters section size: 0x" - << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n"; - OS << "Found " << Correlator->getDataSize() << " functions\n"; - - return 0; -} - -static int show_main(int argc, const char *argv[]) { - cl::opt Filename(cl::Positional, cl::desc("")); - - cl::opt ShowCounts("counts", cl::init(false), - cl::desc("Show counter values for shown functions")); - cl::opt TextFormat( - "text", cl::init(false), - cl::desc("Show instr profile data in text dump format")); - cl::opt ShowIndirectCallTargets( - "ic-targets", cl::init(false), - cl::desc("Show indirect call site target values for shown functions")); - cl::opt ShowMemOPSizes( - "memop-sizes", cl::init(false), - cl::desc("Show the profiled sizes of the memory intrinsic calls " - "for shown functions")); - cl::opt ShowDetailedSummary("detailed-summary", cl::init(false), - cl::desc("Show detailed profile summary")); - cl::list DetailedSummaryCutoffs( - cl::CommaSeparated, "detailed-summary-cutoffs", - cl::desc( - "Cutoff percentages (times 10000) for generating detailed summary"), - cl::value_desc("800000,901000,999999")); - cl::opt ShowHotFuncList( - "hot-func-list", cl::init(false), - cl::desc("Show profile summary of a list of hot functions")); - cl::opt ShowAllFunctions("all-functions", cl::init(false), - cl::desc("Details for every function")); - cl::opt ShowCS("showcs", cl::init(false), - cl::desc("Show context sensitive counts")); - cl::opt ShowFunction("function", - cl::desc("Details for matching functions")); - - cl::opt OutputFilename("output", cl::value_desc("output"), - cl::init("-"), cl::desc("Output file")); - cl::alias OutputFilenameA("o", cl::desc("Alias for --output"), - cl::aliasopt(OutputFilename)); - cl::opt ProfileKind( - cl::desc("Profile kind:"), cl::init(instr), - cl::values(clEnumVal(instr, "Instrumentation profile (default)"), - clEnumVal(sample, "Sample profile"), - clEnumVal(memory, "MemProf memory access profile"))); - cl::opt TopNFunctions( - "topn", cl::init(0), - cl::desc("Show the list of functions with the largest internal counts")); - cl::opt ValueCutoff( - "value-cutoff", cl::init(0), - cl::desc("Set the count value cutoff. Functions with the maximum count " - "less than this value will not be printed out. (Default is 0)")); - cl::opt OnlyListBelow( - "list-below-cutoff", cl::init(false), - cl::desc("Only output names of functions whose max count values are " - "below the cutoff value")); - cl::opt ShowProfileSymbolList( - "show-prof-sym-list", cl::init(false), - cl::desc("Show profile symbol list if it exists in the profile. ")); - cl::opt ShowSectionInfoOnly( - "show-sec-info-only", cl::init(false), - cl::desc("Show the information of each section in the sample profile. " - "The flag is only usable when the sample profile is in " - "extbinary format")); - cl::opt ShowBinaryIds("binary-ids", cl::init(false), - cl::desc("Show binary ids in the profile. ")); - cl::opt DebugInfoFilename( - "debug-info", cl::init(""), - cl::desc("Read and extract profile metadata from debug info and show " - "the functions it found.")); - cl::opt ShowCovered( - "covered", cl::init(false), - cl::desc("Show only the functions that have been executed.")); - - cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); - - if (Filename.empty() && DebugInfoFilename.empty()) - exitWithError( - "the positional argument '' is required unless '--" + - DebugInfoFilename.ArgStr + "' is provided"); - - if (Filename == OutputFilename) { - errs() << sys::path::filename(argv[0]) - << ": Input file name cannot be the same as the output file name!\n"; - return 1; - } - - std::error_code EC; - raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF); - if (EC) - exitWithErrorCode(EC, OutputFilename); - - if (ShowAllFunctions && !ShowFunction.empty()) - WithColor::warning() << "-function argument ignored: showing all functions\n"; - - if (!DebugInfoFilename.empty()) - return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary, - ShowProfileSymbolList, OS); - - if (ProfileKind == instr) - return showInstrProfile( - Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, - ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, - ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, - TextFormat, ShowBinaryIds, ShowCovered, OS); - if (ProfileKind == sample) - return showSampleProfile(Filename, ShowCounts, TopNFunctions, - ShowAllFunctions, ShowDetailedSummary, - ShowFunction, ShowProfileSymbolList, - ShowSectionInfoOnly, ShowHotFuncList, OS); - return showMemProfProfile(Filename, OS); -} - -int main(int argc, const char *argv[]) { - InitLLVM X(argc, argv); - - StringRef ProgName(sys::path::filename(argv[0])); - if (argc > 1) { - int (*func)(int, const char *[]) = nullptr; - - if (strcmp(argv[1], "merge") == 0) - func = merge_main; - else if (strcmp(argv[1], "show") == 0) - func = show_main; - else if (strcmp(argv[1], "overlap") == 0) - func = overlap_main; - - if (func) { - std::string Invocation(ProgName.str() + " " + argv[1]); - argv[1] = Invocation.c_str(); - return func(argc - 1, argv + 1); - } - - if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 || - strcmp(argv[1], "--help") == 0) { - - errs() << "OVERVIEW: LLVM profile data tools\n\n" - << "USAGE: " << ProgName << " [args...]\n" - << "USAGE: " << ProgName << " -help\n\n" - << "See each individual command --help for more details.\n" - << "Available commands: merge, show, overlap\n"; - return 0; - } - } - - if (argc < 2) - errs() << ProgName << ": No command specified!\n"; - else - errs() << ProgName << ": Unknown command!\n"; - - errs() << "USAGE: " << ProgName << " [args...]\n"; - return 1; -} diff --git a/tools/ldc-profgen/ldc-profgen-14.0/CMakeLists.txt b/tools/ldc-profgen/ldc-profgen-14.0/CMakeLists.txt deleted file mode 100644 index b3e05a94856..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ - -set(LLVM_LINK_COMPONENTS - AllTargetsDescs - AllTargetsDisassemblers - AllTargetsInfos - DebugInfoDWARF - Core - MC - IPO - MCDisassembler - Object - ProfileData - Support - Symbolize - ) - -add_llvm_tool(llvm-profgen - llvm-profgen.cpp - PerfReader.cpp - CSPreInliner.cpp - ProfiledBinary.cpp - ProfileGenerator.cpp - ) diff --git a/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.cpp b/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.cpp deleted file mode 100644 index 1e642639902..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.cpp +++ /dev/null @@ -1,285 +0,0 @@ -//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "CSPreInliner.h" -#include "ProfiledBinary.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include -#include - -#define DEBUG_TYPE "cs-preinliner" - -using namespace llvm; -using namespace sampleprof; - -STATISTIC(PreInlNumCSInlined, - "Number of functions inlined with context sensitive profile"); -STATISTIC(PreInlNumCSNotInlined, - "Number of functions not inlined with context sensitive profile"); -STATISTIC(PreInlNumCSInlinedHitMinLimit, - "Number of functions with FDO inline stopped due to min size limit"); -STATISTIC(PreInlNumCSInlinedHitMaxLimit, - "Number of functions with FDO inline stopped due to max size limit"); -STATISTIC( - PreInlNumCSInlinedHitGrowthLimit, - "Number of functions with FDO inline stopped due to growth size limit"); - -// The switches specify inline thresholds used in SampleProfileLoader inlining. -// TODO: the actual threshold to be tuned here because the size here is based -// on machine code not LLVM IR. -extern cl::opt SampleHotCallSiteThreshold; -extern cl::opt SampleColdCallSiteThreshold; -extern cl::opt ProfileInlineGrowthLimit; -extern cl::opt ProfileInlineLimitMin; -extern cl::opt ProfileInlineLimitMax; -extern cl::opt SortProfiledSCC; - -cl::opt EnableCSPreInliner( - "csspgo-preinliner", cl::Hidden, cl::init(true), - cl::desc("Run a global pre-inliner to merge context profile based on " - "estimated global top-down inline decisions")); - -cl::opt UseContextCostForPreInliner( - "use-context-cost-for-preinliner", cl::Hidden, cl::init(true), - cl::desc("Use context-sensitive byte size cost for preinliner decisions")); - -static cl::opt SamplePreInlineReplay( - "csspgo-replay-preinline", cl::Hidden, cl::init(false), - cl::desc( - "Replay previous inlining and adjust context profile accordingly")); - -CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, - uint64_t HotThreshold, uint64_t ColdThreshold) - : UseContextCost(UseContextCostForPreInliner), - // TODO: Pass in a guid-to-name map in order for - // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes - // as their profile context. - ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary), - HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) { - // Set default preinliner hot/cold call site threshold tuned with CSSPGO. - // for good performance with reasonable profile size. - if (!SampleHotCallSiteThreshold.getNumOccurrences()) - SampleHotCallSiteThreshold = 1500; - if (!SampleColdCallSiteThreshold.getNumOccurrences()) - SampleColdCallSiteThreshold = 0; -} - -std::vector CSPreInliner::buildTopDownOrder() { - std::vector Order; - ProfiledCallGraph ProfiledCG(ContextTracker); - - // Now that we have a profiled call graph, construct top-down order - // by building up SCC and reversing SCC order. - scc_iterator I = scc_begin(&ProfiledCG); - while (!I.isAtEnd()) { - auto Range = *I; - if (SortProfiledSCC) { - // Sort nodes in one SCC based on callsite hotness. - scc_member_iterator SI(*I); - Range = *SI; - } - for (auto *Node : Range) { - if (Node != ProfiledCG.getEntryNode()) - Order.push_back(Node->Name); - } - ++I; - } - std::reverse(Order.begin(), Order.end()); - - return Order; -} - -bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue, - const FunctionSamples *CallerSamples) { - assert(CallerSamples && "Expect non-null caller samples"); - - // Ideally we want to consider everything a function calls, but as far as - // context profile is concerned, only those frames that are children of - // current one in the trie is relavent. So we walk the trie instead of call - // targets from function profile. - ContextTrieNode *CallerNode = - ContextTracker.getContextFor(CallerSamples->getContext()); - - bool HasNewCandidate = false; - for (auto &Child : CallerNode->getAllChildContext()) { - ContextTrieNode *CalleeNode = &Child.second; - FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples(); - if (!CalleeSamples) - continue; - - // Call site count is more reliable, so we look up the corresponding call - // target profile in caller's context profile to retrieve call site count. - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); - uint64_t CallsiteCount = 0; - LineLocation Callsite = CalleeNode->getCallSiteLoc(); - if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { - SampleRecord::CallTargetMap &TargetCounts = CallTargets.get(); - auto It = TargetCounts.find(CalleeSamples->getName()); - if (It != TargetCounts.end()) - CallsiteCount = It->second; - } - - // TODO: call site and callee entry count should be mostly consistent, add - // check for that. - HasNewCandidate = true; - uint32_t CalleeSize = getFuncSize(*CalleeSamples); - CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount), - CalleeSize); - } - - return HasNewCandidate; -} - -uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) { - if (UseContextCost) { - return Binary.getFuncSizeForContext(FSamples.getContext()); - } - - return FSamples.getBodySamples().size(); -} - -bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) { - // If replay inline is requested, simply follow the inline decision of the - // profiled binary. - if (SamplePreInlineReplay) - return Candidate.CalleeSamples->getContext().hasAttribute( - ContextWasInlined); - - // Adjust threshold based on call site hotness, only do this for callsite - // prioritized inliner because otherwise cost-benefit check is done earlier. - unsigned int SampleThreshold = SampleColdCallSiteThreshold; - if (Candidate.CallsiteCount > HotCountThreshold) - SampleThreshold = SampleHotCallSiteThreshold; - - // TODO: for small cold functions, we may inlined them and we need to keep - // context profile accordingly. - if (Candidate.CallsiteCount < ColdCountThreshold) - SampleThreshold = SampleColdCallSiteThreshold; - - return (Candidate.SizeCost < SampleThreshold); -} - -void CSPreInliner::processFunction(const StringRef Name) { - FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name); - if (!FSamples) - return; - - unsigned FuncSize = getFuncSize(*FSamples); - unsigned FuncFinalSize = FuncSize; - unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit; - SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); - SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); - - LLVM_DEBUG(dbgs() << "Process " << Name - << " for context-sensitive pre-inlining (pre-inline size: " - << FuncSize << ", size limit: " << SizeLimit << ")\n"); - - ProfiledCandidateQueue CQueue; - getInlineCandidates(CQueue, FSamples); - - while (!CQueue.empty() && FuncFinalSize < SizeLimit) { - ProfiledInlineCandidate Candidate = CQueue.top(); - CQueue.pop(); - bool ShouldInline = false; - if ((ShouldInline = shouldInline(Candidate))) { - // We mark context as inlined as the corresponding context profile - // won't be merged into that function's base profile. - ++PreInlNumCSInlined; - ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples); - Candidate.CalleeSamples->getContext().setAttribute( - ContextShouldBeInlined); - FuncFinalSize += Candidate.SizeCost; - getInlineCandidates(CQueue, Candidate.CalleeSamples); - } else { - ++PreInlNumCSNotInlined; - } - LLVM_DEBUG(dbgs() << (ShouldInline ? " Inlined" : " Outlined") - << " context profile for: " - << Candidate.CalleeSamples->getContext().toString() - << " (callee size: " << Candidate.SizeCost - << ", call count:" << Candidate.CallsiteCount << ")\n"); - } - - if (!CQueue.empty()) { - if (SizeLimit == (unsigned)ProfileInlineLimitMax) - ++PreInlNumCSInlinedHitMaxLimit; - else if (SizeLimit == (unsigned)ProfileInlineLimitMin) - ++PreInlNumCSInlinedHitMinLimit; - else - ++PreInlNumCSInlinedHitGrowthLimit; - } - - LLVM_DEBUG({ - if (!CQueue.empty()) - dbgs() << " Inline candidates ignored due to size limit (inliner " - "original size: " - << FuncSize << ", inliner final size: " << FuncFinalSize - << ", size limit: " << SizeLimit << ")\n"; - - while (!CQueue.empty()) { - ProfiledInlineCandidate Candidate = CQueue.top(); - CQueue.pop(); - bool WasInlined = - Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined); - dbgs() << " " << Candidate.CalleeSamples->getContext().toString() - << " (candidate size:" << Candidate.SizeCost - << ", call count: " << Candidate.CallsiteCount << ", previously " - << (WasInlined ? "inlined)\n" : "not inlined)\n"); - } - }); -} - -void CSPreInliner::run() { -#ifndef NDEBUG - auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) { - dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles (" - << Profiles.size() << " total):\n"; - for (auto &It : Profiles) { - const FunctionSamples &Samples = It.second; - dbgs() << " [" << Samples.getContext().toString() << "] " - << Samples.getTotalSamples() << ":" << Samples.getHeadSamples() - << "\n"; - } - }; -#endif - - LLVM_DEBUG(printProfileNames(ProfileMap, true)); - - // Execute global pre-inliner to estimate a global top-down inline - // decision and merge profiles accordingly. This helps with profile - // merge for ThinLTO otherwise we won't be able to merge profiles back - // to base profile across module/thin-backend boundaries. - // It also helps better compress context profile to control profile - // size, as we now only need context profile for functions going to - // be inlined. - for (StringRef FuncName : buildTopDownOrder()) { - processFunction(FuncName); - } - - // Not inlined context profiles are merged into its base, so we can - // trim out such profiles from the output. - std::vector ProfilesToBeRemoved; - for (auto &It : ProfileMap) { - SampleContext &Context = It.second.getContext(); - if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) { - assert(Context.hasState(MergedContext) && - "Not inlined context profile should be merged already"); - ProfilesToBeRemoved.push_back(It.first); - } - } - - for (auto &ContextName : ProfilesToBeRemoved) { - ProfileMap.erase(ContextName); - } - - // Make sure ProfileMap's key is consistent with FunctionSamples' name. - SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles(); - - LLVM_DEBUG(printProfileNames(ProfileMap, false)); -} diff --git a/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.h b/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.h deleted file mode 100644 index 9f63f7ef7be..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/CSPreInliner.h +++ /dev/null @@ -1,95 +0,0 @@ -//===-- CSPreInliner.h - Profile guided preinliner ---------------- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H -#define LLVM_TOOLS_LLVM_PROFGEN_PGOINLINEADVISOR_H - -#include "ProfiledBinary.h" -#include "llvm/ADT/PriorityQueue.h" -#include "llvm/ProfileData/ProfileCommon.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/Transforms/IPO/ProfiledCallGraph.h" -#include "llvm/Transforms/IPO/SampleContextTracker.h" - -using namespace llvm; -using namespace sampleprof; - -namespace llvm { -namespace sampleprof { - -// Inline candidate seen from profile -struct ProfiledInlineCandidate { - ProfiledInlineCandidate(const FunctionSamples *Samples, uint64_t Count, - uint32_t Size) - : CalleeSamples(Samples), CallsiteCount(Count), SizeCost(Size) {} - // Context-sensitive function profile for inline candidate - const FunctionSamples *CalleeSamples; - // Call site count for an inline candidate - // TODO: make sure entry count for context profile and call site - // target count for corresponding call are consistent. - uint64_t CallsiteCount; - // Size proxy for function under particular call context. - uint64_t SizeCost; -}; - -// Inline candidate comparer using call site weight -struct ProfiledCandidateComparer { - bool operator()(const ProfiledInlineCandidate &LHS, - const ProfiledInlineCandidate &RHS) { - if (LHS.CallsiteCount != RHS.CallsiteCount) - return LHS.CallsiteCount < RHS.CallsiteCount; - - if (LHS.SizeCost != RHS.SizeCost) - return LHS.SizeCost > RHS.SizeCost; - - // Tie breaker using GUID so we have stable/deterministic inlining order - assert(LHS.CalleeSamples && RHS.CalleeSamples && - "Expect non-null FunctionSamples"); - return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) < - RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName()); - } -}; - -using ProfiledCandidateQueue = - PriorityQueue, - ProfiledCandidateComparer>; - -// Pre-compilation inliner based on context-sensitive profile. -// The PreInliner estimates inline decision using hotness from profile -// and cost estimation from machine code size. It helps merges context -// profile globally and achieves better post-inine profile quality, which -// otherwise won't be possible for ThinLTO. It also reduce context profile -// size by only keep context that is estimated to be inlined. -class CSPreInliner { -public: - CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary, - uint64_t HotThreshold, uint64_t ColdThreshold); - void run(); - -private: - bool getInlineCandidates(ProfiledCandidateQueue &CQueue, - const FunctionSamples *FCallerContextSamples); - std::vector buildTopDownOrder(); - void processFunction(StringRef Name); - bool shouldInline(ProfiledInlineCandidate &Candidate); - uint32_t getFuncSize(const FunctionSamples &FSamples); - bool UseContextCost; - SampleContextTracker ContextTracker; - SampleProfileMap &ProfileMap; - ProfiledBinary &Binary; - - // Count thresholds to answer isHotCount and isColdCount queries. - // Mirrors the threshold in ProfileSummaryInfo. - uint64_t HotCountThreshold; - uint64_t ColdCountThreshold; -}; - -} // end namespace sampleprof -} // end namespace llvm - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/CallContext.h b/tools/ldc-profgen/ldc-profgen-14.0/CallContext.h deleted file mode 100644 index 5e552130d03..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/CallContext.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- CallContext.h - Call Context Handler ---------------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H -#define LLVM_TOOLS_LLVM_PROFGEN_CALLCONTEXT_H - -#include "llvm/ProfileData/SampleProf.h" -#include -#include -#include - -namespace llvm { -namespace sampleprof { - -inline std::string getCallSite(const SampleContextFrame &Callsite) { - std::string CallsiteStr = Callsite.FuncName.str(); - CallsiteStr += ":"; - CallsiteStr += Twine(Callsite.Location.LineOffset).str(); - if (Callsite.Location.Discriminator > 0) { - CallsiteStr += "."; - CallsiteStr += Twine(Callsite.Location.Discriminator).str(); - } - return CallsiteStr; -} - -// TODO: This operation is expansive. If it ever gets called multiple times we -// may think of making a class wrapper with internal states for it. -inline std::string getLocWithContext(const SampleContextFrameVector &Context) { - std::ostringstream OContextStr; - for (const auto &Callsite : Context) { - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << getCallSite(Callsite); - } - return OContextStr.str(); -} - -// Reverse call context, i.e., in the order of callee frames to caller frames, -// is useful during instruction printing or pseudo probe printing. -inline std::string -getReversedLocWithContext(const SampleContextFrameVector &Context) { - std::ostringstream OContextStr; - for (const auto &Callsite : reverse(Context)) { - if (OContextStr.str().size()) - OContextStr << " @ "; - OContextStr << getCallSite(Callsite); - } - return OContextStr.str(); -} - -} // end namespace sampleprof -} // end namespace llvm - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/ErrorHandling.h b/tools/ldc-profgen/ldc-profgen-14.0/ErrorHandling.h deleted file mode 100644 index b797add8a89..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/ErrorHandling.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- ErrorHandling.h - Error handler -------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H -#define LLVM_TOOLS_LLVM_PROFGEN_ERRORHANDLING_H - -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/WithColor.h" -#include - -using namespace llvm; - -[[noreturn]] inline void exitWithError(const Twine &Message, - StringRef Whence = StringRef(), - StringRef Hint = StringRef()) { - WithColor::error(errs(), "llvm-profgen"); - if (!Whence.empty()) - errs() << Whence.str() << ": "; - errs() << Message << "\n"; - if (!Hint.empty()) - WithColor::note() << Hint.str() << "\n"; - ::exit(EXIT_FAILURE); -} - -[[noreturn]] inline void exitWithError(std::error_code EC, - StringRef Whence = StringRef()) { - exitWithError(EC.message(), Whence); -} - -[[noreturn]] inline void exitWithError(Error E, StringRef Whence) { - exitWithError(errorToErrorCode(std::move(E)), Whence); -} - -template -T unwrapOrError(Expected EO, Ts &&... Args) { - if (EO) - return std::move(*EO); - exitWithError(EO.takeError(), std::forward(Args)...); -} - -inline void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg) { - if (!Total || !Num) - return; - WithColor::warning() << format("%.2f", static_cast(Num) * 100 / Total) - << "%(" << Num << "/" << Total << ") " << Msg << "\n"; -} - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.cpp b/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.cpp deleted file mode 100644 index 98b4c7cdf16..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.cpp +++ /dev/null @@ -1,1222 +0,0 @@ -//===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "PerfReader.h" -#include "ProfileGenerator.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Process.h" - -#define DEBUG_TYPE "perf-reader" - -cl::opt SkipSymbolization("skip-symbolization", cl::init(false), - cl::ZeroOrMore, - cl::desc("Dump the unsymbolized profile to the " - "output file. It will show unwinder " - "output for CS profile generation.")); - -static cl::opt ShowMmapEvents("show-mmap-events", cl::init(false), - cl::ZeroOrMore, - cl::desc("Print binary load events.")); - -static cl::opt - UseOffset("use-offset", cl::init(true), cl::ZeroOrMore, - cl::desc("Work with `--skip-symbolization` or " - "`--unsymbolized-profile` to write/read the " - "offset instead of virtual address.")); - -static cl::opt UseLoadableSegmentAsBase( - "use-first-loadable-segment-as-base", cl::init(false), cl::ZeroOrMore, - cl::desc("Use first loadable segment address as base address " - "for offsets in unsymbolized profile. By default " - "first executable segment address is used")); - -static cl::opt - IgnoreStackSamples("ignore-stack-samples", cl::init(false), cl::ZeroOrMore, - cl::desc("Ignore call stack samples for hybrid samples " - "and produce context-insensitive profile.")); -cl::opt ShowDetailedWarning("show-detailed-warning", cl::init(false), - cl::ZeroOrMore, - cl::desc("Show detailed warning message.")); - -extern cl::opt PerfTraceFilename; -extern cl::opt ShowDisassemblyOnly; -extern cl::opt ShowSourceLocations; -extern cl::opt OutputFilename; - -namespace llvm { -namespace sampleprof { - -void VirtualUnwinder::unwindCall(UnwindState &State) { - uint64_t Source = State.getCurrentLBRSource(); - // An artificial return should push an external frame and an artificial call - // will match it and pop the external frame so that the context before and - // after the external call will be the same. - if (State.getCurrentLBR().IsArtificial) { - NumExtCallBranch++; - // A return is matched and pop the external frame. - if (State.getParentFrame()->isExternalFrame()) { - State.popFrame(); - } else { - // An artificial return is missing, it happens that the sample is just hit - // in the middle of the external code. In this case, the leading branch is - // a call to external, we just keep unwinding use a context-less stack. - if (State.getParentFrame() != State.getDummyRootPtr()) - NumMissingExternalFrame++; - State.clearCallStack(); - State.pushFrame(Source); - State.InstPtr.update(Source); - return; - } - } - - auto *ParentFrame = State.getParentFrame(); - // The 2nd frame after leaf could be missing if stack sample is - // taken when IP is within prolog/epilog, as frame chain isn't - // setup yet. Fill in the missing frame in that case. - // TODO: Currently we just assume all the addr that can't match the - // 2nd frame is in prolog/epilog. In the future, we will switch to - // pro/epi tracker(Dwarf CFI) for the precise check. - if (ParentFrame == State.getDummyRootPtr() || - ParentFrame->Address != Source) { - State.switchToFrame(Source); - if (ParentFrame != State.getDummyRootPtr()) { - if (State.getCurrentLBR().IsArtificial) - NumMismatchedExtCallBranch++; - else - NumMismatchedProEpiBranch++; - } - } else { - State.popFrame(); - } - State.InstPtr.update(Source); -} - -void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) { - InstructionPointer &IP = State.InstPtr; - uint64_t Target = State.getCurrentLBRTarget(); - uint64_t End = IP.Address; - if (Binary->usePseudoProbes()) { - // We don't need to top frame probe since it should be extracted - // from the range. - // The outcome of the virtual unwinding with pseudo probes is a - // map from a context key to the address range being unwound. - // This means basically linear unwinding is not needed for pseudo - // probes. The range will be simply recorded here and will be - // converted to a list of pseudo probes to report in ProfileGenerator. - State.getParentFrame()->recordRangeCount(Target, End, Repeat); - } else { - // Unwind linear execution part. - // Split and record the range by different inline context. For example: - // [0x01] ... main:1 # Target - // [0x02] ... main:2 - // [0x03] ... main:3 @ foo:1 - // [0x04] ... main:3 @ foo:2 - // [0x05] ... main:3 @ foo:3 - // [0x06] ... main:4 - // [0x07] ... main:5 # End - // It will be recorded: - // [main:*] : [0x06, 0x07], [0x01, 0x02] - // [main:3 @ foo:*] : [0x03, 0x05] - while (IP.Address > Target) { - uint64_t PrevIP = IP.Address; - IP.backward(); - // Break into segments for implicit call/return due to inlining - bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address); - if (!SameInlinee) { - State.switchToFrame(PrevIP); - State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat); - End = IP.Address; - } - } - assert(IP.Address == Target && "The last one must be the target address."); - // Record the remaining range, [0x01, 0x02] in the example - State.switchToFrame(IP.Address); - State.CurrentLeafFrame->recordRangeCount(IP.Address, End, Repeat); - } -} - -void VirtualUnwinder::unwindReturn(UnwindState &State) { - // Add extra frame as we unwind through the return - const LBREntry &LBR = State.getCurrentLBR(); - uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target); - State.switchToFrame(CallAddr); - // Push an external frame for the case of returning to external - // address(callback), later if an aitificial call is matched and it will be - // popped up. This is to 1)avoid context being interrupted by callback, - // context before or after the callback should be the same. 2) the call stack - // of function called by callback should be truncated which is done during - // recording the context on trie. For example: - // main (call)--> foo (call)--> callback (call)--> bar (return)--> callback - // (return)--> foo (return)--> main - // Context for bar should not include main and foo. - // For the code of foo, the context of before and after callback should both - // be [foo, main]. - if (LBR.IsArtificial) - State.pushFrame(ExternalAddr); - State.pushFrame(LBR.Source); - State.InstPtr.update(LBR.Source); -} - -void VirtualUnwinder::unwindBranch(UnwindState &State) { - // TODO: Tolerate tail call for now, as we may see tail call from libraries. - // This is only for intra function branches, excluding tail calls. - uint64_t Source = State.getCurrentLBRSource(); - State.switchToFrame(Source); - State.InstPtr.update(Source); -} - -std::shared_ptr FrameStack::getContextKey() { - std::shared_ptr KeyStr = - std::make_shared(); - KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); - if (KeyStr->Context.empty()) - return nullptr; - return KeyStr; -} - -std::shared_ptr ProbeStack::getContextKey() { - std::shared_ptr ProbeBasedKey = - std::make_shared(); - for (auto CallProbe : Stack) { - ProbeBasedKey->Probes.emplace_back(CallProbe); - } - CSProfileGenerator::compressRecursionContext( - ProbeBasedKey->Probes); - CSProfileGenerator::trimContext( - ProbeBasedKey->Probes); - return ProbeBasedKey; -} - -template -void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, - T &Stack) { - if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty()) - return; - - std::shared_ptr Key = Stack.getContextKey(); - if (Key == nullptr) - return; - auto Ret = CtxCounterMap->emplace(Hashable(Key), SampleCounter()); - SampleCounter &SCounter = Ret.first->second; - for (auto &Item : Cur->RangeSamples) { - uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); - uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); - SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item)); - } - - for (auto &Item : Cur->BranchSamples) { - uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item)); - uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item)); - SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item)); - } -} - -template -void VirtualUnwinder::collectSamplesFromFrameTrie( - UnwindState::ProfiledFrame *Cur, T &Stack) { - if (!Cur->isDummyRoot()) { - // Truncate the context for external frame since this isn't a real call - // context the compiler will see. - if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) { - // Process truncated context - // Start a new traversal ignoring its bottom context - T EmptyStack(Binary); - collectSamplesFromFrame(Cur, EmptyStack); - for (const auto &Item : Cur->Children) { - collectSamplesFromFrameTrie(Item.second.get(), EmptyStack); - } - - // Keep note of untracked call site and deduplicate them - // for warning later. - if (!Cur->isLeafFrame()) - UntrackedCallsites.insert(Cur->Address); - - return; - } - } - - collectSamplesFromFrame(Cur, Stack); - // Process children frame - for (const auto &Item : Cur->Children) { - collectSamplesFromFrameTrie(Item.second.get(), Stack); - } - // Recover the call stack - Stack.popFrame(); -} - -void VirtualUnwinder::collectSamplesFromFrameTrie( - UnwindState::ProfiledFrame *Cur) { - if (Binary->usePseudoProbes()) { - ProbeStack Stack(Binary); - collectSamplesFromFrameTrie(Cur, Stack); - } else { - FrameStack Stack(Binary); - collectSamplesFromFrameTrie(Cur, Stack); - } -} - -void VirtualUnwinder::recordBranchCount(const LBREntry &Branch, - UnwindState &State, uint64_t Repeat) { - if (Branch.IsArtificial || Branch.Target == ExternalAddr) - return; - - if (Binary->usePseudoProbes()) { - // Same as recordRangeCount, We don't need to top frame probe since we will - // extract it from branch's source address - State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target, - Repeat); - } else { - State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target, - Repeat); - } -} - -bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { - // Capture initial state as starting point for unwinding. - UnwindState State(Sample, Binary); - - // Sanity check - making sure leaf of LBR aligns with leaf of stack sample - // Stack sample sometimes can be unreliable, so filter out bogus ones. - if (!State.validateInitialState()) - return false; - - // Now process the LBR samples in parrallel with stack sample - // Note that we do not reverse the LBR entry order so we can - // unwind the sample stack as we walk through LBR entries. - while (State.hasNextLBR()) { - State.checkStateConsistency(); - - // Do not attempt linear unwind for the leaf range as it's incomplete. - if (!State.IsLastLBR()) { - // Unwind implicit calls/returns from inlining, along the linear path, - // break into smaller sub section each with its own calling context. - unwindLinear(State, Repeat); - } - - // Save the LBR branch before it gets unwound. - const LBREntry &Branch = State.getCurrentLBR(); - - if (isCallState(State)) { - // Unwind calls - we know we encountered call if LBR overlaps with - // transition between leaf the 2nd frame. Note that for calls that - // were not in the original stack sample, we should have added the - // extra frame when processing the return paired with this call. - unwindCall(State); - } else if (isReturnState(State)) { - // Unwind returns - check whether the IP is indeed at a return instruction - unwindReturn(State); - } else { - // Unwind branches - // For regular intra function branches, we only need to record branch with - // context. For an artificial branch cross function boundaries, we got an - // issue with returning to external code. Take the two LBR enties for - // example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader, - // we only get[foo:8(RETURN), bar:1], unwinder will be confused like foo - // return to bar. Here we detect and treat this case as BRANCH instead of - // RETURN which only update the source address. - unwindBranch(State); - } - State.advanceLBR(); - // Record `branch` with calling context after unwinding. - recordBranchCount(Branch, State, Repeat); - } - // As samples are aggregated on trie, record them into counter map - collectSamplesFromFrameTrie(State.getDummyRootPtr()); - - return true; -} - -std::unique_ptr -PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput) { - std::unique_ptr PerfReader; - - if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { - PerfReader.reset( - new UnsymbolizedProfileReader(Binary, PerfInput.InputFile)); - return PerfReader; - } - - // For perf data input, we need to convert them into perf script first. - if (PerfInput.Format == PerfFormat::PerfData) - PerfInput = PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput); - - assert((PerfInput.Format == PerfFormat::PerfScript) && - "Should be a perfscript!"); - - PerfInput.Content = - PerfScriptReader::checkPerfScriptType(PerfInput.InputFile); - if (PerfInput.Content == PerfContent::LBRStack) { - PerfReader.reset(new HybridPerfReader(Binary, PerfInput.InputFile)); - } else if (PerfInput.Content == PerfContent::LBR) { - PerfReader.reset(new LBRPerfReader(Binary, PerfInput.InputFile)); - } else { - exitWithError("Unsupported perfscript!"); - } - - return PerfReader; -} - -PerfInputFile PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, - PerfInputFile &File) { - StringRef PerfData = File.InputFile; - // Run perf script to retrieve PIDs matching binary we're interested in. - auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf"); - if (!PerfExecutable) { - exitWithError("Perf not found."); - } - std::string PerfPath = *PerfExecutable; - std::string PerfTraceFile = PerfData.str() + ".script.tmp"; - StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", - "-F", "comm,pid", "-i", - PerfData}; - Optional Redirects[] = {llvm::None, // Stdin - StringRef(PerfTraceFile), // Stdout - StringRef(PerfTraceFile)}; // Stderr - sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, llvm::None, Redirects); - - // Collect the PIDs - TraceStream TraceIt(PerfTraceFile); - std::string PIDs; - std::unordered_set PIDSet; - while (!TraceIt.isAtEoF()) { - MMapEvent MMap; - if (isMMap2Event(TraceIt.getCurrentLine()) && - extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) { - auto It = PIDSet.emplace(MMap.PID); - if (It.second) { - if (!PIDs.empty()) { - PIDs.append(","); - } - PIDs.append(utostr(MMap.PID)); - } - } - TraceIt.advance(); - } - - if (PIDs.empty()) { - exitWithError("No relevant mmap event is found in perf data."); - } - - // Run perf script again to retrieve events for PIDs collected above - StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events", - "-F", "ip,brstack", "--pid", - PIDs, "-i", PerfData}; - sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, llvm::None, Redirects); - - return {PerfTraceFile, PerfFormat::PerfScript, PerfContent::UnknownContent}; -} - -void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { - // Drop the event which doesn't belong to user-provided binary - StringRef BinaryName = llvm::sys::path::filename(Event.BinaryPath); - if (Binary->getName() != BinaryName) - return; - - // Drop the event if its image is loaded at the same address - if (Event.Address == Binary->getBaseAddress()) { - Binary->setIsLoadedByMMap(true); - return; - } - - if (Event.Offset == Binary->getTextSegmentOffset()) { - // A binary image could be unloaded and then reloaded at different - // place, so update binary load address. - // Only update for the first executable segment and assume all other - // segments are loaded at consecutive memory addresses, which is the case on - // X64. - Binary->setBaseAddress(Event.Address); - Binary->setIsLoadedByMMap(true); - } else { - // Verify segments are loaded consecutively. - const auto &Offsets = Binary->getTextSegmentOffsets(); - auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset); - if (It != Offsets.end() && *It == Event.Offset) { - // The event is for loading a separate executable segment. - auto I = std::distance(Offsets.begin(), It); - const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses(); - if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() != - Event.Address - Binary->getBaseAddress()) - exitWithError("Executable segments not loaded consecutively"); - } else { - if (It == Offsets.begin()) - exitWithError("File offset not found"); - else { - // Find the segment the event falls in. A large segment could be loaded - // via multiple mmap calls with consecutive memory addresses. - --It; - assert(*It < Event.Offset); - if (Event.Offset - *It != Event.Address - Binary->getBaseAddress()) - exitWithError("Segment not loaded by consecutive mmaps"); - } - } - } -} - -static std::string getContextKeyStr(ContextKey *K, - const ProfiledBinary *Binary) { - if (const auto *CtxKey = dyn_cast(K)) { - return SampleContext::getContextString(CtxKey->Context); - } else if (const auto *CtxKey = dyn_cast(K)) { - SampleContextFrameVector ContextStack; - for (const auto *Probe : CtxKey->Probes) { - Binary->getInlineContextForProbe(Probe, ContextStack, true); - } - // Probe context key at this point does not have leaf probe, so do not - // include the leaf inline location. - return SampleContext::getContextString(ContextStack, true); - } else { - llvm_unreachable("unexpected key type"); - } -} - -void HybridPerfReader::unwindSamples() { - if (Binary->useFSDiscriminator()) - exitWithError("FS discriminator is not supported in CS profile."); - VirtualUnwinder Unwinder(&SampleCounters, Binary); - for (const auto &Item : AggregatedSamples) { - const PerfSample *Sample = Item.first.getPtr(); - Unwinder.unwind(Sample, Item.second); - } - - // Warn about untracked frames due to missing probes. - if (ShowDetailedWarning) { - for (auto Address : Unwinder.getUntrackedCallsites()) - WithColor::warning() << "Profile context truncated due to missing probe " - << "for call instruction at " - << format("0x%" PRIx64, Address) << "\n"; - } - - emitWarningSummary(Unwinder.getUntrackedCallsites().size(), - SampleCounters.size(), - "of profiled contexts are truncated due to missing probe " - "for call instruction."); - - emitWarningSummary( - Unwinder.NumMismatchedExtCallBranch, Unwinder.NumTotalBranches, - "of branches'source is a call instruction but doesn't match call frame " - "stack, likely due to unwinding error of external frame."); - - emitWarningSummary( - Unwinder.NumMismatchedProEpiBranch, Unwinder.NumTotalBranches, - "of branches'source is a call instruction but doesn't match call frame " - "stack, likely due to frame in prolog/epilog."); - - emitWarningSummary(Unwinder.NumMissingExternalFrame, - Unwinder.NumExtCallBranch, - "of artificial call branches but doesn't have an external " - "frame to match."); -} - -bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, - SmallVectorImpl &LBRStack) { - // The raw format of LBR stack is like: - // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... - // ... 0x4005c8/0x4005dc/P/-/-/0 - // It's in FIFO order and seperated by whitespace. - SmallVector Records; - TraceIt.getCurrentLine().split(Records, " ", -1, false); - auto WarnInvalidLBR = [](TraceStream &TraceIt) { - WithColor::warning() << "Invalid address in LBR record at line " - << TraceIt.getLineNumber() << ": " - << TraceIt.getCurrentLine() << "\n"; - }; - - // Skip the leading instruction pointer. - size_t Index = 0; - uint64_t LeadingAddr; - if (!Records.empty() && !Records[0].contains('/')) { - if (Records[0].getAsInteger(16, LeadingAddr)) { - WarnInvalidLBR(TraceIt); - TraceIt.advance(); - return false; - } - Index = 1; - } - // Now extract LBR samples - note that we do not reverse the - // LBR entry order so we can unwind the sample stack as we walk - // through LBR entries. - uint64_t PrevTrDst = 0; - - while (Index < Records.size()) { - auto &Token = Records[Index++]; - if (Token.size() == 0) - continue; - - SmallVector Addresses; - Token.split(Addresses, "/"); - uint64_t Src; - uint64_t Dst; - - // Stop at broken LBR records. - if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) || - Addresses[1].substr(2).getAsInteger(16, Dst)) { - WarnInvalidLBR(TraceIt); - break; - } - - bool SrcIsInternal = Binary->addressIsCode(Src); - bool DstIsInternal = Binary->addressIsCode(Dst); - bool IsExternal = !SrcIsInternal && !DstIsInternal; - bool IsIncoming = !SrcIsInternal && DstIsInternal; - bool IsOutgoing = SrcIsInternal && !DstIsInternal; - bool IsArtificial = false; - - // Ignore branches outside the current binary. - if (IsExternal) { - if (!PrevTrDst && !LBRStack.empty()) { - WithColor::warning() - << "Invalid transfer to external code in LBR record at line " - << TraceIt.getLineNumber() << ": " << TraceIt.getCurrentLine() - << "\n"; - } - // Do not ignore the entire samples, the remaining LBR can still be - // unwound using a context-less stack. - continue; - } - - if (IsOutgoing) { - if (!PrevTrDst) { - // This is a leading outgoing LBR, we should keep processing the LBRs. - if (LBRStack.empty()) { - NumLeadingOutgoingLBR++; - // Record this LBR since current source and next LBR' target is still - // a valid range. - LBRStack.emplace_back(LBREntry(Src, ExternalAddr, false)); - continue; - } - // This is middle unpaired outgoing jump which is likely due to - // interrupt or incomplete LBR trace. Ignore current and subsequent - // entries since they are likely in different contexts. - break; - } - - // For transition to external code, group the Source with the next - // availabe transition target. - Dst = PrevTrDst; - PrevTrDst = 0; - IsArtificial = true; - } else { - if (PrevTrDst) { - // If we have seen an incoming transition from external code to internal - // code, but not a following outgoing transition, the incoming - // transition is likely due to interrupt which is usually unpaired. - // Ignore current and subsequent entries since they are likely in - // different contexts. - break; - } - - if (IsIncoming) { - // For transition from external code (such as dynamic libraries) to - // the current binary, keep track of the branch target which will be - // grouped with the Source of the last transition from the current - // binary. - PrevTrDst = Dst; - continue; - } - } - - // TODO: filter out buggy duplicate branches on Skylake - - LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial)); - } - TraceIt.advance(); - return !LBRStack.empty(); -} - -bool PerfScriptReader::extractCallstack(TraceStream &TraceIt, - SmallVectorImpl &CallStack) { - // The raw format of call stack is like: - // 4005dc # leaf frame - // 400634 - // 400684 # root frame - // It's in bottom-up order with each frame in one line. - - // Extract stack frames from sample - while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { - StringRef FrameStr = TraceIt.getCurrentLine().ltrim(); - uint64_t FrameAddr = 0; - if (FrameStr.getAsInteger(16, FrameAddr)) { - // We might parse a non-perf sample line like empty line and comments, - // skip it - TraceIt.advance(); - return false; - } - TraceIt.advance(); - // Currently intermixed frame from different binaries is not supported. - if (!Binary->addressIsCode(FrameAddr)) { - if (CallStack.empty()) - NumLeafExternalFrame++; - // Push a special value(ExternalAddr) for the external frames so that - // unwinder can still work on this with artificial Call/Return branch. - // After unwinding, the context will be truncated for external frame. - // Also deduplicate the consecutive external addresses. - if (CallStack.empty() || CallStack.back() != ExternalAddr) - CallStack.emplace_back(ExternalAddr); - continue; - } - - // We need to translate return address to call address for non-leaf frames. - if (!CallStack.empty()) { - auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr); - if (!CallAddr) { - // Stop at an invalid return address caused by bad unwinding. This could - // happen to frame-pointer-based unwinding and the callee functions that - // do not have the frame pointer chain set up. - InvalidReturnAddresses.insert(FrameAddr); - break; - } - FrameAddr = CallAddr; - } - - CallStack.emplace_back(FrameAddr); - } - - // Strip out the bottom external addr. - if (CallStack.size() > 1 && CallStack.back() == ExternalAddr) - CallStack.pop_back(); - - // Skip other unrelated line, find the next valid LBR line - // Note that even for empty call stack, we should skip the address at the - // bottom, otherwise the following pass may generate a truncated callstack - while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) { - TraceIt.advance(); - } - // Filter out broken stack sample. We may not have complete frame info - // if sample end up in prolog/epilog, the result is dangling context not - // connected to entry point. This should be relatively rare thus not much - // impact on overall profile quality. However we do want to filter them - // out to reduce the number of different calling contexts. One instance - // of such case - when sample landed in prolog/epilog, somehow stack - // walking will be broken in an unexpected way that higher frames will be - // missing. - return !CallStack.empty() && - !Binary->addressInPrologEpilog(CallStack.front()); -} - -void PerfScriptReader::warnIfMissingMMap() { - if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) { - WithColor::warning() << "No relevant mmap event is matched for " - << Binary->getName() - << ", will use preferred address (" - << format("0x%" PRIx64, - Binary->getPreferredBaseAddress()) - << ") as the base loading address!\n"; - // Avoid redundant warning, only warn at the first unmatched sample. - Binary->setMissingMMapWarned(true); - } -} - -void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { - // The raw hybird sample started with call stack in FILO order and followed - // intermediately by LBR sample - // e.g. - // 4005dc # call stack leaf - // 400634 - // 400684 # call stack root - // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... - // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries - // - std::shared_ptr Sample = std::make_shared(); - - // Parsing call stack and populate into PerfSample.CallStack - if (!extractCallstack(TraceIt, Sample->CallStack)) { - // Skip the next LBR line matched current call stack - if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) - TraceIt.advance(); - return; - } - - warnIfMissingMMap(); - - if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) { - // Parsing LBR stack and populate into PerfSample.LBRStack - if (extractLBRStack(TraceIt, Sample->LBRStack)) { - if (IgnoreStackSamples) { - Sample->CallStack.clear(); - } else { - // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR - // ranges - Sample->CallStack.front() = Sample->LBRStack[0].Target; - } - // Record samples by aggregation - AggregatedSamples[Hashable(Sample)] += Count; - } - } else { - // LBR sample is encoded in single line after stack sample - exitWithError("'Hybrid perf sample is corrupted, No LBR sample line"); - } -} - -void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) { - std::error_code EC; - raw_fd_ostream OS(Filename, EC, llvm::sys::fs::OF_TextWithCRLF); - if (EC) - exitWithError(EC, Filename); - writeUnsymbolizedProfile(OS); -} - -// Use ordered map to make the output deterministic -using OrderedCounterForPrint = std::map; - -void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) { - OrderedCounterForPrint OrderedCounters; - for (auto &CI : SampleCounters) { - OrderedCounters[getContextKeyStr(CI.first.getPtr(), Binary)] = &CI.second; - } - - auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator, - uint32_t Indent) { - OS.indent(Indent); - OS << Counter.size() << "\n"; - for (auto &I : Counter) { - uint64_t Start = I.first.first; - uint64_t End = I.first.second; - - if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) { - Start = Binary->offsetToVirtualAddr(Start); - End = Binary->offsetToVirtualAddr(End); - } - - if (UseOffset && UseLoadableSegmentAsBase) { - Start -= Binary->getFirstLoadableAddress(); - End -= Binary->getFirstLoadableAddress(); - } - - OS.indent(Indent); - OS << Twine::utohexstr(Start) << Separator << Twine::utohexstr(End) << ":" - << I.second << "\n"; - } - }; - - for (auto &CI : OrderedCounters) { - uint32_t Indent = 0; - if (ProfileIsCSFlat) { - // Context string key - OS << "[" << CI.first << "]\n"; - Indent = 2; - } - - SampleCounter &Counter = *CI.second; - SCounterPrinter(Counter.RangeCounter, "-", Indent); - SCounterPrinter(Counter.BranchCounter, "->", Indent); - } -} - -// Format of input: -// number of entries in RangeCounter -// from_1-to_1:count_1 -// from_2-to_2:count_2 -// ...... -// from_n-to_n:count_n -// number of entries in BranchCounter -// src_1->dst_1:count_1 -// src_2->dst_2:count_2 -// ...... -// src_n->dst_n:count_n -void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt, - SampleCounter &SCounters) { - auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) { - std::string Msg = TraceIt.isAtEoF() - ? "Invalid raw profile!" - : "Invalid raw profile at line " + - Twine(TraceIt.getLineNumber()).str() + ": " + - TraceIt.getCurrentLine().str(); - exitWithError(Msg); - }; - auto ReadNumber = [&](uint64_t &Num) { - if (TraceIt.isAtEoF()) - exitWithErrorForTraceLine(TraceIt); - if (TraceIt.getCurrentLine().ltrim().getAsInteger(10, Num)) - exitWithErrorForTraceLine(TraceIt); - TraceIt.advance(); - }; - - auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) { - uint64_t Num = 0; - ReadNumber(Num); - while (Num--) { - if (TraceIt.isAtEoF()) - exitWithErrorForTraceLine(TraceIt); - StringRef Line = TraceIt.getCurrentLine().ltrim(); - - uint64_t Count = 0; - auto LineSplit = Line.split(":"); - if (LineSplit.second.empty() || LineSplit.second.getAsInteger(10, Count)) - exitWithErrorForTraceLine(TraceIt); - - uint64_t Source = 0; - uint64_t Target = 0; - auto Range = LineSplit.first.split(Separator); - if (Range.second.empty() || Range.first.getAsInteger(16, Source) || - Range.second.getAsInteger(16, Target)) - exitWithErrorForTraceLine(TraceIt); - - if (!UseOffset || (UseOffset && UseLoadableSegmentAsBase)) { - uint64_t BaseAddr = 0; - if (UseOffset && UseLoadableSegmentAsBase) - BaseAddr = Binary->getFirstLoadableAddress(); - - Source = Binary->virtualAddrToOffset(Source + BaseAddr); - Target = Binary->virtualAddrToOffset(Target + BaseAddr); - } - - Counter[{Source, Target}] += Count; - TraceIt.advance(); - } - }; - - ReadCounter(SCounters.RangeCounter, "-"); - ReadCounter(SCounters.BranchCounter, "->"); -} - -void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { - TraceStream TraceIt(FileName); - while (!TraceIt.isAtEoF()) { - std::shared_ptr Key = - std::make_shared(); - StringRef Line = TraceIt.getCurrentLine(); - // Read context stack for CS profile. - if (Line.startswith("[")) { - ProfileIsCSFlat = true; - auto I = ContextStrSet.insert(Line.str()); - SampleContext::createCtxVectorFromStr(*I.first, Key->Context); - TraceIt.advance(); - } - auto Ret = - SampleCounters.emplace(Hashable(Key), SampleCounter()); - readSampleCounters(TraceIt, Ret.first->second); - } -} - -void UnsymbolizedProfileReader::parsePerfTraces() { - readUnsymbolizedProfile(PerfTraceFile); -} - -void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, - uint64_t Repeat) { - SampleCounter &Counter = SampleCounters.begin()->second; - uint64_t EndOffeset = 0; - for (const LBREntry &LBR : Sample->LBRStack) { - assert(LBR.Source != ExternalAddr && - "Branch' source should not be an external address, it should be " - "converted to aritificial branch."); - uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); - uint64_t TargetOffset = LBR.Target == static_cast(ExternalAddr) - ? static_cast(ExternalAddr) - : Binary->virtualAddrToOffset(LBR.Target); - - if (!LBR.IsArtificial && TargetOffset != ExternalAddr) { - Counter.recordBranchCount(SourceOffset, TargetOffset, Repeat); - } - - // If this not the first LBR, update the range count between TO of current - // LBR and FROM of next LBR. - uint64_t StartOffset = TargetOffset; - if (EndOffeset != 0) - Counter.recordRangeCount(StartOffset, EndOffeset, Repeat); - EndOffeset = SourceOffset; - } -} - -void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { - std::shared_ptr Sample = std::make_shared(); - // Parsing LBR stack and populate into PerfSample.LBRStack - if (extractLBRStack(TraceIt, Sample->LBRStack)) { - warnIfMissingMMap(); - // Record LBR only samples by aggregation - AggregatedSamples[Hashable(Sample)] += Count; - } -} - -void PerfScriptReader::generateUnsymbolizedProfile() { - // There is no context for LBR only sample, so initialize one entry with - // fake "empty" context key. - assert(SampleCounters.empty() && - "Sample counter map should be empty before raw profile generation"); - std::shared_ptr Key = - std::make_shared(); - SampleCounters.emplace(Hashable(Key), SampleCounter()); - for (const auto &Item : AggregatedSamples) { - const PerfSample *Sample = Item.first.getPtr(); - computeCounterFromLBR(Sample, Item.second); - } -} - -uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) { - // The aggregated count is optional, so do not skip the line and return 1 if - // it's unmatched - uint64_t Count = 1; - if (!TraceIt.getCurrentLine().getAsInteger(10, Count)) - TraceIt.advance(); - return Count; -} - -void PerfScriptReader::parseSample(TraceStream &TraceIt) { - NumTotalSample++; - uint64_t Count = parseAggregatedCount(TraceIt); - assert(Count >= 1 && "Aggregated count should be >= 1!"); - parseSample(TraceIt, Count); -} - -bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary, - StringRef Line, - MMapEvent &MMap) { - // Parse a line like: - // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 - // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so - constexpr static const char *const Pattern = - "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " - "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " - "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; - // Field 0 - whole line - // Field 1 - PID - // Field 2 - base address - // Field 3 - mmapped size - // Field 4 - page offset - // Field 5 - binary path - enum EventIndex { - WHOLE_LINE = 0, - PID = 1, - MMAPPED_ADDRESS = 2, - MMAPPED_SIZE = 3, - PAGE_OFFSET = 4, - BINARY_PATH = 5 - }; - - Regex RegMmap2(Pattern); - SmallVector Fields; - bool R = RegMmap2.match(Line, &Fields); - if (!R) { - std::string ErrorMsg = "Cannot parse mmap event: " + Line.str() + " \n"; - exitWithError(ErrorMsg); - } - Fields[PID].getAsInteger(10, MMap.PID); - Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address); - Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size); - Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset); - MMap.BinaryPath = Fields[BINARY_PATH]; - if (ShowMmapEvents) { - outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at " - << format("0x%" PRIx64 ":", MMap.Address) << " \n"; - } - - StringRef BinaryName = llvm::sys::path::filename(MMap.BinaryPath); - return Binary->getName() == BinaryName; -} - -void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) { - MMapEvent MMap; - if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) - updateBinaryAddress(MMap); - TraceIt.advance(); -} - -void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { - if (isMMap2Event(TraceIt.getCurrentLine())) - parseMMap2Event(TraceIt); - else - parseSample(TraceIt); -} - -void PerfScriptReader::parseAndAggregateTrace() { - // Trace line iterator - TraceStream TraceIt(PerfTraceFile); - while (!TraceIt.isAtEoF()) - parseEventOrSample(TraceIt); -} - -// A LBR sample is like: -// 40062f 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... -// A heuristic for fast detection by checking whether a -// leading " 0x" and the '/' exist. -bool PerfScriptReader::isLBRSample(StringRef Line) { - // Skip the leading instruction pointer - SmallVector Records; - Line.trim().split(Records, " ", 2, false); - if (Records.size() < 2) - return false; - if (Records[1].startswith("0x") && Records[1].contains('/')) - return true; - return false; -} - -bool PerfScriptReader::isMMap2Event(StringRef Line) { - // Short cut to avoid string find is possible. - if (Line.empty() || Line.size() < 50) - return false; - - if (std::isdigit(Line[0])) - return false; - - // PERF_RECORD_MMAP2 does not appear at the beginning of the line - // for ` perf script --show-mmap-events -i ...` - return Line.contains("PERF_RECORD_MMAP2"); -} - -// The raw hybird sample is like -// e.g. -// 4005dc # call stack leaf -// 400634 -// 400684 # call stack root -// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... -// ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries -// Determine the perfscript contains hybrid samples(call stack + LBRs) by -// checking whether there is a non-empty call stack immediately followed by -// a LBR sample -PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) { - TraceStream TraceIt(FileName); - uint64_t FrameAddr = 0; - while (!TraceIt.isAtEoF()) { - // Skip the aggregated count - if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr)) - TraceIt.advance(); - - // Detect sample with call stack - int32_t Count = 0; - while (!TraceIt.isAtEoF() && - !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { - Count++; - TraceIt.advance(); - } - if (!TraceIt.isAtEoF()) { - if (isLBRSample(TraceIt.getCurrentLine())) { - if (Count > 0) - return PerfContent::LBRStack; - else - return PerfContent::LBR; - } - TraceIt.advance(); - } - } - - exitWithError("Invalid perf script input!"); - return PerfContent::UnknownContent; -} - -void HybridPerfReader::generateUnsymbolizedProfile() { - ProfileIsCSFlat = !IgnoreStackSamples; - if (ProfileIsCSFlat) - unwindSamples(); - else - PerfScriptReader::generateUnsymbolizedProfile(); -} - -void PerfScriptReader::warnTruncatedStack() { - if (ShowDetailedWarning) { - for (auto Address : InvalidReturnAddresses) { - WithColor::warning() - << "Truncated stack sample due to invalid return address at " - << format("0x%" PRIx64, Address) - << ", likely caused by frame pointer omission\n"; - } - } - emitWarningSummary( - InvalidReturnAddresses.size(), AggregatedSamples.size(), - "of truncated stack samples due to invalid return address, " - "likely caused by frame pointer omission."); -} - -void PerfScriptReader::warnInvalidRange() { - std::unordered_map, uint64_t, - pair_hash> - Ranges; - - for (const auto &Item : AggregatedSamples) { - const PerfSample *Sample = Item.first.getPtr(); - uint64_t Count = Item.second; - uint64_t EndOffeset = 0; - for (const LBREntry &LBR : Sample->LBRStack) { - uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); - uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target); - if (EndOffeset != 0) - Ranges[{StartOffset, EndOffeset}] += Count; - EndOffeset = SourceOffset; - } - } - - if (Ranges.empty()) { - WithColor::warning() << "No samples in perf script!\n"; - return; - } - - auto WarnInvalidRange = - [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) { - if (!ShowDetailedWarning) - return; - WithColor::warning() - << "[" - << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset)) - << "," - << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset)) - << "]: " << Msg << "\n"; - }; - - const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " - "likely due to profile and binary mismatch."; - const char *DanglingRangeMsg = "Range does not belong to any functions, " - "likely from PLT, .init or .fini section."; - const char *RangeCrossFuncMsg = - "Fall through range should not cross function boundaries, likely due to " - "profile and binary mismatch."; - - uint64_t InstNotBoundary = 0; - uint64_t UnmatchedRange = 0; - uint64_t RangeCrossFunc = 0; - - for (auto &I : Ranges) { - uint64_t StartOffset = I.first.first; - uint64_t EndOffset = I.first.second; - - if (!Binary->offsetIsCode(StartOffset) || - !Binary->offsetIsTransfer(EndOffset)) { - InstNotBoundary++; - WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg); - } - - auto *FRange = Binary->findFuncRangeForOffset(StartOffset); - if (!FRange) { - UnmatchedRange++; - WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg); - continue; - } - - if (EndOffset >= FRange->EndOffset) { - RangeCrossFunc++; - WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg); - } - } - - uint64_t TotalRangeNum = Ranges.size(); - emitWarningSummary(InstNotBoundary, TotalRangeNum, - "of profiled ranges are not on instruction boundary."); - emitWarningSummary(UnmatchedRange, TotalRangeNum, - "of profiled ranges do not belong to any functions."); - emitWarningSummary(RangeCrossFunc, TotalRangeNum, - "of profiled ranges do cross function boundaries."); -} - -void PerfScriptReader::parsePerfTraces() { - // Parse perf traces and do aggregation. - parseAndAggregateTrace(); - - emitWarningSummary(NumLeafExternalFrame, NumTotalSample, - "of samples have leaf external frame in call stack."); - emitWarningSummary(NumLeadingOutgoingLBR, NumTotalSample, - "of samples have leading external LBR."); - - // Generate unsymbolized profile. - warnTruncatedStack(); - warnInvalidRange(); - generateUnsymbolizedProfile(); - AggregatedSamples.clear(); - - if (SkipSymbolization) - writeUnsymbolizedProfile(OutputFilename); -} - -} // end namespace sampleprof -} // end namespace llvm diff --git a/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.h b/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.h deleted file mode 100644 index 9d84ad34bb3..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/PerfReader.h +++ /dev/null @@ -1,728 +0,0 @@ -//===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H -#define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H -#include "ErrorHandling.h" -#include "ProfiledBinary.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Regex.h" -#include -#include -#include -#include -#include - -using namespace llvm; -using namespace sampleprof; - -namespace llvm { -namespace sampleprof { - -// Stream based trace line iterator -class TraceStream { - std::string CurrentLine; - std::ifstream Fin; - bool IsAtEoF = false; - uint64_t LineNumber = 0; - -public: - TraceStream(StringRef Filename) : Fin(Filename.str()) { - if (!Fin.good()) - exitWithError("Error read input perf script file", Filename); - advance(); - } - - StringRef getCurrentLine() { - assert(!IsAtEoF && "Line iterator reaches the End-of-File!"); - return CurrentLine; - } - - uint64_t getLineNumber() { return LineNumber; } - - bool isAtEoF() { return IsAtEoF; } - - // Read the next line - void advance() { - if (!std::getline(Fin, CurrentLine)) { - IsAtEoF = true; - return; - } - LineNumber++; - } -}; - -// The type of input format. -enum PerfFormat { - UnknownFormat = 0, - PerfData = 1, // Raw linux perf.data. - PerfScript = 2, // Perf script create by `perf script` command. - UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen. - -}; - -// The type of perfscript content. -enum PerfContent { - UnknownContent = 0, - LBR = 1, // Only LBR sample. - LBRStack = 2, // Hybrid sample including call stack and LBR stack. -}; - -struct PerfInputFile { - std::string InputFile; - PerfFormat Format = PerfFormat::UnknownFormat; - PerfContent Content = PerfContent::UnknownContent; -}; - -// The parsed LBR sample entry. -struct LBREntry { - uint64_t Source = 0; - uint64_t Target = 0; - // An artificial branch stands for a series of consecutive branches starting - // from the current binary with a transition through external code and - // eventually landing back in the current binary. - bool IsArtificial = false; - LBREntry(uint64_t S, uint64_t T, bool I) - : Source(S), Target(T), IsArtificial(I) {} - -#ifndef NDEBUG - void print() const { - dbgs() << "from " << format("%#010x", Source) << " to " - << format("%#010x", Target); - if (IsArtificial) - dbgs() << " Artificial"; - } -#endif -}; - -#ifndef NDEBUG -static inline void printLBRStack(const SmallVectorImpl &LBRStack) { - for (size_t I = 0; I < LBRStack.size(); I++) { - dbgs() << "[" << I << "] "; - LBRStack[I].print(); - dbgs() << "\n"; - } -} - -static inline void printCallStack(const SmallVectorImpl &CallStack) { - for (size_t I = 0; I < CallStack.size(); I++) { - dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n"; - } -} -#endif - -// Hash interface for generic data of type T -// Data should implement a \fn getHashCode and a \fn isEqual -// Currently getHashCode is non-virtual to avoid the overhead of calling vtable, -// i.e we explicitly calculate hash of derived class, assign to base class's -// HashCode. This also provides the flexibility for calculating the hash code -// incrementally(like rolling hash) during frame stack unwinding since unwinding -// only changes the leaf of frame stack. \fn isEqual is a virtual function, -// which will have perf overhead. In the future, if we redesign a better hash -// function, then we can just skip this or switch to non-virtual function(like -// just ignore comparision if hash conflicts probabilities is low) -template class Hashable { -public: - std::shared_ptr Data; - Hashable(const std::shared_ptr &D) : Data(D) {} - - // Hash code generation - struct Hash { - uint64_t operator()(const Hashable &Key) const { - // Don't make it virtual for getHashCode - uint64_t Hash = Key.Data->getHashCode(); - assert(Hash && "Should generate HashCode for it!"); - return Hash; - } - }; - - // Hash equal - struct Equal { - bool operator()(const Hashable &LHS, const Hashable &RHS) const { - // Precisely compare the data, vtable will have overhead. - return LHS.Data->isEqual(RHS.Data.get()); - } - }; - - T *getPtr() const { return Data.get(); } -}; - -struct PerfSample { - // LBR stack recorded in FIFO order. - SmallVector LBRStack; - // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile - // generation - SmallVector CallStack; - - virtual ~PerfSample() = default; - uint64_t getHashCode() const { - // Use simple DJB2 hash - auto HashCombine = [](uint64_t H, uint64_t V) { - return ((H << 5) + H) + V; - }; - uint64_t Hash = 5381; - for (const auto &Value : CallStack) { - Hash = HashCombine(Hash, Value); - } - for (const auto &Entry : LBRStack) { - Hash = HashCombine(Hash, Entry.Source); - Hash = HashCombine(Hash, Entry.Target); - } - return Hash; - } - - bool isEqual(const PerfSample *Other) const { - const SmallVector &OtherCallStack = Other->CallStack; - const SmallVector &OtherLBRStack = Other->LBRStack; - - if (CallStack.size() != OtherCallStack.size() || - LBRStack.size() != OtherLBRStack.size()) - return false; - - if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin())) - return false; - - for (size_t I = 0; I < OtherLBRStack.size(); I++) { - if (LBRStack[I].Source != OtherLBRStack[I].Source || - LBRStack[I].Target != OtherLBRStack[I].Target) - return false; - } - return true; - } - -#ifndef NDEBUG - void print() const { - dbgs() << "LBR stack\n"; - printLBRStack(LBRStack); - dbgs() << "Call stack\n"; - printCallStack(CallStack); - } -#endif -}; -// After parsing the sample, we record the samples by aggregating them -// into this counter. The key stores the sample data and the value is -// the sample repeat times. -using AggregatedCounter = - std::unordered_map, uint64_t, - Hashable::Hash, Hashable::Equal>; - -using SampleVector = SmallVector, 16>; - -// The state for the unwinder, it doesn't hold the data but only keep the -// pointer/index of the data, While unwinding, the CallStack is changed -// dynamicially and will be recorded as the context of the sample -struct UnwindState { - // Profiled binary that current frame address belongs to - const ProfiledBinary *Binary; - // Call stack trie node - struct ProfiledFrame { - const uint64_t Address = DummyRoot; - ProfiledFrame *Parent; - SampleVector RangeSamples; - SampleVector BranchSamples; - std::unordered_map> Children; - - ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr) - : Address(Addr), Parent(P) {} - ProfiledFrame *getOrCreateChildFrame(uint64_t Address) { - assert(Address && "Address can't be zero!"); - auto Ret = Children.emplace( - Address, std::make_unique(Address, this)); - return Ret.first->second.get(); - } - void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) { - RangeSamples.emplace_back(std::make_tuple(Start, End, Count)); - } - void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) { - BranchSamples.emplace_back(std::make_tuple(Source, Target, Count)); - } - bool isDummyRoot() { return Address == DummyRoot; } - bool isExternalFrame() { return Address == ExternalAddr; } - bool isLeafFrame() { return Children.empty(); } - }; - - ProfiledFrame DummyTrieRoot; - ProfiledFrame *CurrentLeafFrame; - // Used to fall through the LBR stack - uint32_t LBRIndex = 0; - // Reference to PerfSample.LBRStack - const SmallVector &LBRStack; - // Used to iterate the address range - InstructionPointer InstPtr; - UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary) - : Binary(Binary), LBRStack(Sample->LBRStack), - InstPtr(Binary, Sample->CallStack.front()) { - initFrameTrie(Sample->CallStack); - } - - bool validateInitialState() { - uint64_t LBRLeaf = LBRStack[LBRIndex].Target; - uint64_t LeafAddr = CurrentLeafFrame->Address; - assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) && - "External leading LBR should match the leaf frame."); - - // When we take a stack sample, ideally the sampling distance between the - // leaf IP of stack and the last LBR target shouldn't be very large. - // Use a heuristic size (0x100) to filter out broken records. - if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) { - WithColor::warning() << "Bogus trace: stack tip = " - << format("%#010x", LeafAddr) - << ", LBR tip = " << format("%#010x\n", LBRLeaf); - return false; - } - return true; - } - - void checkStateConsistency() { - assert(InstPtr.Address == CurrentLeafFrame->Address && - "IP should align with context leaf"); - } - - bool hasNextLBR() const { return LBRIndex < LBRStack.size(); } - uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; } - uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; } - const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; } - bool IsLastLBR() const { return LBRIndex == 0; } - bool getLBRStackSize() const { return LBRStack.size(); } - void advanceLBR() { LBRIndex++; } - ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; } - - void pushFrame(uint64_t Address) { - CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address); - } - - void switchToFrame(uint64_t Address) { - if (CurrentLeafFrame->Address == Address) - return; - CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address); - } - - void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; } - - void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; } - - void initFrameTrie(const SmallVectorImpl &CallStack) { - ProfiledFrame *Cur = &DummyTrieRoot; - for (auto Address : reverse(CallStack)) { - Cur = Cur->getOrCreateChildFrame(Address); - } - CurrentLeafFrame = Cur; - } - - ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; } -}; - -// Base class for sample counter key with context -struct ContextKey { - uint64_t HashCode = 0; - virtual ~ContextKey() = default; - uint64_t getHashCode() { - if (HashCode == 0) - genHashCode(); - return HashCode; - } - virtual void genHashCode() = 0; - virtual bool isEqual(const ContextKey *K) const { - return HashCode == K->HashCode; - }; - - // Utilities for LLVM-style RTTI - enum ContextKind { CK_StringBased, CK_ProbeBased }; - const ContextKind Kind; - ContextKind getKind() const { return Kind; } - ContextKey(ContextKind K) : Kind(K){}; -}; - -// String based context id -struct StringBasedCtxKey : public ContextKey { - SampleContextFrameVector Context; - - bool WasLeafInlined; - StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){}; - static bool classof(const ContextKey *K) { - return K->getKind() == CK_StringBased; - } - - bool isEqual(const ContextKey *K) const override { - const StringBasedCtxKey *Other = dyn_cast(K); - return Context == Other->Context; - } - - void genHashCode() override { - HashCode = hash_value(SampleContextFrames(Context)); - } -}; - -// Probe based context key as the intermediate key of context -// String based context key will introduce redundant string handling -// since the callee context is inferred from the context string which -// need to be splitted by '@' to get the last location frame, so we -// can just use probe instead and generate the string in the end. -struct ProbeBasedCtxKey : public ContextKey { - SmallVector Probes; - - ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {} - static bool classof(const ContextKey *K) { - return K->getKind() == CK_ProbeBased; - } - - bool isEqual(const ContextKey *K) const override { - const ProbeBasedCtxKey *O = dyn_cast(K); - assert(O != nullptr && "Probe based key shouldn't be null in isEqual"); - return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(), - O->Probes.end()); - } - - void genHashCode() override { - for (const auto *P : Probes) { - HashCode = hash_combine(HashCode, P); - } - if (HashCode == 0) { - // Avoid zero value of HashCode when it's an empty list - HashCode = 1; - } - } -}; - -// The counter of branch samples for one function indexed by the branch, -// which is represented as the source and target offset pair. -using BranchSample = std::map, uint64_t>; -// The counter of range samples for one function indexed by the range, -// which is represented as the start and end offset pair. -using RangeSample = std::map, uint64_t>; -// Wrapper for sample counters including range counter and branch counter -struct SampleCounter { - RangeSample RangeCounter; - BranchSample BranchCounter; - - void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) { - assert(Start <= End && "Invalid instruction range"); - RangeCounter[{Start, End}] += Repeat; - } - void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) { - BranchCounter[{Source, Target}] += Repeat; - } -}; - -// Sample counter with context to support context-sensitive profile -using ContextSampleCounterMap = - std::unordered_map, SampleCounter, - Hashable::Hash, Hashable::Equal>; - -struct FrameStack { - SmallVector Stack; - ProfiledBinary *Binary; - FrameStack(ProfiledBinary *B) : Binary(B) {} - bool pushFrame(UnwindState::ProfiledFrame *Cur) { - assert(!Cur->isExternalFrame() && - "External frame's not expected for context stack."); - Stack.push_back(Cur->Address); - return true; - } - - void popFrame() { - if (!Stack.empty()) - Stack.pop_back(); - } - std::shared_ptr getContextKey(); -}; - -struct ProbeStack { - SmallVector Stack; - ProfiledBinary *Binary; - ProbeStack(ProfiledBinary *B) : Binary(B) {} - bool pushFrame(UnwindState::ProfiledFrame *Cur) { - assert(!Cur->isExternalFrame() && - "External frame's not expected for context stack."); - const MCDecodedPseudoProbe *CallProbe = - Binary->getCallProbeForAddr(Cur->Address); - // We may not find a probe for a merged or external callsite. - // Callsite merging may cause the loss of original probe IDs. - // Cutting off the context from here since the inliner will - // not know how to consume a context with unknown callsites. - if (!CallProbe) - return false; - Stack.push_back(CallProbe); - return true; - } - - void popFrame() { - if (!Stack.empty()) - Stack.pop_back(); - } - // Use pseudo probe based context key to get the sample counter - // A context stands for a call path from 'main' to an uninlined - // callee with all inline frames recovered on that path. The probes - // belonging to that call path is the probes either originated from - // the callee or from any functions inlined into the callee. Since - // pseudo probes are organized in a tri-tree style after decoded, - // the tree path from the tri-tree root (which is the uninlined - // callee) to the probe node forms an inline context. - // Here we use a list of probe(pointer) as the context key to speed up - // aggregation and the final context string will be generate in - // ProfileGenerator - std::shared_ptr getContextKey(); -}; - -/* -As in hybrid sample we have a group of LBRs and the most recent sampling call -stack, we can walk through those LBRs to infer more call stacks which would be -used as context for profile. VirtualUnwinder is the class to do the call stack -unwinding based on LBR state. Two types of unwinding are processd here: -1) LBR unwinding and 2) linear range unwinding. -Specifically, for each LBR entry(can be classified into call, return, regular -branch), LBR unwinding will replay the operation by pushing, popping or -switching leaf frame towards the call stack and since the initial call stack -is most recently sampled, the replay should be in anti-execution order, i.e. for -the regular case, pop the call stack when LBR is call, push frame on call stack -when LBR is return. After each LBR processed, it also needs to align with the -next LBR by going through instructions from previous LBR's target to current -LBR's source, which is the linear unwinding. As instruction from linear range -can come from different function by inlining, linear unwinding will do the range -splitting and record counters by the range with same inline context. Over those -unwinding process we will record each call stack as context id and LBR/linear -range as sample counter for further CS profile generation. -*/ -class VirtualUnwinder { -public: - VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B) - : CtxCounterMap(Counter), Binary(B) {} - bool unwind(const PerfSample *Sample, uint64_t Repeat); - std::set &getUntrackedCallsites() { return UntrackedCallsites; } - - uint64_t NumTotalBranches = 0; - uint64_t NumExtCallBranch = 0; - uint64_t NumMissingExternalFrame = 0; - uint64_t NumMismatchedProEpiBranch = 0; - uint64_t NumMismatchedExtCallBranch = 0; - -private: - bool isCallState(UnwindState &State) const { - // The tail call frame is always missing here in stack sample, we will - // use a specific tail call tracker to infer it. - return Binary->addressIsCall(State.getCurrentLBRSource()); - } - - bool isReturnState(UnwindState &State) const { - // Simply check addressIsReturn, as ret is always reliable, both for - // regular call and tail call. - if (!Binary->addressIsReturn(State.getCurrentLBRSource())) - return false; - - // In a callback case, a return from internal code, say A, to external - // runtime can happen. The external runtime can then call back to - // another internal routine, say B. Making an artificial branch that - // looks like a return from A to B can confuse the unwinder to treat - // the instruction before B as the call instruction. Here we detect this - // case if the return target is not the next inst of call inst, then we just - // do not treat it as a return. - uint64_t CallAddr = - Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()); - return (CallAddr != 0); - } - - void unwindCall(UnwindState &State); - void unwindLinear(UnwindState &State, uint64_t Repeat); - void unwindReturn(UnwindState &State); - void unwindBranch(UnwindState &State); - - template - void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack); - // Collect each samples on trie node by DFS traversal - template - void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack); - void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur); - - void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State, - uint64_t Repeat); - void recordBranchCount(const LBREntry &Branch, UnwindState &State, - uint64_t Repeat); - - ContextSampleCounterMap *CtxCounterMap; - // Profiled binary that current frame address belongs to - ProfiledBinary *Binary; - // Keep track of all untracked callsites - std::set UntrackedCallsites; -}; - -// Read perf trace to parse the events and samples. -class PerfReaderBase { -public: - PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace) - : Binary(B), PerfTraceFile(PerfTrace) { - // Initialize the base address to preferred address. - Binary->setBaseAddress(Binary->getPreferredBaseAddress()); - }; - virtual ~PerfReaderBase() = default; - static std::unique_ptr create(ProfiledBinary *Binary, - PerfInputFile &PerfInput); - - // Entry of the reader to parse multiple perf traces - virtual void parsePerfTraces() = 0; - const ContextSampleCounterMap &getSampleCounters() const { - return SampleCounters; - } - bool profileIsCSFlat() { return ProfileIsCSFlat; } - -protected: - ProfiledBinary *Binary = nullptr; - StringRef PerfTraceFile; - - ContextSampleCounterMap SampleCounters; - bool ProfileIsCSFlat = false; - - uint64_t NumTotalSample = 0; - uint64_t NumLeafExternalFrame = 0; - uint64_t NumLeadingOutgoingLBR = 0; -}; - -// Read perf script to parse the events and samples. -class PerfScriptReader : public PerfReaderBase { -public: - PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace) - : PerfReaderBase(B, PerfTrace){}; - - // Entry of the reader to parse multiple perf traces - virtual void parsePerfTraces() override; - // Generate perf script from perf data - static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary, - PerfInputFile &File); - // Extract perf script type by peaking at the input - static PerfContent checkPerfScriptType(StringRef FileName); - -protected: - // The parsed MMap event - struct MMapEvent { - uint64_t PID = 0; - uint64_t Address = 0; - uint64_t Size = 0; - uint64_t Offset = 0; - StringRef BinaryPath; - }; - - // Check whether a given line is LBR sample - static bool isLBRSample(StringRef Line); - // Check whether a given line is MMAP event - static bool isMMap2Event(StringRef Line); - // Parse a single line of a PERF_RECORD_MMAP2 event looking for a - // mapping between the binary name and its memory layout. - static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line, - MMapEvent &MMap); - // Update base address based on mmap events - void updateBinaryAddress(const MMapEvent &Event); - // Parse mmap event and update binary address - void parseMMap2Event(TraceStream &TraceIt); - // Parse perf events/samples and do aggregation - void parseAndAggregateTrace(); - // Parse either an MMAP event or a perf sample - void parseEventOrSample(TraceStream &TraceIt); - // Warn if the relevant mmap event is missing. - void warnIfMissingMMap(); - // Emit accumulate warnings. - void warnTruncatedStack(); - // Warn if range is invalid. - void warnInvalidRange(); - // Extract call stack from the perf trace lines - bool extractCallstack(TraceStream &TraceIt, - SmallVectorImpl &CallStack); - // Extract LBR stack from one perf trace line - bool extractLBRStack(TraceStream &TraceIt, - SmallVectorImpl &LBRStack); - uint64_t parseAggregatedCount(TraceStream &TraceIt); - // Parse one sample from multiple perf lines, override this for different - // sample type - void parseSample(TraceStream &TraceIt); - // An aggregated count is given to indicate how many times the sample is - // repeated. - virtual void parseSample(TraceStream &TraceIt, uint64_t Count){}; - void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat); - // Post process the profile after trace aggregation, we will do simple range - // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). - virtual void generateUnsymbolizedProfile(); - void writeUnsymbolizedProfile(StringRef Filename); - void writeUnsymbolizedProfile(raw_fd_ostream &OS); - - // Samples with the repeating time generated by the perf reader - AggregatedCounter AggregatedSamples; - // Keep track of all invalid return addresses - std::set InvalidReturnAddresses; -}; - -/* - The reader of LBR only perf script. - A typical LBR sample is like: - 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... - ... 0x4005c8/0x4005dc/P/-/-/0 -*/ -class LBRPerfReader : public PerfScriptReader { -public: - LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace) - : PerfScriptReader(Binary, PerfTrace){}; - // Parse the LBR only sample. - virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override; -}; - -/* - Hybrid perf script includes a group of hybrid samples(LBRs + call stack), - which is used to generate CS profile. An example of hybrid sample: - 4005dc # call stack leaf - 400634 - 400684 # call stack root - 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... - ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries -*/ -class HybridPerfReader : public PerfScriptReader { -public: - HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace) - : PerfScriptReader(Binary, PerfTrace){}; - // Parse the hybrid sample including the call and LBR line - void parseSample(TraceStream &TraceIt, uint64_t Count) override; - void generateUnsymbolizedProfile() override; - -private: - // Unwind the hybrid samples after aggregration - void unwindSamples(); -}; - -/* - Format of unsymbolized profile: - - [frame1 @ frame2 @ ...] # If it's a CS profile - number of entries in RangeCounter - from_1-to_1:count_1 - from_2-to_2:count_2 - ...... - from_n-to_n:count_n - number of entries in BranchCounter - src_1->dst_1:count_1 - src_2->dst_2:count_2 - ...... - src_n->dst_n:count_n - [frame1 @ frame2 @ ...] # Next context - ...... - -Note that non-CS profile doesn't have the empty `[]` context. -*/ -class UnsymbolizedProfileReader : public PerfReaderBase { -public: - UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace) - : PerfReaderBase(Binary, PerfTrace){}; - void parsePerfTraces() override; - -private: - void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters); - void readUnsymbolizedProfile(StringRef Filename); - - std::unordered_set ContextStrSet; -}; - -} // end namespace sampleprof -} // end namespace llvm - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.cpp b/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.cpp deleted file mode 100644 index 1248e37dc50..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.cpp +++ /dev/null @@ -1,979 +0,0 @@ -//===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ProfileGenerator.h" -#include "ErrorHandling.h" -#include "ProfiledBinary.h" -#include "llvm/ProfileData/ProfileCommon.h" -#include -#include - -cl::opt OutputFilename("output", cl::value_desc("output"), - cl::Required, - cl::desc("Output profile file")); -static cl::alias OutputA("o", cl::desc("Alias for --output"), - cl::aliasopt(OutputFilename)); - -static cl::opt OutputFormat( - "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), - cl::values( - clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), - clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"), - clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), - clEnumValN(SPF_Text, "text", "Text encoding"), - clEnumValN(SPF_GCC, "gcc", - "GCC encoding (only meaningful for -sample)"))); - -cl::opt UseMD5( - "use-md5", cl::init(false), cl::Hidden, - cl::desc("Use md5 to represent function names in the output profile (only " - "meaningful for -extbinary)")); - -static cl::opt PopulateProfileSymbolList( - "populate-profile-symbol-list", cl::init(false), cl::Hidden, - cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); - -static cl::opt FillZeroForAllFuncs( - "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, - cl::desc("Attribute all functions' range with zero count " - "even it's not hit by any samples.")); - -static cl::opt RecursionCompression( - "compress-recursion", - cl::desc("Compressing recursion by deduplicating adjacent frame " - "sequences up to the specified size. -1 means no size limit."), - cl::Hidden, - cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); - -static cl::opt - TrimColdProfile("trim-cold-profile", cl::init(false), cl::ZeroOrMore, - cl::desc("If the total count of the profile is smaller " - "than threshold, it will be trimmed.")); - -static cl::opt CSProfMergeColdContext( - "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore, - cl::desc("If the total count of context profile is smaller than " - "the threshold, it will be merged into context-less base " - "profile.")); - -static cl::opt CSProfMaxColdContextDepth( - "csprof-max-cold-context-depth", cl::init(1), cl::ZeroOrMore, - cl::desc("Keep the last K contexts while merging cold profile. 1 means the " - "context-less base profile")); - -static cl::opt CSProfMaxContextDepth( - "csprof-max-context-depth", cl::ZeroOrMore, - cl::desc("Keep the last K contexts while merging profile. -1 means no " - "depth limit."), - cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); - -static cl::opt HotFunctionDensityThreshold( - "hot-function-density-threshold", llvm::cl::init(1000), - llvm::cl::desc( - "specify density threshold for hot functions (default: 1000)"), - llvm::cl::Optional); -static cl::opt ShowDensity("show-density", llvm::cl::init(false), - llvm::cl::desc("show profile density details"), - llvm::cl::Optional); - -static cl::opt UpdateTotalSamples( - "update-total-samples", llvm::cl::init(false), - llvm::cl::desc( - "Update total samples by accumulating all its body samples."), - llvm::cl::Optional); - -extern cl::opt ProfileSummaryCutoffHot; - -static cl::opt GenCSNestedProfile( - "gen-cs-nested-profile", cl::Hidden, cl::init(false), - cl::desc("Generate nested function profiles for CSSPGO")); - -using namespace llvm; -using namespace sampleprof; - -namespace llvm { -namespace sampleprof { - -// Initialize the MaxCompressionSize to -1 which means no size limit -int32_t CSProfileGenerator::MaxCompressionSize = -1; - -int CSProfileGenerator::MaxContextDepth = -1; - -bool ProfileGeneratorBase::UseFSDiscriminator = false; - -std::unique_ptr -ProfileGeneratorBase::create(ProfiledBinary *Binary, - const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCSFlat) { - std::unique_ptr Generator; - if (ProfileIsCSFlat) { - if (Binary->useFSDiscriminator()) - exitWithError("FS discriminator is not supported in CS profile."); - Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); - } else { - Generator.reset(new ProfileGenerator(Binary, SampleCounters)); - } - ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); - FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); - - return Generator; -} - -void ProfileGeneratorBase::write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap) { - // Populate profile symbol list if extended binary format is used. - ProfileSymbolList SymbolList; - - if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { - Binary->populateSymbolListFromDWARF(SymbolList); - Writer->setProfileSymbolList(&SymbolList); - } - - if (std::error_code EC = Writer->write(ProfileMap)) - exitWithError(std::move(EC)); -} - -void ProfileGeneratorBase::write() { - auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); - if (std::error_code EC = WriterOrErr.getError()) - exitWithError(EC, OutputFilename); - - if (UseMD5) { - if (OutputFormat != SPF_Ext_Binary) - WithColor::warning() << "-use-md5 is ignored. Specify " - "--format=extbinary to enable it\n"; - else - WriterOrErr.get()->setUseMD5(); - } - - write(std::move(WriterOrErr.get()), ProfileMap); -} - -void ProfileGeneratorBase::showDensitySuggestion(double Density) { - if (Density == 0.0) - WithColor::warning() << "The --profile-summary-cutoff-hot option may be " - "set too low. Please check your command.\n"; - else if (Density < HotFunctionDensityThreshold) - WithColor::warning() - << "AutoFDO is estimated to optimize better with " - << format("%.1f", HotFunctionDensityThreshold / Density) - << "x more samples. Please consider increasing sampling rate or " - "profiling for longer duration to get more samples.\n"; - - if (ShowDensity) - outs() << "Minimum profile density for hot functions with top " - << format("%.2f", - static_cast(ProfileSummaryCutoffHot.getValue()) / - 10000) - << "% total samples: " << format("%.1f", Density) << "\n"; -} - -double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles, - uint64_t HotCntThreshold) { - double Density = DBL_MAX; - std::vector HotFuncs; - for (auto &I : Profiles) { - auto &FuncSamples = I.second; - if (FuncSamples.getTotalSamples() < HotCntThreshold) - continue; - HotFuncs.emplace_back(&FuncSamples); - } - - for (auto *FuncSamples : HotFuncs) { - auto *Func = Binary->getBinaryFunction(FuncSamples->getName()); - if (!Func) - continue; - uint64_t FuncSize = Func->getFuncSize(); - if (FuncSize == 0) - continue; - Density = - std::min(Density, static_cast(FuncSamples->getTotalSamples()) / - FuncSize); - } - - return Density == DBL_MAX ? 0.0 : Density; -} - -void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, - const RangeSample &Ranges) { - - /* - Regions may overlap with each other. Using the boundary info, find all - disjoint ranges and their sample count. BoundaryPoint contains the count - multiple samples begin/end at this points. - - |<--100-->| Sample1 - |<------200------>| Sample2 - A B C - - In the example above, - Sample1 begins at A, ends at B, its value is 100. - Sample2 beings at A, ends at C, its value is 200. - For A, BeginCount is the sum of sample begins at A, which is 300 and no - samples ends at A, so EndCount is 0. - Then boundary points A, B, and C with begin/end counts are: - A: (300, 0) - B: (0, 100) - C: (0, 200) - */ - struct BoundaryPoint { - // Sum of sample counts beginning at this point - uint64_t BeginCount = UINT64_MAX; - // Sum of sample counts ending at this point - uint64_t EndCount = UINT64_MAX; - // Is the begin point of a zero range. - bool IsZeroRangeBegin = false; - // Is the end point of a zero range. - bool IsZeroRangeEnd = false; - - void addBeginCount(uint64_t Count) { - if (BeginCount == UINT64_MAX) - BeginCount = 0; - BeginCount += Count; - } - - void addEndCount(uint64_t Count) { - if (EndCount == UINT64_MAX) - EndCount = 0; - EndCount += Count; - } - }; - - /* - For the above example. With boundary points, follwing logic finds two - disjoint region of - - [A,B]: 300 - [B+1,C]: 200 - - If there is a boundary point that both begin and end, the point itself - becomes a separate disjoint region. For example, if we have original - ranges of - - |<--- 100 --->| - |<--- 200 --->| - A B C - - there are three boundary points with their begin/end counts of - - A: (100, 0) - B: (200, 100) - C: (0, 200) - - the disjoint ranges would be - - [A, B-1]: 100 - [B, B]: 300 - [B+1, C]: 200. - - Example for zero value range: - - |<--- 100 --->| - |<--- 200 --->| - |<--------------- 0 ----------------->| - A B C D E F - - [A, B-1] : 0 - [B, C] : 100 - [C+1, D-1]: 0 - [D, E] : 200 - [E+1, F] : 0 - */ - std::map Boundaries; - - for (const auto &Item : Ranges) { - assert(Item.first.first <= Item.first.second && - "Invalid instruction range"); - auto &BeginPoint = Boundaries[Item.first.first]; - auto &EndPoint = Boundaries[Item.first.second]; - uint64_t Count = Item.second; - - BeginPoint.addBeginCount(Count); - EndPoint.addEndCount(Count); - if (Count == 0) { - BeginPoint.IsZeroRangeBegin = true; - EndPoint.IsZeroRangeEnd = true; - } - } - - // Use UINT64_MAX to indicate there is no existing range between BeginAddress - // and the next valid address - uint64_t BeginAddress = UINT64_MAX; - int ZeroRangeDepth = 0; - uint64_t Count = 0; - for (const auto &Item : Boundaries) { - uint64_t Address = Item.first; - const BoundaryPoint &Point = Item.second; - if (Point.BeginCount != UINT64_MAX) { - if (BeginAddress != UINT64_MAX) - DisjointRanges[{BeginAddress, Address - 1}] = Count; - Count += Point.BeginCount; - BeginAddress = Address; - ZeroRangeDepth += Point.IsZeroRangeBegin; - } - if (Point.EndCount != UINT64_MAX) { - assert((BeginAddress != UINT64_MAX) && - "First boundary point cannot be 'end' point"); - DisjointRanges[{BeginAddress, Address}] = Count; - assert(Count >= Point.EndCount && "Mismatched live ranges"); - Count -= Point.EndCount; - BeginAddress = Address + 1; - ZeroRangeDepth -= Point.IsZeroRangeEnd; - // If the remaining count is zero and it's no longer in a zero range, this - // means we consume all the ranges before, thus mark BeginAddress as - // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: - // [<---- 10 ---->] - // [<---- 20 ---->] - // A B C D - // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't - // have the [B+1, C-1] zero range. - if (Count == 0 && ZeroRangeDepth == 0) - BeginAddress = UINT64_MAX; - } - } -} - -void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( - FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, - uint64_t Count) { - // Use the maximum count of samples with same line location - uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); - - // Use duplication factor to compensated for loop unroll/vectorization. - // Note that this is only needed when we're taking MAX of the counts at - // the location instead of SUM. - Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); - - ErrorOr R = - FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); - - uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount <= Count) { - FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, - Count - PreviousCount); - } -} - -void ProfileGeneratorBase::updateTotalSamples() { - if (!UpdateTotalSamples) - return; - - for (auto &Item : ProfileMap) { - FunctionSamples &FunctionProfile = Item.second; - FunctionProfile.updateTotalSamples(); - } -} - -FunctionSamples & -ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) { - SampleContext Context(FuncName); - auto Ret = ProfileMap.emplace(Context, FunctionSamples()); - if (Ret.second) { - FunctionSamples &FProfile = Ret.first->second; - FProfile.setContext(Context); - } - return Ret.first->second; -} - -void ProfileGenerator::generateProfile() { - if (Binary->usePseudoProbes()) { - // TODO: Support probe based profile generation - exitWithError("Probe based profile generation not supported for AutoFDO, " - "consider dropping `--ignore-stack-samples` or adding `--use-dwarf-correlation`."); - } else { - generateLineNumBasedProfile(); - } - postProcessProfiles(); -} - -void ProfileGenerator::postProcessProfiles() { - computeSummaryAndThreshold(); - trimColdProfiles(ProfileMap, ColdCountThreshold); - calculateAndShowDensity(ProfileMap); -} - -void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, - uint64_t ColdCntThreshold) { - if (!TrimColdProfile) - return; - - // Move cold profiles into a tmp container. - std::vector ColdProfiles; - for (const auto &I : ProfileMap) { - if (I.second.getTotalSamples() < ColdCntThreshold) - ColdProfiles.emplace_back(I.first); - } - - // Remove the cold profile from ProfileMap. - for (const auto &I : ColdProfiles) - ProfileMap.erase(I); -} - -void ProfileGenerator::generateLineNumBasedProfile() { - assert(SampleCounters.size() == 1 && - "Must have one entry for profile generation."); - const SampleCounter &SC = SampleCounters.begin()->second; - // Fill in function body samples - populateBodySamplesForAllFunctions(SC.RangeCounter); - // Fill in boundary sample counts as well as call site samples for calls - populateBoundarySamplesForAllFunctions(SC.BranchCounter); - - updateTotalSamples(); -} - -FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( - const SampleContextFrameVector &FrameVec, uint64_t Count) { - // Get top level profile - FunctionSamples *FunctionProfile = - &getTopLevelFunctionProfile(FrameVec[0].FuncName); - FunctionProfile->addTotalSamples(Count); - - for (size_t I = 1; I < FrameVec.size(); I++) { - LineLocation Callsite( - FrameVec[I - 1].Location.LineOffset, - getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); - FunctionSamplesMap &SamplesMap = - FunctionProfile->functionSamplesAt(Callsite); - auto Ret = - SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples()); - if (Ret.second) { - SampleContext Context(FrameVec[I].FuncName); - Ret.first->second.setContext(Context); - } - FunctionProfile = &Ret.first->second; - FunctionProfile->addTotalSamples(Count); - } - - return *FunctionProfile; -} - -RangeSample -ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { - RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); - if (FillZeroForAllFuncs) { - for (auto &FuncI : Binary->getAllBinaryFunctions()) { - for (auto &R : FuncI.second.Ranges) { - Ranges[{R.first, R.second - 1}] += 0; - } - } - } else { - // For each range, we search for all ranges of the function it belongs to - // and initialize it with zero count, so it remains zero if doesn't hit any - // samples. This is to be consistent with compiler that interpret zero count - // as unexecuted(cold). - for (const auto &I : RangeCounter) { - uint64_t StartOffset = I.first.first; - for (const auto &Range : Binary->getRangesForOffset(StartOffset)) - Ranges[{Range.first, Range.second - 1}] += 0; - } - } - RangeSample DisjointRanges; - findDisjointRanges(DisjointRanges, Ranges); - return DisjointRanges; -} - -void ProfileGenerator::populateBodySamplesForAllFunctions( - const RangeSample &RangeCounter) { - for (const auto &Range : preprocessRangeCounter(RangeCounter)) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); - uint64_t Count = Range.second; - - InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - - do { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); - const SampleContextFrameVector &FrameVec = - Binary->getFrameLocationStack(Offset); - if (!FrameVec.empty()) { - // FIXME: As accumulating total count per instruction caused some - // regression, we changed to accumulate total count per byte as a - // workaround. Tuning hotness threshold on the compiler side might be - // necessary in the future. - FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( - FrameVec, Count * Binary->getInstSize(Offset)); - updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), - Count); - } - } while (IP.advance() && IP.Address <= RangeEnd); - } -} - -StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) { - // Get the function range by branch target if it's a call branch. - auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset); - - // We won't accumulate sample count for a range whose start is not the real - // function entry such as outlined function or inner labels. - if (!FRange || !FRange->IsFuncEntry) - return StringRef(); - - return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); -} - -void ProfileGenerator::populateBoundarySamplesForAllFunctions( - const BranchSample &BranchCounters) { - for (const auto &Entry : BranchCounters) { - uint64_t SourceOffset = Entry.first.first; - uint64_t TargetOffset = Entry.first.second; - uint64_t Count = Entry.second; - assert(Count != 0 && "Unexpected zero weight branch"); - - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); - if (CalleeName.size() == 0) - continue; - // Record called target sample and its count. - const SampleContextFrameVector &FrameVec = - Binary->getFrameLocationStack(SourceOffset); - if (!FrameVec.empty()) { - FunctionSamples &FunctionProfile = - getLeafProfileAndAddTotalSamples(FrameVec, 0); - FunctionProfile.addCalledTargetSamples( - FrameVec.back().Location.LineOffset, - getBaseDiscriminator(FrameVec.back().Location.Discriminator), - CalleeName, Count); - } - // Add head samples for callee. - FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName); - CalleeProfile.addHeadSamples(Count); - } -} - -void ProfileGeneratorBase::calculateAndShowDensity( - const SampleProfileMap &Profiles) { - double Density = calculateDensity(Profiles, HotCountThreshold); - showDensitySuggestion(Density); -} - -FunctionSamples &CSProfileGenerator::getFunctionProfileForContext( - const SampleContextFrameVector &Context, bool WasLeafInlined) { - auto I = ProfileMap.find(SampleContext(Context)); - if (I == ProfileMap.end()) { - // Save the new context for future references. - SampleContextFrames NewContext = *Contexts.insert(Context).first; - SampleContext FContext(NewContext, RawContext); - auto Ret = ProfileMap.emplace(FContext, FunctionSamples()); - if (WasLeafInlined) - FContext.setAttribute(ContextWasInlined); - FunctionSamples &FProfile = Ret.first->second; - FProfile.setContext(FContext); - return Ret.first->second; - } - return I->second; -} - -void CSProfileGenerator::generateProfile() { - FunctionSamples::ProfileIsCSFlat = true; - - if (Binary->getTrackFuncContextSize()) - computeSizeForProfiledFunctions(); - - if (Binary->usePseudoProbes()) { - // Enable pseudo probe functionalities in SampleProf - FunctionSamples::ProfileIsProbeBased = true; - generateProbeBasedProfile(); - } else { - generateLineNumBasedProfile(); - } - postProcessProfiles(); -} - -void CSProfileGenerator::computeSizeForProfiledFunctions() { - // Hash map to deduplicate the function range and the item is a pair of - // function start and end offset. - std::unordered_map AggregatedRanges; - // Go through all the ranges in the CS counters, use the start of the range to - // look up the function it belongs and record the function range. - for (const auto &CI : SampleCounters) { - for (const auto &Item : CI.second.RangeCounter) { - // FIXME: Filter the bogus crossing function range. - uint64_t StartOffset = Item.first.first; - // Note that a function can be spilt into multiple ranges, so get all - // ranges of the function. - for (const auto &Range : Binary->getRangesForOffset(StartOffset)) - AggregatedRanges[Range.first] = Range.second; - } - } - - for (const auto &I : AggregatedRanges) { - uint64_t StartOffset = I.first; - uint64_t EndOffset = I.second; - Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset); - } -} - -void CSProfileGenerator::generateLineNumBasedProfile() { - for (const auto &CI : SampleCounters) { - const auto *CtxKey = cast(CI.first.getPtr()); - - // Get or create function profile for the range - FunctionSamples &FunctionProfile = - getFunctionProfileForContext(CtxKey->Context, CtxKey->WasLeafInlined); - - // Fill in function body samples - populateBodySamplesForFunction(FunctionProfile, CI.second.RangeCounter); - // Fill in boundary sample counts as well as call site samples for calls - populateBoundarySamplesForFunction(CtxKey->Context, FunctionProfile, - CI.second.BranchCounter); - } - // Fill in call site value sample for inlined calls and also use context to - // infer missing samples. Since we don't have call count for inlined - // functions, we estimate it from inlinee's profile using the entry of the - // body sample. - populateInferredFunctionSamples(); - - updateTotalSamples(); -} - -void CSProfileGenerator::populateBodySamplesForFunction( - FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { - // Compute disjoint ranges first, so we can use MAX - // for calculating count for each location. - RangeSample Ranges; - findDisjointRanges(Ranges, RangeCounter); - for (const auto &Range : Ranges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); - uint64_t Count = Range.second; - // Disjoint ranges have introduce zero-filled gap that - // doesn't belong to current context, filter them out. - if (Count == 0) - continue; - - InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - - do { - uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); - auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); - if (LeafLoc.hasValue()) { - // Recording body sample for this specific context - updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); - FunctionProfile.addTotalSamples(Count); - } - } while (IP.advance() && IP.Address <= RangeEnd); - } -} - -void CSProfileGenerator::populateBoundarySamplesForFunction( - SampleContextFrames ContextId, FunctionSamples &FunctionProfile, - const BranchSample &BranchCounters) { - - for (const auto &Entry : BranchCounters) { - uint64_t SourceOffset = Entry.first.first; - uint64_t TargetOffset = Entry.first.second; - uint64_t Count = Entry.second; - assert(Count != 0 && "Unexpected zero weight branch"); - - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); - if (CalleeName.size() == 0) - continue; - - // Record called target sample and its count - auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset); - if (!LeafLoc.hasValue()) - continue; - FunctionProfile.addCalledTargetSamples( - LeafLoc->Location.LineOffset, - getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName, - Count); - - // Record head sample for called target(callee) - SampleContextFrameVector CalleeCtx(ContextId.begin(), ContextId.end()); - assert(CalleeCtx.back().FuncName == LeafLoc->FuncName && - "Leaf function name doesn't match"); - CalleeCtx.back() = *LeafLoc; - CalleeCtx.emplace_back(CalleeName, LineLocation(0, 0)); - FunctionSamples &CalleeProfile = getFunctionProfileForContext(CalleeCtx); - CalleeProfile.addHeadSamples(Count); - } -} - -static SampleContextFrame -getCallerContext(SampleContextFrames CalleeContext, - SampleContextFrameVector &CallerContext) { - assert(CalleeContext.size() > 1 && "Unexpected empty context"); - CalleeContext = CalleeContext.drop_back(); - CallerContext.assign(CalleeContext.begin(), CalleeContext.end()); - SampleContextFrame CallerFrame = CallerContext.back(); - CallerContext.back().Location = LineLocation(0, 0); - return CallerFrame; -} - -void CSProfileGenerator::populateInferredFunctionSamples() { - for (const auto &Item : ProfileMap) { - const auto &CalleeContext = Item.first; - const FunctionSamples &CalleeProfile = Item.second; - - // If we already have head sample counts, we must have value profile - // for call sites added already. Skip to avoid double counting. - if (CalleeProfile.getHeadSamples()) - continue; - // If we don't have context, nothing to do for caller's call site. - // This could happen for entry point function. - if (CalleeContext.isBaseContext()) - continue; - - // Infer Caller's frame loc and context ID through string splitting - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContext.getContextFrames(), CallerContextId); - SampleContextFrames CallerContext(CallerContextId); - - // It's possible that we haven't seen any sample directly in the caller, - // in which case CallerProfile will not exist. But we can't modify - // ProfileMap while iterating it. - // TODO: created function profile for those callers too - if (ProfileMap.find(CallerContext) == ProfileMap.end()) - continue; - FunctionSamples &CallerProfile = ProfileMap[CallerContext]; - - // Since we don't have call count for inlined functions, we - // estimate it from inlinee's profile using entry body sample. - uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples(); - // If we don't have samples with location, use 1 to indicate live. - if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size()) - EstimatedCallCount = 1; - CallerProfile.addCalledTargetSamples( - CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - CalleeProfile.getContext().getName(), EstimatedCallCount); - CallerProfile.addBodySamples(CallerLeafFrameLoc.Location.LineOffset, - CallerLeafFrameLoc.Location.Discriminator, - EstimatedCallCount); - CallerProfile.addTotalSamples(EstimatedCallCount); - } -} - -void CSProfileGenerator::postProcessProfiles() { - // Compute hot/cold threshold based on profile. This will be used for cold - // context profile merging/trimming. - computeSummaryAndThreshold(); - - // Run global pre-inliner to adjust/merge context profile based on estimated - // inline decisions. - if (EnableCSPreInliner) { - CSPreInliner(ProfileMap, *Binary, HotCountThreshold, ColdCountThreshold) - .run(); - // Turn off the profile merger by default unless it is explicitly enabled. - if (!CSProfMergeColdContext.getNumOccurrences()) - CSProfMergeColdContext = false; - } - - // Trim and merge cold context profile using cold threshold above. - if (TrimColdProfile || CSProfMergeColdContext) { - SampleContextTrimmer(ProfileMap) - .trimAndMergeColdContextProfiles( - HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, - CSProfMaxColdContextDepth, EnableCSPreInliner); - } - - // Merge function samples of CS profile to calculate profile density. - sampleprof::SampleProfileMap ContextLessProfiles; - for (const auto &I : ProfileMap) { - ContextLessProfiles[I.second.getName()].merge(I.second); - } - - calculateAndShowDensity(ContextLessProfiles); - if (GenCSNestedProfile) { - CSProfileConverter CSConverter(ProfileMap); - CSConverter.convertProfiles(); - FunctionSamples::ProfileIsCSFlat = false; - FunctionSamples::ProfileIsCSNested = EnableCSPreInliner; - } -} - -void ProfileGeneratorBase::computeSummaryAndThreshold() { - SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - auto Summary = Builder.computeSummaryForProfiles(ProfileMap); - HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( - (Summary->getDetailedSummary())); - ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( - (Summary->getDetailedSummary())); -} - -// Helper function to extract context prefix string stack -// Extract context stack for reusing, leaf context stack will -// be added compressed while looking up function profile -static void extractPrefixContextStack( - SampleContextFrameVector &ContextStack, - const SmallVectorImpl &Probes, - ProfiledBinary *Binary) { - for (const auto *P : Probes) { - Binary->getInlineContextForProbe(P, ContextStack, true); - } -} - -void CSProfileGenerator::generateProbeBasedProfile() { - for (const auto &CI : SampleCounters) { - const ProbeBasedCtxKey *CtxKey = - dyn_cast(CI.first.getPtr()); - SampleContextFrameVector ContextStack; - extractPrefixContextStack(ContextStack, CtxKey->Probes, Binary); - // Fill in function body samples from probes, also infer caller's samples - // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); - // Fill in boundary samples for a call probe - populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); - } -} - -void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, - ProbeCounterMap &ProbeCounter) { - RangeSample Ranges; - findDisjointRanges(Ranges, RangeCounter); - for (const auto &Range : Ranges) { - uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first); - uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second); - uint64_t Count = Range.second; - // Disjoint ranges have introduce zero-filled gap that - // doesn't belong to current context, filter them out. - if (Count == 0) - continue; - - InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, - // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range - // can be Addr1+1 to Addr2-1. We should ignore such range. - if (IP.Address > RangeEnd) - continue; - - do { - const AddressProbesMap &Address2ProbesMap = - Binary->getAddress2ProbesMap(); - auto It = Address2ProbesMap.find(IP.Address); - if (It != Address2ProbesMap.end()) { - for (const auto &Probe : It->second) { - if (!Probe.isBlock()) - continue; - ProbeCounter[&Probe] += Count; - } - } - } while (IP.advance() && IP.Address <= RangeEnd); - } -} - -void CSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, SampleContextFrames ContextStack) { - ProbeCounterMap ProbeCounter; - // Extract the top frame probes by looking up each address among the range in - // the Address2ProbeMap - extractProbesFromRange(RangeCounter, ProbeCounter); - std::unordered_map> - FrameSamples; - for (const auto &PI : ProbeCounter) { - const MCDecodedPseudoProbe *Probe = PI.first; - uint64_t Count = PI.second; - FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStack, Probe); - // Record the current frame and FunctionProfile whenever samples are - // collected for non-danglie probes. This is for reporting all of the - // zero count probes of the frame later. - FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); - FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); - FunctionProfile.addTotalSamples(Count); - if (Probe->isEntry()) { - FunctionProfile.addHeadSamples(Count); - // Look up for the caller's function profile - const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); - SampleContextFrames CalleeContextId = - FunctionProfile.getContext().getContextFrames(); - if (InlinerDesc != nullptr && CalleeContextId.size() > 1) { - // Since the context id will be compressed, we have to use callee's - // context id to infer caller's context id to ensure they share the - // same context prefix. - SampleContextFrameVector CallerContextId; - SampleContextFrame &&CallerLeafFrameLoc = - getCallerContext(CalleeContextId, CallerContextId); - uint64_t CallerIndex = CallerLeafFrameLoc.Location.LineOffset; - assert(CallerIndex && - "Inferred caller's location index shouldn't be zero!"); - FunctionSamples &CallerProfile = - getFunctionProfileForContext(CallerContextId); - CallerProfile.setFunctionHash(InlinerDesc->FuncHash); - CallerProfile.addBodySamples(CallerIndex, 0, Count); - CallerProfile.addTotalSamples(Count); - CallerProfile.addCalledTargetSamples( - CallerIndex, 0, FunctionProfile.getContext().getName(), Count); - } - } - } - - // Assign zero count for remaining probes without sample hits to - // differentiate from probes optimized away, of which the counts are unknown - // and will be inferred by the compiler. - for (auto &I : FrameSamples) { - for (auto *FunctionProfile : I.second) { - for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); - } - } - } -} - -void CSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, SampleContextFrames ContextStack) { - for (const auto &BI : BranchCounter) { - uint64_t SourceOffset = BI.first.first; - uint64_t TargetOffset = BI.first.second; - uint64_t Count = BI.second; - uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset); - const MCDecodedPseudoProbe *CallProbe = - Binary->getCallProbeForAddr(SourceAddress); - if (CallProbe == nullptr) - continue; - FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStack, CallProbe); - FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); - FunctionProfile.addTotalSamples(Count); - StringRef CalleeName = getCalleeNameForOffset(TargetOffset); - if (CalleeName.size() == 0) - continue; - FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, - Count); - } -} - -FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( - SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { - - // Explicitly copy the context for appending the leaf context - SampleContextFrameVector NewContextStack(ContextStack.begin(), - ContextStack.end()); - Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); - // For leaf inlined context with the top frame, we should strip off the top - // frame's probe id, like: - // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" - auto LeafFrame = NewContextStack.back(); - LeafFrame.Location = LineLocation(0, 0); - NewContextStack.pop_back(); - // Compress the context string except for the leaf frame - CSProfileGenerator::compressRecursionContext(NewContextStack); - CSProfileGenerator::trimContext(NewContextStack); - NewContextStack.push_back(LeafFrame); - - const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); - bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); - FunctionSamples &FunctionProile = - getFunctionProfileForContext(NewContextStack, WasLeafInlined); - FunctionProile.setFunctionHash(FuncDesc->FuncHash); - return FunctionProile; -} - -} // end namespace sampleprof -} // end namespace llvm diff --git a/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.h b/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.h deleted file mode 100644 index af349ac9911..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/ProfileGenerator.h +++ /dev/null @@ -1,312 +0,0 @@ -//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H -#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H -#include "CSPreInliner.h" -#include "ErrorHandling.h" -#include "PerfReader.h" -#include "ProfiledBinary.h" -#include "llvm/ProfileData/SampleProfWriter.h" -#include -#include - -using namespace llvm; -using namespace sampleprof; - -namespace llvm { -namespace sampleprof { - -// This base class for profile generation of sample-based PGO. We reuse all -// structures relating to function profiles and profile writers as seen in -// /ProfileData/SampleProf.h. -class ProfileGeneratorBase { - -public: - ProfileGeneratorBase(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : Binary(Binary), SampleCounters(Counters){}; - virtual ~ProfileGeneratorBase() = default; - static std::unique_ptr - create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters, - bool ProfileIsCSFlat); - virtual void generateProfile() = 0; - void write(); - - static uint32_t - getDuplicationFactor(unsigned Discriminator, - bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { - return UseFSD ? 1 - : llvm::DILocation::getDuplicationFactorFromDiscriminator( - Discriminator); - } - - static uint32_t - getBaseDiscriminator(unsigned Discriminator, - bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) { - return UseFSD ? Discriminator - : DILocation::getBaseDiscriminatorFromDiscriminator( - Discriminator, /* IsFSDiscriminator */ false); - } - - static bool UseFSDiscriminator; - -protected: - // Use SampleProfileWriter to serialize profile map - void write(std::unique_ptr Writer, - SampleProfileMap &ProfileMap); - /* - For each region boundary point, mark if it is begin or end (or both) of - the region. Boundary points are inclusive. Log the sample count as well - so we can use it when we compute the sample count of each disjoint region - later. Note that there might be multiple ranges with different sample - count that share same begin/end point. We need to accumulate the sample - count for the boundary point for such case, because for the example - below, - - |<--100-->| - |<------200------>| - A B C - - sample count for disjoint region [A,B] would be 300. - */ - void findDisjointRanges(RangeSample &DisjointRanges, - const RangeSample &Ranges); - // Helper function for updating body sample for a leaf location in - // FunctionProfile - void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile, - const SampleContextFrame &LeafLoc, - uint64_t Count); - void updateTotalSamples(); - - StringRef getCalleeNameForOffset(uint64_t TargetOffset); - - void computeSummaryAndThreshold(); - - void calculateAndShowDensity(const SampleProfileMap &Profiles); - - double calculateDensity(const SampleProfileMap &Profiles, - uint64_t HotCntThreshold); - - void showDensitySuggestion(double Density); - - // Thresholds from profile summary to answer isHotCount/isColdCount queries. - uint64_t HotCountThreshold; - - uint64_t ColdCountThreshold; - - // Used by SampleProfileWriter - SampleProfileMap ProfileMap; - - ProfiledBinary *Binary = nullptr; - - const ContextSampleCounterMap &SampleCounters; -}; - -class ProfileGenerator : public ProfileGeneratorBase { - -public: - ProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : ProfileGeneratorBase(Binary, Counters){}; - void generateProfile() override; - -private: - void generateLineNumBasedProfile(); - RangeSample preprocessRangeCounter(const RangeSample &RangeCounter); - FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName); - // Helper function to get the leaf frame's FunctionProfile by traversing the - // inline stack and meanwhile it adds the total samples for each frame's - // function profile. - FunctionSamples & - getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec, - uint64_t Count); - void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter); - void - populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters); - void postProcessProfiles(); - void trimColdProfiles(const SampleProfileMap &Profiles, - uint64_t ColdCntThreshold); -}; - -using ProbeCounterMap = - std::unordered_map; - -class CSProfileGenerator : public ProfileGeneratorBase { -public: - CSProfileGenerator(ProfiledBinary *Binary, - const ContextSampleCounterMap &Counters) - : ProfileGeneratorBase(Binary, Counters){}; - - void generateProfile() override; - - // Trim the context stack at a given depth. - template - static void trimContext(SmallVectorImpl &S, int Depth = MaxContextDepth) { - if (Depth < 0 || static_cast(Depth) >= S.size()) - return; - std::copy(S.begin() + S.size() - static_cast(Depth), S.end(), - S.begin()); - S.resize(Depth); - } - - // Remove adjacent repeated context sequences up to a given sequence length, - // -1 means no size limit. Note that repeated sequences are identified based - // on the exact call site, this is finer granularity than function recursion. - template - static void compressRecursionContext(SmallVectorImpl &Context, - int32_t CSize = MaxCompressionSize) { - uint32_t I = 1; - uint32_t HS = static_cast(Context.size() / 2); - uint32_t MaxDedupSize = - CSize == -1 ? HS : std::min(static_cast(CSize), HS); - auto BeginIter = Context.begin(); - // Use an in-place algorithm to save memory copy - // End indicates the end location of current iteration's data - uint32_t End = 0; - // Deduplicate from length 1 to the max possible size of a repeated - // sequence. - while (I <= MaxDedupSize) { - // This is a linear algorithm that deduplicates adjacent repeated - // sequences of size I. The deduplication detection runs on a sliding - // window whose size is 2*I and it keeps sliding the window to deduplicate - // the data inside. Once duplication is detected, deduplicate it by - // skipping the right half part of the window, otherwise just copy back - // the new one by appending them at the back of End pointer(for the next - // iteration). - // - // For example: - // Input: [a1, a2, b1, b2] - // (Added index to distinguish the same char, the origin is [a, a, b, - // b], the size of the dedup window is 2(I = 1) at the beginning) - // - // 1) The initial status is a dummy window[null, a1], then just copy the - // right half of the window(End = 0), then slide the window. - // Result: [a1], a2, b1, b2 (End points to the element right before ], - // after ] is the data of the previous iteration) - // - // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of - // the window i.e the duplication happen. Only slide the window. - // Result: [a1], a2, b1, b2 - // - // 3) Next window is [a2, b1], copy the right half of the window(b1 is - // new) to the End and slide the window. - // Result: [a1, b1], b1, b2 - // - // 4) Next window is [b1, b2], same to 2), skip b2. - // Result: [a1, b1], b1, b2 - // After resize, it will be [a, b] - - // Use pointers like below to do comparison inside the window - // [a b c a b c] - // | | | | | - // LeftBoundary Left Right Left+I Right+I - // A duplication found if Left < LeftBoundry. - - int32_t Right = I - 1; - End = I; - int32_t LeftBoundary = 0; - while (Right + I < Context.size()) { - // To avoids scanning a part of a sequence repeatedly, it finds out - // the common suffix of two hald in the window. The common suffix will - // serve as the common prefix of next possible pair of duplicate - // sequences. The non-common part will be ignored and never scanned - // again. - - // For example. - // Input: [a, b1], c1, b2, c2 - // I = 2 - // - // 1) For the window [a, b1, c1, b2], non-common-suffix for the right - // part is 'c1', copy it and only slide the window 1 step. - // Result: [a, b1, c1], b2, c2 - // - // 2) Next window is [b1, c1, b2, c2], so duplication happen. - // Result after resize: [a, b, c] - - int32_t Left = Right; - while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) { - // Find the longest suffix inside the window. When stops, Left points - // at the diverging point in the current sequence. - Left--; - } - - bool DuplicationFound = (Left < LeftBoundary); - // Don't need to recheck the data before Right - LeftBoundary = Right + 1; - if (DuplicationFound) { - // Duplication found, skip right half of the window. - Right += I; - } else { - // Copy the non-common-suffix part of the adjacent sequence. - std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1, - BeginIter + End); - End += Left + I - Right; - // Only slide the window by the size of non-common-suffix - Right = Left + I; - } - } - // Don't forget the remaining part that's not scanned. - std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End); - End += Context.size() - Right - 1; - I++; - Context.resize(End); - MaxDedupSize = std::min(static_cast(End / 2), MaxDedupSize); - } - } - -private: - void generateLineNumBasedProfile(); - // Lookup or create FunctionSamples for the context - FunctionSamples & - getFunctionProfileForContext(const SampleContextFrameVector &Context, - bool WasLeafInlined = false); - // For profiled only functions, on-demand compute their inline context - // function byte size which is used by the pre-inliner. - void computeSizeForProfiledFunctions(); - // Post processing for profiles before writing out, such as mermining - // and trimming cold profiles, running preinliner on profiles. - void postProcessProfiles(); - - void populateBodySamplesForFunction(FunctionSamples &FunctionProfile, - const RangeSample &RangeCounters); - void populateBoundarySamplesForFunction(SampleContextFrames ContextId, - FunctionSamples &FunctionProfile, - const BranchSample &BranchCounters); - void populateInferredFunctionSamples(); - - void generateProbeBasedProfile(); - // Go through each address from range to extract the top frame probe by - // looking up in the Address2ProbeMap - void extractProbesFromRange(const RangeSample &RangeCounter, - ProbeCounterMap &ProbeCounter); - // Fill in function body samples from probes - void populateBodySamplesWithProbes(const RangeSample &RangeCounter, - SampleContextFrames ContextStack); - // Fill in boundary samples for a call probe - void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, - SampleContextFrames ContextStack); - // Helper function to get FunctionSamples for the leaf probe - FunctionSamples & - getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, - const MCDecodedPseudoProbe *LeafProbe); - - // Underlying context table serves for sample profile writer. - std::unordered_set Contexts; - -public: - // Deduplicate adjacent repeated context sequences up to a given sequence - // length. -1 means no size limit. - static int32_t MaxCompressionSize; - static int MaxContextDepth; -}; - -} // end namespace sampleprof -} // end namespace llvm - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.cpp b/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.cpp deleted file mode 100644 index a773a3c98d4..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.cpp +++ /dev/null @@ -1,790 +0,0 @@ -//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "ProfiledBinary.h" -#include "ErrorHandling.h" -#include "ProfileGenerator.h" -#include "llvm/ADT/Triple.h" -#include "llvm/Demangle/Demangle.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/TargetSelect.h" - -#define DEBUG_TYPE "load-binary" - -using namespace llvm; -using namespace sampleprof; - -cl::opt ShowDisassemblyOnly("show-disassembly-only", cl::init(false), - cl::ZeroOrMore, - cl::desc("Print disassembled code.")); - -cl::opt ShowSourceLocations("show-source-locations", cl::init(false), - cl::ZeroOrMore, - cl::desc("Print source locations.")); - -static cl::opt - ShowCanonicalFnName("show-canonical-fname", cl::init(false), cl::ZeroOrMore, - cl::desc("Print canonical function name.")); - -static cl::opt ShowPseudoProbe( - "show-pseudo-probe", cl::init(false), cl::ZeroOrMore, - cl::desc("Print pseudo probe section and disassembled info.")); - -static cl::opt UseDwarfCorrelation( - "use-dwarf-correlation", cl::init(false), cl::ZeroOrMore, - cl::desc("Use dwarf for profile correlation even when binary contains " - "pseudo probe.")); - -static cl::list DisassembleFunctions( - "disassemble-functions", cl::CommaSeparated, - cl::desc("List of functions to print disassembly for. Accept demangled " - "names only. Only work with show-disassembly-only")); - -extern cl::opt ShowDetailedWarning; - -namespace llvm { -namespace sampleprof { - -static const Target *getTarget(const ObjectFile *Obj) { - Triple TheTriple = Obj->makeTriple(); - std::string Error; - std::string ArchName; - const Target *TheTarget = - TargetRegistry::lookupTarget(ArchName, TheTriple, Error); - if (!TheTarget) - exitWithError(Error, Obj->getFileName()); - return TheTarget; -} - -void BinarySizeContextTracker::addInstructionForContext( - const SampleContextFrameVector &Context, uint32_t InstrSize) { - ContextTrieNode *CurNode = &RootContext; - bool IsLeaf = true; - for (const auto &Callsite : reverse(Context)) { - StringRef CallerName = Callsite.FuncName; - LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location; - CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName); - IsLeaf = false; - } - - CurNode->addFunctionSize(InstrSize); -} - -uint32_t -BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) { - ContextTrieNode *CurrNode = &RootContext; - ContextTrieNode *PrevNode = nullptr; - SampleContextFrames Frames = Context.getContextFrames(); - int32_t I = Frames.size() - 1; - Optional Size; - - // Start from top-level context-less function, traverse down the reverse - // context trie to find the best/longest match for given context, then - // retrieve the size. - - while (CurrNode && I >= 0) { - // Process from leaf function to callers (added to context). - const auto &ChildFrame = Frames[I--]; - PrevNode = CurrNode; - CurrNode = - CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName); - if (CurrNode && CurrNode->getFunctionSize().hasValue()) - Size = CurrNode->getFunctionSize().getValue(); - } - - // If we traversed all nodes along the path of the context and haven't - // found a size yet, pivot to look for size from sibling nodes, i.e size - // of inlinee under different context. - if (!Size.hasValue()) { - if (!CurrNode) - CurrNode = PrevNode; - while (!Size.hasValue() && CurrNode && - !CurrNode->getAllChildContext().empty()) { - CurrNode = &CurrNode->getAllChildContext().begin()->second; - if (CurrNode->getFunctionSize().hasValue()) - Size = CurrNode->getFunctionSize().getValue(); - } - } - - assert(Size.hasValue() && "We should at least find one context size."); - return Size.getValue(); -} - -void BinarySizeContextTracker::trackInlineesOptimizedAway( - MCPseudoProbeDecoder &ProbeDecoder) { - ProbeFrameStack ProbeContext; - for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) - trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext); -} - -void BinarySizeContextTracker::trackInlineesOptimizedAway( - MCPseudoProbeDecoder &ProbeDecoder, - MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) { - StringRef FuncName = - ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName; - ProbeContext.emplace_back(FuncName, 0); - - // This ProbeContext has a probe, so it has code before inlining and - // optimization. Make sure we mark its size as known. - if (!ProbeNode.getProbes().empty()) { - ContextTrieNode *SizeContext = &RootContext; - for (auto &ProbeFrame : reverse(ProbeContext)) { - StringRef CallerName = ProbeFrame.first; - LineLocation CallsiteLoc(ProbeFrame.second, 0); - SizeContext = - SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName); - } - // Add 0 size to make known. - SizeContext->addFunctionSize(0); - } - - // DFS down the probe inline tree - for (const auto &ChildNode : ProbeNode.getChildren()) { - InlineSite Location = ChildNode.first; - ProbeContext.back().second = std::get<1>(Location); - trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext); - } - - ProbeContext.pop_back(); -} - -void ProfiledBinary::warnNoFuncEntry() { - uint64_t NoFuncEntryNum = 0; - for (auto &F : BinaryFunctions) { - if (F.second.Ranges.empty()) - continue; - bool hasFuncEntry = false; - for (auto &R : F.second.Ranges) { - if (FuncRange *FR = findFuncRangeForStartOffset(R.first)) { - if (FR->IsFuncEntry) { - hasFuncEntry = true; - break; - } - } - } - - if (!hasFuncEntry) { - NoFuncEntryNum++; - if (ShowDetailedWarning) - WithColor::warning() - << "Failed to determine function entry for " << F.first - << " due to inconsistent name from symbol table and dwarf info.\n"; - } - } - emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(), - "of functions failed to determine function entry due to " - "inconsistent name from symbol table and dwarf info."); -} - -void ProfiledBinary::load() { - // Attempt to open the binary. - OwningBinary OBinary = unwrapOrError(createBinary(Path), Path); - Binary &ExeBinary = *OBinary.getBinary(); - - auto *Obj = dyn_cast(&ExeBinary); - if (!Obj) - exitWithError("not a valid Elf image", Path); - - TheTriple = Obj->makeTriple(); - // Current only support X86 - if (!TheTriple.isX86()) - exitWithError("unsupported target", TheTriple.getTriple()); - LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); - - // Find the preferred load address for text sections. - setPreferredTextSegmentAddresses(Obj); - - // Decode pseudo probe related section - decodePseudoProbe(Obj); - - // Load debug info of subprograms from DWARF section. - // If path of debug info binary is specified, use the debug info from it, - // otherwise use the debug info from the executable binary. - if (!DebugBinaryPath.empty()) { - OwningBinary DebugPath = - unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath); - loadSymbolsFromDWARF(*dyn_cast(DebugPath.getBinary())); - } else { - loadSymbolsFromDWARF(*dyn_cast(&ExeBinary)); - } - - // Disassemble the text sections. - disassemble(Obj); - - // Track size for optimized inlinees when probe is available - if (UsePseudoProbes && TrackFuncContextSize) - FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder); - - // Use function start and return address to infer prolog and epilog - ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap); - ProEpilogTracker.inferEpilogOffsets(RetOffsets); - - warnNoFuncEntry(); - - // TODO: decode other sections. -} - -bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) { - uint64_t Offset1 = virtualAddrToOffset(Address1); - uint64_t Offset2 = virtualAddrToOffset(Address2); - const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1); - const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2); - if (Context1.size() != Context2.size()) - return false; - if (Context1.empty()) - return false; - // The leaf frame contains location within the leaf, and it - // needs to be remove that as it's not part of the calling context - return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1, - Context2.begin(), Context2.begin() + Context2.size() - 1); -} - -SampleContextFrameVector -ProfiledBinary::getExpandedContext(const SmallVectorImpl &Stack, - bool &WasLeafInlined) { - SampleContextFrameVector ContextVec; - // Process from frame root to leaf - for (auto Address : Stack) { - uint64_t Offset = virtualAddrToOffset(Address); - const SampleContextFrameVector &ExpandedContext = - getFrameLocationStack(Offset); - // An instruction without a valid debug line will be ignored by sample - // processing - if (ExpandedContext.empty()) - return SampleContextFrameVector(); - // Set WasLeafInlined to the size of inlined frame count for the last - // address which is leaf - WasLeafInlined = (ExpandedContext.size() > 1); - ContextVec.append(ExpandedContext); - } - - // Replace with decoded base discriminator - for (auto &Frame : ContextVec) { - Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator( - Frame.Location.Discriminator, UseFSDiscriminator); - } - - assert(ContextVec.size() && "Context length should be at least 1"); - - // Compress the context string except for the leaf frame - auto LeafFrame = ContextVec.back(); - LeafFrame.Location = LineLocation(0, 0); - ContextVec.pop_back(); - CSProfileGenerator::compressRecursionContext(ContextVec); - CSProfileGenerator::trimContext(ContextVec); - ContextVec.push_back(LeafFrame); - return ContextVec; -} - -template -void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName) { - const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName); - // FIXME: This should be the page size of the system running profiling. - // However such info isn't available at post-processing time, assuming - // 4K page now. Note that we don't use EXEC_PAGESIZE from - // because we may build the tools on non-linux. - uint32_t PageSize = 0x1000; - for (const typename ELFT::Phdr &Phdr : PhdrRange) { - if (Phdr.p_type == ELF::PT_LOAD) { - if (!FirstLoadableAddress) - FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U); - if (Phdr.p_flags & ELF::PF_X) { - // Segments will always be loaded at a page boundary. - PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr & - ~(PageSize - 1U)); - TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U)); - } - } - } - - if (PreferredTextSegmentAddresses.empty()) - exitWithError("no executable segment found", FileName); -} - -void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) { - if (const auto *ELFObj = dyn_cast(Obj)) - setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); - else if (const auto *ELFObj = dyn_cast(Obj)) - setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); - else if (const auto *ELFObj = dyn_cast(Obj)) - setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); - else if (const auto *ELFObj = cast(Obj)) - setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName()); - else - llvm_unreachable("invalid ELF object format"); -} - -void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) { - if (UseDwarfCorrelation) - return; - - StringRef FileName = Obj->getFileName(); - for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); - SI != SE; ++SI) { - const SectionRef &Section = *SI; - StringRef SectionName = unwrapOrError(Section.getName(), FileName); - - if (SectionName == ".pseudo_probe_desc") { - StringRef Contents = unwrapOrError(Section.getContents(), FileName); - if (!ProbeDecoder.buildGUID2FuncDescMap( - reinterpret_cast(Contents.data()), - Contents.size())) - exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section"); - } else if (SectionName == ".pseudo_probe") { - StringRef Contents = unwrapOrError(Section.getContents(), FileName); - if (!ProbeDecoder.buildAddress2ProbeMap( - reinterpret_cast(Contents.data()), - Contents.size())) - exitWithError("Pseudo Probe decoder fail in .pseudo_probe section"); - // set UsePseudoProbes flag, used for PerfReader - UsePseudoProbes = true; - } - } - - if (ShowPseudoProbe) - ProbeDecoder.printGUID2FuncDescMap(outs()); -} - -void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) { - // Note that the start offset of each ELF section can be a non-function - // symbol, we need to binary search for the start of a real function range. - auto *FuncRange = findFuncRangeForOffset(Offset); - // Skip external function symbol. - if (!FuncRange) - return; - - // Set IsFuncEntry to ture if there is only one range in the function or the - // RangeSymName from ELF is equal to its DWARF-based function name. - if (FuncRange->Func->Ranges.size() == 1 || - (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName)) - FuncRange->IsFuncEntry = true; -} - -bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, - SectionSymbolsTy &Symbols, - const SectionRef &Section) { - std::size_t SE = Symbols.size(); - uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress(); - uint64_t SectSize = Section.getSize(); - uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress(); - uint64_t NextStartOffset = - (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress() - : SectionOffset + SectSize; - setIsFuncEntry(StartOffset, - FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); - - StringRef SymbolName = - ShowCanonicalFnName - ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name) - : Symbols[SI].Name; - bool ShowDisassembly = - ShowDisassemblyOnly && (DisassembleFunctionSet.empty() || - DisassembleFunctionSet.count(SymbolName)); - if (ShowDisassembly) - outs() << '<' << SymbolName << ">:\n"; - - auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) { - WithColor::warning() << "Invalid instructions at " - << format("%8" PRIx64, Start) << " - " - << format("%8" PRIx64, End) << "\n"; - }; - - uint64_t Offset = StartOffset; - // Size of a consecutive invalid instruction range starting from Offset -1 - // backwards. - uint64_t InvalidInstLength = 0; - while (Offset < NextStartOffset) { - MCInst Inst; - uint64_t Size; - // Disassemble an instruction. - bool Disassembled = - DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset), - Offset + getPreferredBaseAddress(), nulls()); - if (Size == 0) - Size = 1; - - if (ShowDisassembly) { - if (ShowPseudoProbe) { - ProbeDecoder.printProbeForAddress(outs(), - Offset + getPreferredBaseAddress()); - } - outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress()); - size_t Start = outs().tell(); - if (Disassembled) - IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs()); - else - outs() << "\t"; - if (ShowSourceLocations) { - unsigned Cur = outs().tell() - Start; - if (Cur < 40) - outs().indent(40 - Cur); - InstructionPointer IP(this, Offset); - outs() << getReversedLocWithContext( - symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe)); - } - outs() << "\n"; - } - - if (Disassembled) { - const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode()); - - // Record instruction size. - Offset2InstSizeMap[Offset] = Size; - - // Populate address maps. - CodeAddrOffsets.push_back(Offset); - if (MCDesc.isCall()) - CallOffsets.insert(Offset); - else if (MCDesc.isReturn()) - RetOffsets.insert(Offset); - else if (MCDesc.isBranch()) - BranchOffsets.insert(Offset); - - if (InvalidInstLength) { - WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1); - InvalidInstLength = 0; - } - } else { - InvalidInstLength += Size; - } - - Offset += Size; - } - - if (InvalidInstLength) - WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1); - - if (ShowDisassembly) - outs() << "\n"; - - return true; -} - -void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) { - const Target *TheTarget = getTarget(Obj); - std::string TripleName = TheTriple.getTriple(); - StringRef FileName = Obj->getFileName(); - - MRI.reset(TheTarget->createMCRegInfo(TripleName)); - if (!MRI) - exitWithError("no register info for target " + TripleName, FileName); - - MCTargetOptions MCOptions; - AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); - if (!AsmInfo) - exitWithError("no assembly info for target " + TripleName, FileName); - - SubtargetFeatures Features = Obj->getFeatures(); - STI.reset( - TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString())); - if (!STI) - exitWithError("no subtarget info for target " + TripleName, FileName); - - MII.reset(TheTarget->createMCInstrInfo()); - if (!MII) - exitWithError("no instruction info for target " + TripleName, FileName); - - MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get()); - std::unique_ptr MOFI( - TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); - Ctx.setObjectFileInfo(MOFI.get()); - DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx)); - if (!DisAsm) - exitWithError("no disassembler for target " + TripleName, FileName); - - MIA.reset(TheTarget->createMCInstrAnalysis(MII.get())); - - int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); - IPrinter.reset(TheTarget->createMCInstPrinter( - Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); - IPrinter->setPrintBranchImmAsAddress(true); -} - -void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) { - // Set up disassembler and related components. - setUpDisassembler(Obj); - - // Create a mapping from virtual address to symbol name. The symbols in text - // sections are the candidates to dissassemble. - std::map AllSymbols; - StringRef FileName = Obj->getFileName(); - for (const SymbolRef &Symbol : Obj->symbols()) { - const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); - const StringRef Name = unwrapOrError(Symbol.getName(), FileName); - section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); - if (SecI != Obj->section_end()) - AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE)); - } - - // Sort all the symbols. Use a stable sort to stabilize the output. - for (std::pair &SecSyms : AllSymbols) - stable_sort(SecSyms.second); - - DisassembleFunctionSet.insert(DisassembleFunctions.begin(), - DisassembleFunctions.end()); - assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) && - "Functions to disassemble should be only specified together with " - "--show-disassembly-only"); - - if (ShowDisassemblyOnly) - outs() << "\nDisassembly of " << FileName << ":\n"; - - // Dissassemble a text section. - for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); - SI != SE; ++SI) { - const SectionRef &Section = *SI; - if (!Section.isText()) - continue; - - uint64_t ImageLoadAddr = getPreferredBaseAddress(); - uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr; - uint64_t SectSize = Section.getSize(); - if (!SectSize) - continue; - - // Register the text section. - TextSections.insert({SectionOffset, SectSize}); - - StringRef SectionName = unwrapOrError(Section.getName(), FileName); - - if (ShowDisassemblyOnly) { - outs() << "\nDisassembly of section " << SectionName; - outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", " - << format("0x%" PRIx64, Section.getAddress() + SectSize) - << "]:\n\n"; - } - - if (SectionName == ".plt") - continue; - - // Get the section data. - ArrayRef Bytes = - arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName)); - - // Get the list of all the symbols in this section. - SectionSymbolsTy &Symbols = AllSymbols[Section]; - - // Disassemble symbol by symbol. - for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - if (!dissassembleSymbol(SI, Bytes, Symbols, Section)) - exitWithError("disassembling error", FileName); - } - } - - // Dissassemble rodata section to check if FS discriminator symbol exists. - checkUseFSDiscriminator(Obj, AllSymbols); -} - -void ProfiledBinary::checkUseFSDiscriminator( - const ELFObjectFileBase *Obj, - std::map &AllSymbols) { - const char *FSDiscriminatorVar = "__llvm_fs_discriminator__"; - for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end(); - SI != SE; ++SI) { - const SectionRef &Section = *SI; - if (!Section.isData() || Section.getSize() == 0) - continue; - SectionSymbolsTy &Symbols = AllSymbols[Section]; - - for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - if (Symbols[SI].Name == FSDiscriminatorVar) { - UseFSDiscriminator = true; - return; - } - } - } -} - -void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) { - auto DebugContext = llvm::DWARFContext::create(Obj); - if (!DebugContext) - exitWithError("Misssing debug info.", Path); - - for (const auto &CompilationUnit : DebugContext->compile_units()) { - for (const auto &DieInfo : CompilationUnit->dies()) { - llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo); - - if (!Die.isSubprogramDIE()) - continue; - auto Name = Die.getName(llvm::DINameKind::LinkageName); - if (!Name) - Name = Die.getName(llvm::DINameKind::ShortName); - if (!Name) - continue; - - auto RangesOrError = Die.getAddressRanges(); - if (!RangesOrError) - continue; - const DWARFAddressRangesVector &Ranges = RangesOrError.get(); - - if (Ranges.empty()) - continue; - - // Different DWARF symbols can have same function name, search or create - // BinaryFunction indexed by the name. - auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); - auto &Func = Ret.first->second; - if (Ret.second) - Func.FuncName = Ret.first->first; - - for (const auto &Range : Ranges) { - uint64_t FuncStart = Range.LowPC; - uint64_t FuncSize = Range.HighPC - FuncStart; - - if (FuncSize == 0 || FuncStart < getPreferredBaseAddress()) - continue; - - uint64_t StartOffset = FuncStart - getPreferredBaseAddress(); - uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress(); - - // We may want to know all ranges for one function. Here group the - // ranges and store them into BinaryFunction. - Func.Ranges.emplace_back(StartOffset, EndOffset); - - auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange()); - if (R.second) { - FuncRange &FRange = R.first->second; - FRange.Func = &Func; - FRange.StartOffset = StartOffset; - FRange.EndOffset = EndOffset; - } else { - WithColor::warning() - << "Duplicated symbol start address at " - << format("%8" PRIx64, StartOffset + getPreferredBaseAddress()) - << " " << R.first->second.getFuncName() << " and " << Name - << "\n"; - } - } - } - } - assert(!StartOffset2FuncRangeMap.empty() && "Misssing debug info."); -} - -void ProfiledBinary::populateSymbolListFromDWARF( - ProfileSymbolList &SymbolList) { - for (auto &I : StartOffset2FuncRangeMap) - SymbolList.add(I.second.getFuncName()); -} - -void ProfiledBinary::setupSymbolizer() { - symbolize::LLVMSymbolizer::Options SymbolizerOpts; - SymbolizerOpts.PrintFunctions = - DILineInfoSpecifier::FunctionNameKind::LinkageName; - SymbolizerOpts.Demangle = false; - SymbolizerOpts.DefaultArch = TheTriple.getArchName().str(); - SymbolizerOpts.UseSymbolTable = false; - SymbolizerOpts.RelativeAddresses = false; - Symbolizer = std::make_unique(SymbolizerOpts); -} - -SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName, - bool UseProbeDiscriminator) { - assert(this == IP.Binary && - "Binary should only symbolize its own instruction"); - auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(), - object::SectionedAddress::UndefSection}; - DIInliningInfo InlineStack = unwrapOrError( - Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr), - SymbolizerPath); - - SampleContextFrameVector CallStack; - for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) { - const auto &CallerFrame = InlineStack.getFrame(I); - if (CallerFrame.FunctionName == "") - break; - - StringRef FunctionName(CallerFrame.FunctionName); - if (UseCanonicalFnName) - FunctionName = FunctionSamples::getCanonicalFnName(FunctionName); - - uint32_t Discriminator = CallerFrame.Discriminator; - uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff; - if (UseProbeDiscriminator) { - LineOffset = - PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); - Discriminator = 0; - } - - LineLocation Line(LineOffset, Discriminator); - auto It = NameStrings.insert(FunctionName.str()); - CallStack.emplace_back(*It.first, Line); - } - - return CallStack; -} - -void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, - uint64_t EndOffset) { - uint64_t RangeBegin = offsetToVirtualAddr(StartOffset); - uint64_t RangeEnd = offsetToVirtualAddr(EndOffset); - InstructionPointer IP(this, RangeBegin, true); - - if (IP.Address != RangeBegin) - WithColor::warning() << "Invalid start instruction at " - << format("%8" PRIx64, RangeBegin) << "\n"; - - if (IP.Address >= RangeEnd) - return; - - do { - uint64_t Offset = virtualAddrToOffset(IP.Address); - const SampleContextFrameVector &SymbolizedCallStack = - getFrameLocationStack(Offset, UsePseudoProbes); - uint64_t Size = Offset2InstSizeMap[Offset]; - - // Record instruction size for the corresponding context - FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); - - } while (IP.advance() && IP.Address < RangeEnd); -} - -InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, - uint64_t Address, bool RoundToNext) - : Binary(Binary), Address(Address) { - Index = Binary->getIndexForAddr(Address); - if (RoundToNext) { - // we might get address which is not the code - // it should round to the next valid address - if (Index >= Binary->getCodeOffsetsSize()) - this->Address = UINT64_MAX; - else - this->Address = Binary->getAddressforIndex(Index); - } -} - -bool InstructionPointer::advance() { - Index++; - if (Index >= Binary->getCodeOffsetsSize()) { - Address = UINT64_MAX; - return false; - } - Address = Binary->getAddressforIndex(Index); - return true; -} - -bool InstructionPointer::backward() { - if (Index == 0) { - Address = 0; - return false; - } - Index--; - Address = Binary->getAddressforIndex(Index); - return true; -} - -void InstructionPointer::update(uint64_t Addr) { - Address = Addr; - Index = Binary->getIndexForAddr(Address); -} - -} // end namespace sampleprof -} // end namespace llvm diff --git a/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.h b/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.h deleted file mode 100644 index d3d1c6f1fd2..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/ProfiledBinary.h +++ /dev/null @@ -1,541 +0,0 @@ -//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H -#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H - -#include "CallContext.h" -#include "ErrorHandling.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/Symbolize/Symbolize.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCPseudoProbe.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetOptions.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/ProfileData/SampleProf.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Path.h" -#include "llvm/Transforms/IPO/SampleContextTracker.h" -#include -#include -#include -#include -#include -#include -#include -#include - -extern cl::opt EnableCSPreInliner; -extern cl::opt UseContextCostForPreInliner; - -using namespace llvm; -using namespace sampleprof; -using namespace llvm::object; - -namespace llvm { -namespace sampleprof { - -class ProfiledBinary; - -struct InstructionPointer { - const ProfiledBinary *Binary; - union { - // Offset of the executable segment of the binary. - uint64_t Offset = 0; - // Also used as address in unwinder - uint64_t Address; - }; - // Index to the sorted code address array of the binary. - uint64_t Index = 0; - InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, - bool RoundToNext = false); - bool advance(); - bool backward(); - void update(uint64_t Addr); -}; - -// The special frame addresses. -enum SpecialFrameAddr { - // Dummy root of frame trie. - DummyRoot = 0, - // Represent all the addresses outside of current binary. - // This's also used to indicate the call stack should be truncated since this - // isn't a real call context the compiler will see. - ExternalAddr = 1, -}; - -using RangesTy = std::vector>; - -struct BinaryFunction { - StringRef FuncName; - // End of range is an exclusive bound. - RangesTy Ranges; - - uint64_t getFuncSize() { - uint64_t Sum = 0; - for (auto &R : Ranges) { - Sum += R.second - R.first; - } - return Sum; - } -}; - -// Info about function range. A function can be split into multiple -// non-continuous ranges, each range corresponds to one FuncRange. -struct FuncRange { - uint64_t StartOffset; - // EndOffset is an exclusive bound. - uint64_t EndOffset; - // Function the range belongs to - BinaryFunction *Func; - // Whether the start offset is the real entry of the function. - bool IsFuncEntry = false; - - StringRef getFuncName() { return Func->FuncName; } -}; - -// PrologEpilog offset tracker, used to filter out broken stack samples -// Currently we use a heuristic size (two) to infer prolog and epilog -// based on the start address and return address. In the future, -// we will switch to Dwarf CFI based tracker -struct PrologEpilogTracker { - // A set of prolog and epilog offsets. Used by virtual unwinding. - std::unordered_set PrologEpilogSet; - ProfiledBinary *Binary; - PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){}; - - // Take the two addresses from the start of function as prolog - void inferPrologOffsets(std::map &FuncStartOffsetMap) { - for (auto I : FuncStartOffsetMap) { - PrologEpilogSet.insert(I.first); - InstructionPointer IP(Binary, I.first); - if (!IP.advance()) - break; - PrologEpilogSet.insert(IP.Offset); - } - } - - // Take the last two addresses before the return address as epilog - void inferEpilogOffsets(std::unordered_set &RetAddrs) { - for (auto Addr : RetAddrs) { - PrologEpilogSet.insert(Addr); - InstructionPointer IP(Binary, Addr); - if (!IP.backward()) - break; - PrologEpilogSet.insert(IP.Offset); - } - } -}; - -// Track function byte size under different context (outlined version as well as -// various inlined versions). It also provides query support to get function -// size with the best matching context, which is used to help pre-inliner use -// accurate post-optimization size to make decisions. -// TODO: If an inlinee is completely optimized away, ideally we should have zero -// for its context size, currently we would misss such context since it doesn't -// have instructions. To fix this, we need to mark all inlinee with entry probe -// but without instructions as having zero size. -class BinarySizeContextTracker { -public: - // Add instruction with given size to a context - void addInstructionForContext(const SampleContextFrameVector &Context, - uint32_t InstrSize); - - // Get function size with a specific context. When there's no exact match - // for the given context, try to retrieve the size of that function from - // closest matching context. - uint32_t getFuncSizeForContext(const SampleContext &Context); - - // For inlinees that are full optimized away, we can establish zero size using - // their remaining probes. - void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder); - - void dump() { RootContext.dumpTree(); } - -private: - using ProbeFrameStack = SmallVector>; - void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder, - MCDecodedPseudoProbeInlineTree &ProbeNode, - ProbeFrameStack &Context); - - // Root node for context trie tree, node that this is a reverse context trie - // with callee as parent and caller as child. This way we can traverse from - // root to find the best/longest matching context if an exact match does not - // exist. It gives us the best possible estimate for function's post-inline, - // post-optimization byte size. - ContextTrieNode RootContext; -}; - -using OffsetRange = std::pair; - -class ProfiledBinary { - // Absolute path of the executable binary. - std::string Path; - // Path of the debug info binary. - std::string DebugBinaryPath; - // Path of symbolizer path which should be pointed to binary with debug info. - StringRef SymbolizerPath; - // The target triple. - Triple TheTriple; - // The runtime base address that the first executable segment is loaded at. - uint64_t BaseAddress = 0; - // The runtime base address that the first loadabe segment is loaded at. - uint64_t FirstLoadableAddress = 0; - // The preferred load address of each executable segment. - std::vector PreferredTextSegmentAddresses; - // The file offset of each executable segment. - std::vector TextSegmentOffsets; - - // Mutiple MC component info - std::unique_ptr MRI; - std::unique_ptr AsmInfo; - std::unique_ptr STI; - std::unique_ptr MII; - std::unique_ptr DisAsm; - std::unique_ptr MIA; - std::unique_ptr IPrinter; - // A list of text sections sorted by start RVA and size. Used to check - // if a given RVA is a valid code address. - std::set> TextSections; - - // A map of mapping function name to BinaryFunction info. - std::unordered_map BinaryFunctions; - - // An ordered map of mapping function's start offset to function range - // relevant info. Currently to determine if the offset of ELF is the start of - // a real function, we leverage the function range info from DWARF. - std::map StartOffset2FuncRangeMap; - - // Offset to context location map. Used to expand the context. - std::unordered_map Offset2LocStackMap; - - // Offset to instruction size map. Also used for quick offset lookup. - std::unordered_map Offset2InstSizeMap; - - // An array of offsets of all instructions sorted in increasing order. The - // sorting is needed to fast advance to the next forward/backward instruction. - std::vector CodeAddrOffsets; - // A set of call instruction offsets. Used by virtual unwinding. - std::unordered_set CallOffsets; - // A set of return instruction offsets. Used by virtual unwinding. - std::unordered_set RetOffsets; - // A set of branch instruction offsets. - std::unordered_set BranchOffsets; - - // Estimate and track function prolog and epilog ranges. - PrologEpilogTracker ProEpilogTracker; - - // Track function sizes under different context - BinarySizeContextTracker FuncSizeTracker; - - // The symbolizer used to get inline context for an instruction. - std::unique_ptr Symbolizer; - - // String table owning function name strings created from the symbolizer. - std::unordered_set NameStrings; - - // A collection of functions to print disassembly for. - StringSet<> DisassembleFunctionSet; - - // Pseudo probe decoder - MCPseudoProbeDecoder ProbeDecoder; - - bool UsePseudoProbes = false; - - bool UseFSDiscriminator = false; - - // Whether we need to symbolize all instructions to get function context size. - bool TrackFuncContextSize = false; - - // Indicate if the base loading address is parsed from the mmap event or uses - // the preferred address - bool IsLoadedByMMap = false; - // Use to avoid redundant warning. - bool MissingMMapWarned = false; - - void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O); - - template - void setPreferredTextSegmentAddresses(const ELFFile &Obj, StringRef FileName); - - void decodePseudoProbe(const ELFObjectFileBase *Obj); - - void - checkUseFSDiscriminator(const ELFObjectFileBase *Obj, - std::map &AllSymbols); - - // Set up disassembler and related components. - void setUpDisassembler(const ELFObjectFileBase *Obj); - void setupSymbolizer(); - - // Load debug info of subprograms from DWARF section. - void loadSymbolsFromDWARF(ObjectFile &Obj); - - // A function may be spilt into multiple non-continuous address ranges. We use - // this to set whether start offset of a function is the real entry of the - // function and also set false to the non-function label. - void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName); - - // Warn if no entry range exists in the function. - void warnNoFuncEntry(); - - /// Dissassemble the text section and build various address maps. - void disassemble(const ELFObjectFileBase *O); - - /// Helper function to dissassemble the symbol and extract info for unwinding - bool dissassembleSymbol(std::size_t SI, ArrayRef Bytes, - SectionSymbolsTy &Symbols, const SectionRef &Section); - /// Symbolize a given instruction pointer and return a full call context. - SampleContextFrameVector symbolize(const InstructionPointer &IP, - bool UseCanonicalFnName = false, - bool UseProbeDiscriminator = false); - /// Decode the interesting parts of the binary and build internal data - /// structures. On high level, the parts of interest are: - /// 1. Text sections, including the main code section and the PLT - /// entries that will be used to handle cross-module call transitions. - /// 2. The .debug_line section, used by Dwarf-based profile generation. - /// 3. Pseudo probe related sections, used by probe-based profile - /// generation. - void load(); - -public: - ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath) - : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), - TrackFuncContextSize(EnableCSPreInliner && - UseContextCostForPreInliner) { - // Point to executable binary if debug info binary is not specified. - SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath; - setupSymbolizer(); - load(); - } - uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const { - return VirtualAddress - BaseAddress; - } - uint64_t offsetToVirtualAddr(uint64_t Offset) const { - return Offset + BaseAddress; - } - StringRef getPath() const { return Path; } - StringRef getName() const { return llvm::sys::path::filename(Path); } - uint64_t getBaseAddress() const { return BaseAddress; } - void setBaseAddress(uint64_t Address) { BaseAddress = Address; } - - // Return the preferred load address for the first executable segment. - uint64_t getPreferredBaseAddress() const { return PreferredTextSegmentAddresses[0]; } - // Return the preferred load address for the first loadable segment. - uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; } - // Return the file offset for the first executable segment. - uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; } - const std::vector &getPreferredTextSegmentAddresses() const { - return PreferredTextSegmentAddresses; - } - const std::vector &getTextSegmentOffsets() const { - return TextSegmentOffsets; - } - - uint64_t getInstSize(uint64_t Offset) const { - auto I = Offset2InstSizeMap.find(Offset); - if (I == Offset2InstSizeMap.end()) - return 0; - return I->second; - } - - bool offsetIsCode(uint64_t Offset) const { - return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); - } - bool addressIsCode(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return offsetIsCode(Offset); - } - bool addressIsCall(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return CallOffsets.count(Offset); - } - bool addressIsReturn(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return RetOffsets.count(Offset); - } - bool addressInPrologEpilog(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return ProEpilogTracker.PrologEpilogSet.count(Offset); - } - - bool offsetIsTransfer(uint64_t Offset) { - return BranchOffsets.count(Offset) || RetOffsets.count(Offset) || - CallOffsets.count(Offset); - } - - uint64_t getAddressforIndex(uint64_t Index) const { - return offsetToVirtualAddr(CodeAddrOffsets[Index]); - } - - size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); } - - bool usePseudoProbes() const { return UsePseudoProbes; } - bool useFSDiscriminator() const { return UseFSDiscriminator; } - // Get the index in CodeAddrOffsets for the address - // As we might get an address which is not the code - // here it would round to the next valid code address by - // using lower bound operation - uint32_t getIndexForOffset(uint64_t Offset) const { - auto Low = llvm::lower_bound(CodeAddrOffsets, Offset); - return Low - CodeAddrOffsets.begin(); - } - uint32_t getIndexForAddr(uint64_t Address) const { - uint64_t Offset = virtualAddrToOffset(Address); - return getIndexForOffset(Offset); - } - - uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const { - if (FrameAddr == ExternalAddr) - return ExternalAddr; - auto I = getIndexForAddr(FrameAddr); - FrameAddr = I ? getAddressforIndex(I - 1) : 0; - if (FrameAddr && addressIsCall(FrameAddr)) - return FrameAddr; - return 0; - } - - FuncRange *findFuncRangeForStartOffset(uint64_t Offset) { - auto I = StartOffset2FuncRangeMap.find(Offset); - if (I == StartOffset2FuncRangeMap.end()) - return nullptr; - return &I->second; - } - - // Binary search the function range which includes the input offset. - FuncRange *findFuncRangeForOffset(uint64_t Offset) { - auto I = StartOffset2FuncRangeMap.upper_bound(Offset); - if (I == StartOffset2FuncRangeMap.begin()) - return nullptr; - I--; - - if (Offset >= I->second.EndOffset) - return nullptr; - - return &I->second; - } - - // Get all ranges of one function. - RangesTy getRangesForOffset(uint64_t Offset) { - auto *FRange = findFuncRangeForOffset(Offset); - // Ignore the range which falls into plt section or system lib. - if (!FRange) - return RangesTy(); - - return FRange->Func->Ranges; - } - - const std::unordered_map & - getAllBinaryFunctions() { - return BinaryFunctions; - } - - BinaryFunction *getBinaryFunction(StringRef FName) { - auto I = BinaryFunctions.find(FName.str()); - if (I == BinaryFunctions.end()) - return nullptr; - return &I->second; - } - - uint32_t getFuncSizeForContext(SampleContext &Context) { - return FuncSizeTracker.getFuncSizeForContext(Context); - } - - // Load the symbols from debug table and populate into symbol list. - void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList); - - const SampleContextFrameVector & - getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) { - auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector()); - if (I.second) { - InstructionPointer IP(this, Offset); - I.first->second = symbolize(IP, true, UseProbeDiscriminator); - } - return I.first->second; - } - - Optional getInlineLeafFrameLoc(uint64_t Offset) { - const auto &Stack = getFrameLocationStack(Offset); - if (Stack.empty()) - return {}; - return Stack.back(); - } - - // Compare two addresses' inline context - bool inlineContextEqual(uint64_t Add1, uint64_t Add2); - - // Get the full context of the current stack with inline context filled in. - // It will search the disassembling info stored in Offset2LocStackMap. This is - // used as the key of function sample map - SampleContextFrameVector - getExpandedContext(const SmallVectorImpl &Stack, - bool &WasLeafInlined); - // Go through instructions among the given range and record its size for the - // inline context. - void computeInlinedContextSizeForRange(uint64_t StartOffset, - uint64_t EndOffset); - - const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const { - return ProbeDecoder.getCallProbeForAddr(Address); - } - - void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe, - SampleContextFrameVector &InlineContextStack, - bool IncludeLeaf = false) const { - SmallVector ProbeInlineContext; - ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext, - IncludeLeaf); - for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) { - auto &Callsite = ProbeInlineContext[I]; - // Clear the current context for an unknown probe. - if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) { - InlineContextStack.clear(); - continue; - } - InlineContextStack.emplace_back(Callsite.first, - LineLocation(Callsite.second, 0)); - } - } - const AddressProbesMap &getAddress2ProbesMap() const { - return ProbeDecoder.getAddress2ProbesMap(); - } - const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) { - return ProbeDecoder.getFuncDescForGUID(GUID); - } - - const MCPseudoProbeFuncDesc * - getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) { - return ProbeDecoder.getInlinerDescForProbe(Probe); - } - - bool getTrackFuncContextSize() { return TrackFuncContextSize; } - - bool getIsLoadedByMMap() { return IsLoadedByMMap; } - - void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; } - - bool getMissingMMapWarned() { return MissingMMapWarned; } - - void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; } -}; - -} // end namespace sampleprof -} // end namespace llvm - -#endif diff --git a/tools/ldc-profgen/ldc-profgen-14.0/llvm-profgen.cpp b/tools/ldc-profgen/ldc-profgen-14.0/llvm-profgen.cpp deleted file mode 100644 index f092df04d52..00000000000 --- a/tools/ldc-profgen/ldc-profgen-14.0/llvm-profgen.cpp +++ /dev/null @@ -1,164 +0,0 @@ -//===- llvm-profgen.cpp - LLVM SPGO profile generation tool -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// llvm-profgen generates SPGO profiles from perf script ouput. -// -//===----------------------------------------------------------------------===// - -#include "ErrorHandling.h" -#include "PerfReader.h" -#include "ProfileGenerator.h" -#include "ProfiledBinary.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/TargetSelect.h" - -static cl::OptionCategory ProfGenCategory("ProfGen Options"); - -static cl::opt PerfScriptFilename( - "perfscript", cl::value_desc("perfscript"), cl::ZeroOrMore, - llvm::cl::MiscFlags::CommaSeparated, - cl::desc("Path of perf-script trace created by Linux perf tool with " - "`script` command(the raw perf.data should be profiled with -b)"), - cl::cat(ProfGenCategory)); -static cl::alias PSA("ps", cl::desc("Alias for --perfscript"), - cl::aliasopt(PerfScriptFilename)); - -static cl::opt PerfDataFilename( - "perfdata", cl::value_desc("perfdata"), cl::ZeroOrMore, - llvm::cl::MiscFlags::CommaSeparated, - cl::desc("Path of raw perf data created by Linux perf tool (it should be " - "profiled with -b)"), - cl::cat(ProfGenCategory)); -static cl::alias PDA("pd", cl::desc("Alias for --perfdata"), - cl::aliasopt(PerfDataFilename)); - -static cl::opt UnsymbolizedProfFilename( - "unsymbolized-profile", cl::value_desc("unsymbolized profile"), - cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated, - cl::desc("Path of the unsymbolized profile created by " - "`llvm-profgen` with `--skip-symbolization`"), - cl::cat(ProfGenCategory)); -static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"), - cl::aliasopt(UnsymbolizedProfFilename)); - -static cl::opt - BinaryPath("binary", cl::value_desc("binary"), cl::Required, - cl::desc("Path of profiled executable binary."), - cl::cat(ProfGenCategory)); - -static cl::opt DebugBinPath( - "debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore, - cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info " - "from it instead of the executable binary."), - cl::cat(ProfGenCategory)); - -extern cl::opt ShowDisassemblyOnly; -extern cl::opt ShowSourceLocations; -extern cl::opt SkipSymbolization; - -using namespace llvm; -using namespace sampleprof; - -// Validate the command line input. -static void validateCommandLine() { - // Allow the missing perfscript if we only use to show binary disassembly. - if (!ShowDisassemblyOnly) { - // Validate input profile is provided only once - uint16_t HasPerfData = PerfDataFilename.getNumOccurrences(); - uint16_t HasPerfScript = PerfScriptFilename.getNumOccurrences(); - uint16_t HasUnsymbolizedProfile = - UnsymbolizedProfFilename.getNumOccurrences(); - uint16_t S = HasPerfData + HasPerfScript + HasUnsymbolizedProfile; - if (S != 1) { - std::string Msg = - S > 1 - ? "`--perfscript`, `--perfdata` and `--unsymbolized-profile` " - "cannot be used together." - : "Perf input file is missing, please use one of `--perfscript`, " - "`--perfdata` and `--unsymbolized-profile` for the input."; - exitWithError(Msg); - } - - auto CheckFileExists = [](bool H, StringRef File) { - if (H && !llvm::sys::fs::exists(File)) { - std::string Msg = "Input perf file(" + File.str() + ") doesn't exist."; - exitWithError(Msg); - } - }; - - CheckFileExists(HasPerfData, PerfDataFilename); - CheckFileExists(HasPerfScript, PerfScriptFilename); - CheckFileExists(HasUnsymbolizedProfile, UnsymbolizedProfFilename); - } - - if (!llvm::sys::fs::exists(BinaryPath)) { - std::string Msg = "Input binary(" + BinaryPath + ") doesn't exist."; - exitWithError(Msg); - } - - if (CSProfileGenerator::MaxCompressionSize < -1) { - exitWithError("Value of --compress-recursion should >= -1"); - } - if (ShowSourceLocations && !ShowDisassemblyOnly) { - exitWithError("--show-source-locations should work together with " - "--show-disassembly-only!"); - } -} - -static PerfInputFile getPerfInputFile() { - PerfInputFile File; - if (PerfDataFilename.getNumOccurrences()) { - File.InputFile = PerfDataFilename; - File.Format = PerfFormat::PerfData; - } else if (PerfScriptFilename.getNumOccurrences()) { - File.InputFile = PerfScriptFilename; - File.Format = PerfFormat::PerfScript; - } else if (UnsymbolizedProfFilename.getNumOccurrences()) { - File.InputFile = UnsymbolizedProfFilename; - File.Format = PerfFormat::UnsymbolizedProfile; - } - return File; -} - -int main(int argc, const char *argv[]) { - InitLLVM X(argc, argv); - - // Initialize targets and assembly printers/parsers. - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllDisassemblers(); - - cl::HideUnrelatedOptions({&ProfGenCategory, &getColorCategory()}); - cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n"); - validateCommandLine(); - - // Load symbols and disassemble the code of a given binary. - std::unique_ptr Binary = - std::make_unique(BinaryPath, DebugBinPath); - if (ShowDisassemblyOnly) - return EXIT_SUCCESS; - - PerfInputFile PerfFile = getPerfInputFile(); - std::unique_ptr Reader = - PerfReaderBase::create(Binary.get(), PerfFile); - // Parse perf events and samples - Reader->parsePerfTraces(); - - if (SkipSymbolization) - return EXIT_SUCCESS; - - std::unique_ptr Generator = - ProfileGeneratorBase::create(Binary.get(), Reader->getSampleCounters(), - Reader->profileIsCSFlat()); - Generator->generateProfile(); - Generator->write(); - - return EXIT_SUCCESS; -} diff --git a/utils/FileCheck-14.cpp b/utils/FileCheck-14.cpp deleted file mode 100644 index 6742853c9b6..00000000000 --- a/utils/FileCheck-14.cpp +++ /dev/null @@ -1,891 +0,0 @@ -//===- FileCheck.cpp - Check that File's Contents match what is expected --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// FileCheck does a line-by line check of a file that validates whether it -// contains the expected content. This is useful for regression tests etc. -// -// This program exits with an exit status of 2 on error, exit status of 0 if -// the file matched the expected contents, and exit status of 1 if it did not -// contain the expected contents. -// -//===----------------------------------------------------------------------===// - -#include "llvm/FileCheck/FileCheck.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -using namespace llvm; - -static cl::extrahelp FileCheckOptsEnv( - "\nOptions are parsed from the environment variable FILECHECK_OPTS and\n" - "from the command line.\n"); - -static cl::opt - CheckFilename(cl::Positional, cl::desc(""), cl::Optional); - -static cl::opt - InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), - cl::init("-"), cl::value_desc("filename")); - -static cl::list CheckPrefixes( - "check-prefix", - cl::desc("Prefix to use from check file (defaults to 'CHECK')")); -static cl::alias CheckPrefixesAlias( - "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated, - cl::NotHidden, - cl::desc( - "Alias for -check-prefix permitting multiple comma separated values")); - -static cl::list CommentPrefixes( - "comment-prefixes", cl::CommaSeparated, cl::Hidden, - cl::desc("Comma-separated list of comment prefixes to use from check file\n" - "(defaults to 'COM,RUN'). Please avoid using this feature in\n" - "LLVM's LIT-based test suites, which should be easier to\n" - "maintain if they all follow a consistent comment style. This\n" - "feature is meant for non-LIT test suites using FileCheck.")); - -static cl::opt NoCanonicalizeWhiteSpace( - "strict-whitespace", - cl::desc("Do not treat all horizontal whitespace as equivalent")); - -static cl::opt IgnoreCase( - "ignore-case", - cl::desc("Use case-insensitive matching")); - -static cl::list ImplicitCheckNot( - "implicit-check-not", - cl::desc("Add an implicit negative check with this pattern to every\n" - "positive check. This can be used to ensure that no instances of\n" - "this pattern occur which are not matched by a positive pattern"), - cl::value_desc("pattern")); - -static cl::list - GlobalDefines("D", cl::AlwaysPrefix, - cl::desc("Define a variable to be used in capture patterns."), - cl::value_desc("VAR=VALUE")); - -static cl::opt AllowEmptyInput( - "allow-empty", cl::init(false), - cl::desc("Allow the input file to be empty. This is useful when making\n" - "checks that some error message does not occur, for example.")); - -static cl::opt AllowUnusedPrefixes( - "allow-unused-prefixes", cl::init(false), cl::ZeroOrMore, - cl::desc("Allow prefixes to be specified but not appear in the test.")); - -static cl::opt MatchFullLines( - "match-full-lines", cl::init(false), - cl::desc("Require all positive matches to cover an entire input line.\n" - "Allows leading and trailing whitespace if --strict-whitespace\n" - "is not also passed.")); - -static cl::opt EnableVarScope( - "enable-var-scope", cl::init(false), - cl::desc("Enables scope for regex variables. Variables with names that\n" - "do not start with '$' will be reset at the beginning of\n" - "each CHECK-LABEL block.")); - -static cl::opt AllowDeprecatedDagOverlap( - "allow-deprecated-dag-overlap", cl::init(false), - cl::desc("Enable overlapping among matches in a group of consecutive\n" - "CHECK-DAG directives. This option is deprecated and is only\n" - "provided for convenience as old tests are migrated to the new\n" - "non-overlapping CHECK-DAG implementation.\n")); - -static cl::opt Verbose( - "v", cl::init(false), cl::ZeroOrMore, - cl::desc("Print directive pattern matches, or add them to the input dump\n" - "if enabled.\n")); - -static cl::opt VerboseVerbose( - "vv", cl::init(false), cl::ZeroOrMore, - cl::desc("Print information helpful in diagnosing internal FileCheck\n" - "issues, or add it to the input dump if enabled. Implies\n" - "-v.\n")); - -// The order of DumpInputValue members affects their precedence, as documented -// for -dump-input below. -enum DumpInputValue { - DumpInputNever, - DumpInputFail, - DumpInputAlways, - DumpInputHelp -}; - -static cl::list DumpInputs( - "dump-input", - cl::desc("Dump input to stderr, adding annotations representing\n" - "currently enabled diagnostics. When there are multiple\n" - "occurrences of this option, the that appears earliest\n" - "in the list below has precedence. The default is 'fail'.\n"), - cl::value_desc("mode"), - cl::values(clEnumValN(DumpInputHelp, "help", "Explain input dump and quit"), - clEnumValN(DumpInputAlways, "always", "Always dump input"), - clEnumValN(DumpInputFail, "fail", "Dump input on failure"), - clEnumValN(DumpInputNever, "never", "Never dump input"))); - -// The order of DumpInputFilterValue members affects their precedence, as -// documented for -dump-input-filter below. -enum DumpInputFilterValue { - DumpInputFilterError, - DumpInputFilterAnnotation, - DumpInputFilterAnnotationFull, - DumpInputFilterAll -}; - -static cl::list DumpInputFilters( - "dump-input-filter", - cl::desc("In the dump requested by -dump-input, print only input lines of\n" - "kind plus any context specified by -dump-input-context.\n" - "When there are multiple occurrences of this option, the \n" - "that appears earliest in the list below has precedence. The\n" - "default is 'error' when -dump-input=fail, and it's 'all' when\n" - "-dump-input=always.\n"), - cl::values(clEnumValN(DumpInputFilterAll, "all", "All input lines"), - clEnumValN(DumpInputFilterAnnotationFull, "annotation-full", - "Input lines with annotations"), - clEnumValN(DumpInputFilterAnnotation, "annotation", - "Input lines with starting points of annotations"), - clEnumValN(DumpInputFilterError, "error", - "Input lines with starting points of error " - "annotations"))); - -static cl::list DumpInputContexts( - "dump-input-context", cl::value_desc("N"), - cl::desc("In the dump requested by -dump-input, print input lines\n" - "before and input lines after any lines specified by\n" - "-dump-input-filter. When there are multiple occurrences of\n" - "this option, the largest specified has precedence. The\n" - "default is 5.\n")); - -typedef cl::list::const_iterator prefix_iterator; - - - - - - - -static void DumpCommandLine(int argc, char **argv) { - errs() << "FileCheck command line: "; - for (int I = 0; I < argc; I++) - errs() << " " << argv[I]; - errs() << "\n"; -} - -struct MarkerStyle { - /// The starting char (before tildes) for marking the line. - char Lead; - /// What color to use for this annotation. - raw_ostream::Colors Color; - /// A note to follow the marker, or empty string if none. - std::string Note; - /// Does this marker indicate inclusion by -dump-input-filter=error? - bool FiltersAsError; - MarkerStyle() {} - MarkerStyle(char Lead, raw_ostream::Colors Color, - const std::string &Note = "", bool FiltersAsError = false) - : Lead(Lead), Color(Color), Note(Note), FiltersAsError(FiltersAsError) { - assert((!FiltersAsError || !Note.empty()) && - "expected error diagnostic to have note"); - } -}; - -static MarkerStyle GetMarker(FileCheckDiag::MatchType MatchTy) { - switch (MatchTy) { - case FileCheckDiag::MatchFoundAndExpected: - return MarkerStyle('^', raw_ostream::GREEN); - case FileCheckDiag::MatchFoundButExcluded: - return MarkerStyle('!', raw_ostream::RED, "error: no match expected", - /*FiltersAsError=*/true); - case FileCheckDiag::MatchFoundButWrongLine: - return MarkerStyle('!', raw_ostream::RED, "error: match on wrong line", - /*FiltersAsError=*/true); - case FileCheckDiag::MatchFoundButDiscarded: - return MarkerStyle('!', raw_ostream::CYAN, - "discard: overlaps earlier match"); - case FileCheckDiag::MatchFoundErrorNote: - // Note should always be overridden within the FileCheckDiag. - return MarkerStyle('!', raw_ostream::RED, - "error: unknown error after match", - /*FiltersAsError=*/true); - case FileCheckDiag::MatchNoneAndExcluded: - return MarkerStyle('X', raw_ostream::GREEN); - case FileCheckDiag::MatchNoneButExpected: - return MarkerStyle('X', raw_ostream::RED, "error: no match found", - /*FiltersAsError=*/true); - case FileCheckDiag::MatchNoneForInvalidPattern: - return MarkerStyle('X', raw_ostream::RED, - "error: match failed for invalid pattern", - /*FiltersAsError=*/true); - case FileCheckDiag::MatchFuzzy: - return MarkerStyle('?', raw_ostream::MAGENTA, "possible intended match", - /*FiltersAsError=*/true); - } - llvm_unreachable_internal("unexpected match type"); -} - -static void DumpInputAnnotationHelp(raw_ostream &OS) { - OS << "The following description was requested by -dump-input=help to\n" - << "explain the input dump printed by FileCheck.\n" - << "\n" - << "Related command-line options:\n" - << "\n" - << " - -dump-input= enables or disables the input dump\n" - << " - -dump-input-filter= filters the input lines\n" - << " - -dump-input-context= adjusts the context of filtered lines\n" - << " - -v and -vv add more annotations\n" - << " - -color forces colors to be enabled both in the dump and below\n" - << " - -help documents the above options in more detail\n" - << "\n" - << "These options can also be set via FILECHECK_OPTS. For example, for\n" - << "maximum debugging output on failures:\n" - << "\n" - << " $ FILECHECK_OPTS='-dump-input-filter=all -vv -color' ninja check\n" - << "\n" - << "Input dump annotation format:\n" - << "\n"; - - // Labels for input lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "L:"; - OS << " labels line number L of the input file\n" - << " An extra space is added after each input line to represent" - << " the\n" - << " newline character\n"; - - // Labels for annotation lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L"; - OS << " labels the only match result for either (1) a pattern of type T" - << " from\n" - << " line L of the check file if L is an integer or (2) the" - << " I-th implicit\n" - << " pattern if L is \"imp\" followed by an integer " - << "I (index origin one)\n"; - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L'N"; - OS << " labels the Nth match result for such a pattern\n"; - - // Markers on annotation lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "^~~"; - OS << " marks good match (reported if -v)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "!~~"; - OS << " marks bad match, such as:\n" - << " - CHECK-NEXT on same line as previous match (error)\n" - << " - CHECK-NOT found (error)\n" - << " - CHECK-DAG overlapping match (discarded, reported if " - << "-vv)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "X~~"; - OS << " marks search range when no match is found, such as:\n" - << " - CHECK-NEXT not found (error)\n" - << " - CHECK-NOT not found (success, reported if -vv)\n" - << " - CHECK-DAG not found after discarded matches (error)\n" - << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "?"; - OS << " marks fuzzy match when no match is found\n"; - - // Elided lines. - OS << " - "; - WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "..."; - OS << " indicates elided input lines and annotations, as specified by\n" - << " -dump-input-filter and -dump-input-context\n"; - - // Colors. - OS << " - colors "; - WithColor(OS, raw_ostream::GREEN, true) << "success"; - OS << ", "; - WithColor(OS, raw_ostream::RED, true) << "error"; - OS << ", "; - WithColor(OS, raw_ostream::MAGENTA, true) << "fuzzy match"; - OS << ", "; - WithColor(OS, raw_ostream::CYAN, true, false) << "discarded match"; - OS << ", "; - WithColor(OS, raw_ostream::CYAN, true, true) << "unmatched input"; - OS << "\n"; -} - -/// An annotation for a single input line. -struct InputAnnotation { - /// The index of the match result across all checks - unsigned DiagIndex; - /// The label for this annotation. - std::string Label; - /// Is this the initial fragment of a diagnostic that has been broken across - /// multiple lines? - bool IsFirstLine; - /// What input line (one-origin indexing) this annotation marks. This might - /// be different from the starting line of the original diagnostic if - /// !IsFirstLine. - unsigned InputLine; - /// The column range (one-origin indexing, open end) in which to mark the - /// input line. If InputEndCol is UINT_MAX, treat it as the last column - /// before the newline. - unsigned InputStartCol, InputEndCol; - /// The marker to use. - MarkerStyle Marker; - /// Whether this annotation represents a good match for an expected pattern. - bool FoundAndExpectedMatch; -}; - -/// Get an abbreviation for the check type. -static std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { - switch (Ty) { - case Check::CheckPlain: - if (Ty.getCount() > 1) - return "count"; - return "check"; - case Check::CheckNext: - return "next"; - case Check::CheckSame: - return "same"; - case Check::CheckNot: - return "not"; - case Check::CheckDAG: - return "dag"; - case Check::CheckLabel: - return "label"; - case Check::CheckEmpty: - return "empty"; - case Check::CheckComment: - return "com"; - case Check::CheckEOF: - return "eof"; - case Check::CheckBadNot: - return "bad-not"; - case Check::CheckBadCount: - return "bad-count"; - case Check::CheckNone: - llvm_unreachable("invalid FileCheckType"); - } - llvm_unreachable("unknown FileCheckType"); -} - -static void -BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, - const std::pair &ImpPatBufferIDRange, - const std::vector &Diags, - std::vector &Annotations, - unsigned &LabelWidth) { - struct CompareSMLoc { - bool operator()(const SMLoc &LHS, const SMLoc &RHS) const { - return LHS.getPointer() < RHS.getPointer(); - } - }; - // How many diagnostics does each pattern have? - std::map DiagCountPerPattern; - for (auto Diag : Diags) - ++DiagCountPerPattern[Diag.CheckLoc]; - // How many diagnostics have we seen so far per pattern? - std::map DiagIndexPerPattern; - // How many total diagnostics have we seen so far? - unsigned DiagIndex = 0; - // What's the widest label? - LabelWidth = 0; - for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd; - ++DiagItr) { - InputAnnotation A; - A.DiagIndex = DiagIndex++; - - // Build label, which uniquely identifies this check result. - unsigned CheckBufferID = SM.FindBufferContainingLoc(DiagItr->CheckLoc); - auto CheckLineAndCol = - SM.getLineAndColumn(DiagItr->CheckLoc, CheckBufferID); - llvm::raw_string_ostream Label(A.Label); - Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":"; - if (CheckBufferID == CheckFileBufferID) - Label << CheckLineAndCol.first; - else if (ImpPatBufferIDRange.first <= CheckBufferID && - CheckBufferID < ImpPatBufferIDRange.second) - Label << "imp" << (CheckBufferID - ImpPatBufferIDRange.first + 1); - else - llvm_unreachable("expected diagnostic's check location to be either in " - "the check file or for an implicit pattern"); - if (DiagCountPerPattern[DiagItr->CheckLoc] > 1) - Label << "'" << DiagIndexPerPattern[DiagItr->CheckLoc]++; - LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size()); - - A.Marker = GetMarker(DiagItr->MatchTy); - if (!DiagItr->Note.empty()) { - A.Marker.Note = DiagItr->Note; - // It's less confusing if notes that don't actually have ranges don't have - // markers. For example, a marker for 'with "VAR" equal to "5"' would - // seem to indicate where "VAR" matches, but the location we actually have - // for the marker simply points to the start of the match/search range for - // the full pattern of which the substitution is potentially just one - // component. - if (DiagItr->InputStartLine == DiagItr->InputEndLine && - DiagItr->InputStartCol == DiagItr->InputEndCol) - A.Marker.Lead = ' '; - } - if (DiagItr->MatchTy == FileCheckDiag::MatchFoundErrorNote) { - assert(!DiagItr->Note.empty() && - "expected custom note for MatchFoundErrorNote"); - A.Marker.Note = "error: " + A.Marker.Note; - } - A.FoundAndExpectedMatch = - DiagItr->MatchTy == FileCheckDiag::MatchFoundAndExpected; - - // Compute the mark location, and break annotation into multiple - // annotations if it spans multiple lines. - A.IsFirstLine = true; - A.InputLine = DiagItr->InputStartLine; - A.InputStartCol = DiagItr->InputStartCol; - if (DiagItr->InputStartLine == DiagItr->InputEndLine) { - // Sometimes ranges are empty in order to indicate a specific point, but - // that would mean nothing would be marked, so adjust the range to - // include the following character. - A.InputEndCol = - std::max(DiagItr->InputStartCol + 1, DiagItr->InputEndCol); - Annotations.push_back(A); - } else { - assert(DiagItr->InputStartLine < DiagItr->InputEndLine && - "expected input range not to be inverted"); - A.InputEndCol = UINT_MAX; - Annotations.push_back(A); - for (unsigned L = DiagItr->InputStartLine + 1, E = DiagItr->InputEndLine; - L <= E; ++L) { - // If a range ends before the first column on a line, then it has no - // characters on that line, so there's nothing to render. - if (DiagItr->InputEndCol == 1 && L == E) - break; - InputAnnotation B; - B.DiagIndex = A.DiagIndex; - B.Label = A.Label; - B.IsFirstLine = false; - B.InputLine = L; - B.Marker = A.Marker; - B.Marker.Lead = '~'; - B.Marker.Note = ""; - B.InputStartCol = 1; - if (L != E) - B.InputEndCol = UINT_MAX; - else - B.InputEndCol = DiagItr->InputEndCol; - B.FoundAndExpectedMatch = A.FoundAndExpectedMatch; - Annotations.push_back(B); - } - } - } -} - -static unsigned FindInputLineInFilter( - DumpInputFilterValue DumpInputFilter, unsigned CurInputLine, - const std::vector::iterator &AnnotationBeg, - const std::vector::iterator &AnnotationEnd) { - if (DumpInputFilter == DumpInputFilterAll) - return CurInputLine; - for (auto AnnotationItr = AnnotationBeg; AnnotationItr != AnnotationEnd; - ++AnnotationItr) { - switch (DumpInputFilter) { - case DumpInputFilterAll: - llvm_unreachable("unexpected DumpInputFilterAll"); - break; - case DumpInputFilterAnnotationFull: - return AnnotationItr->InputLine; - case DumpInputFilterAnnotation: - if (AnnotationItr->IsFirstLine) - return AnnotationItr->InputLine; - break; - case DumpInputFilterError: - if (AnnotationItr->IsFirstLine && AnnotationItr->Marker.FiltersAsError) - return AnnotationItr->InputLine; - break; - } - } - return UINT_MAX; -} - -/// To OS, print a vertical ellipsis (right-justified at LabelWidth) if it would -/// occupy less lines than ElidedLines, but print ElidedLines otherwise. Either -/// way, clear ElidedLines. Thus, if ElidedLines is empty, do nothing. -static void DumpEllipsisOrElidedLines(raw_ostream &OS, std::string &ElidedLines, - unsigned LabelWidth) { - if (ElidedLines.empty()) - return; - unsigned EllipsisLines = 3; - if (EllipsisLines < StringRef(ElidedLines).count('\n')) { - for (unsigned i = 0; i < EllipsisLines; ++i) { - WithColor(OS, raw_ostream::BLACK, /*Bold=*/true) - << right_justify(".", LabelWidth); - OS << '\n'; - } - } else - OS << ElidedLines; - ElidedLines.clear(); -} - -static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req, - DumpInputFilterValue DumpInputFilter, - unsigned DumpInputContext, - StringRef InputFileText, - std::vector &Annotations, - unsigned LabelWidth) { - OS << "Input was:\n<<<<<<\n"; - - // Sort annotations. - llvm::sort(Annotations, - [](const InputAnnotation &A, const InputAnnotation &B) { - // 1. Sort annotations in the order of the input lines. - // - // This makes it easier to find relevant annotations while - // iterating input lines in the implementation below. FileCheck - // does not always produce diagnostics in the order of input - // lines due to, for example, CHECK-DAG and CHECK-NOT. - if (A.InputLine != B.InputLine) - return A.InputLine < B.InputLine; - // 2. Sort annotations in the temporal order FileCheck produced - // their associated diagnostics. - // - // This sort offers several benefits: - // - // A. On a single input line, the order of annotations reflects - // the FileCheck logic for processing directives/patterns. - // This can be helpful in understanding cases in which the - // order of the associated directives/patterns in the check - // file or on the command line either (i) does not match the - // temporal order in which FileCheck looks for matches for the - // directives/patterns (due to, for example, CHECK-LABEL, - // CHECK-NOT, or `--implicit-check-not`) or (ii) does match - // that order but does not match the order of those - // diagnostics along an input line (due to, for example, - // CHECK-DAG). - // - // On the other hand, because our presentation format presents - // input lines in order, there's no clear way to offer the - // same benefit across input lines. For consistency, it might - // then seem worthwhile to have annotations on a single line - // also sorted in input order (that is, by input column). - // However, in practice, this appears to be more confusing - // than helpful. Perhaps it's intuitive to expect annotations - // to be listed in the temporal order in which they were - // produced except in cases the presentation format obviously - // and inherently cannot support it (that is, across input - // lines). - // - // B. When diagnostics' annotations are split among multiple - // input lines, the user must track them from one input line - // to the next. One property of the sort chosen here is that - // it facilitates the user in this regard by ensuring the - // following: when comparing any two input lines, a - // diagnostic's annotations are sorted in the same position - // relative to all other diagnostics' annotations. - return A.DiagIndex < B.DiagIndex; - }); - - // Compute the width of the label column. - const unsigned char *InputFilePtr = InputFileText.bytes_begin(), - *InputFileEnd = InputFileText.bytes_end(); - unsigned LineCount = InputFileText.count('\n'); - if (InputFileEnd[-1] != '\n') - ++LineCount; - unsigned LineNoWidth = std::log10(LineCount) + 1; - // +3 below adds spaces (1) to the left of the (right-aligned) line numbers - // on input lines and (2) to the right of the (left-aligned) labels on - // annotation lines so that input lines and annotation lines are more - // visually distinct. For example, the spaces on the annotation lines ensure - // that input line numbers and check directive line numbers never align - // horizontally. Those line numbers might not even be for the same file. - // One space would be enough to achieve that, but more makes it even easier - // to see. - LabelWidth = std::max(LabelWidth, LineNoWidth) + 3; - - // Print annotated input lines. - unsigned PrevLineInFilter = 0; // 0 means none so far - unsigned NextLineInFilter = 0; // 0 means uncomputed, UINT_MAX means none - std::string ElidedLines; - raw_string_ostream ElidedLinesOS(ElidedLines); - ColorMode TheColorMode = - WithColor(OS).colorsEnabled() ? ColorMode::Enable : ColorMode::Disable; - if (TheColorMode == ColorMode::Enable) - ElidedLinesOS.enable_colors(true); - auto AnnotationItr = Annotations.begin(), AnnotationEnd = Annotations.end(); - for (unsigned Line = 1; - InputFilePtr != InputFileEnd || AnnotationItr != AnnotationEnd; - ++Line) { - const unsigned char *InputFileLine = InputFilePtr; - - // Compute the previous and next line included by the filter. - if (NextLineInFilter < Line) - NextLineInFilter = FindInputLineInFilter(DumpInputFilter, Line, - AnnotationItr, AnnotationEnd); - assert(NextLineInFilter && "expected NextLineInFilter to be computed"); - if (NextLineInFilter == Line) - PrevLineInFilter = Line; - - // Elide this input line and its annotations if it's not within the - // context specified by -dump-input-context of an input line included by - // -dump-input-filter. However, in case the resulting ellipsis would occupy - // more lines than the input lines and annotations it elides, buffer the - // elided lines and annotations so we can print them instead. - raw_ostream *LineOS = &OS; - if ((!PrevLineInFilter || PrevLineInFilter + DumpInputContext < Line) && - (NextLineInFilter == UINT_MAX || - Line + DumpInputContext < NextLineInFilter)) - LineOS = &ElidedLinesOS; - else { - LineOS = &OS; - DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth); - } - - // Print right-aligned line number. - WithColor(*LineOS, raw_ostream::BLACK, /*Bold=*/true, /*BF=*/false, - TheColorMode) - << format_decimal(Line, LabelWidth) << ": "; - - // For the case where -v and colors are enabled, find the annotations for - // good matches for expected patterns in order to highlight everything - // else in the line. There are no such annotations if -v is disabled. - std::vector FoundAndExpectedMatches; - if (Req.Verbose && TheColorMode == ColorMode::Enable) { - for (auto I = AnnotationItr; I != AnnotationEnd && I->InputLine == Line; - ++I) { - if (I->FoundAndExpectedMatch) - FoundAndExpectedMatches.push_back(*I); - } - } - - // Print numbered line with highlighting where there are no matches for - // expected patterns. - bool Newline = false; - { - WithColor COS(*LineOS, raw_ostream::SAVEDCOLOR, /*Bold=*/false, - /*BG=*/false, TheColorMode); - bool InMatch = false; - if (Req.Verbose) - COS.changeColor(raw_ostream::CYAN, true, true); - for (unsigned Col = 1; InputFilePtr != InputFileEnd && !Newline; ++Col) { - bool WasInMatch = InMatch; - InMatch = false; - for (auto M : FoundAndExpectedMatches) { - if (M.InputStartCol <= Col && Col < M.InputEndCol) { - InMatch = true; - break; - } - } - if (!WasInMatch && InMatch) - COS.resetColor(); - else if (WasInMatch && !InMatch) - COS.changeColor(raw_ostream::CYAN, true, true); - if (*InputFilePtr == '\n') { - Newline = true; - COS << ' '; - } else - COS << *InputFilePtr; - ++InputFilePtr; - } - } - *LineOS << '\n'; - unsigned InputLineWidth = InputFilePtr - InputFileLine; - - // Print any annotations. - while (AnnotationItr != AnnotationEnd && - AnnotationItr->InputLine == Line) { - WithColor COS(*LineOS, AnnotationItr->Marker.Color, /*Bold=*/true, - /*BG=*/false, TheColorMode); - // The two spaces below are where the ": " appears on input lines. - COS << left_justify(AnnotationItr->Label, LabelWidth) << " "; - unsigned Col; - for (Col = 1; Col < AnnotationItr->InputStartCol; ++Col) - COS << ' '; - COS << AnnotationItr->Marker.Lead; - // If InputEndCol=UINT_MAX, stop at InputLineWidth. - for (++Col; Col < AnnotationItr->InputEndCol && Col <= InputLineWidth; - ++Col) - COS << '~'; - const std::string &Note = AnnotationItr->Marker.Note; - if (!Note.empty()) { - // Put the note at the end of the input line. If we were to instead - // put the note right after the marker, subsequent annotations for the - // same input line might appear to mark this note instead of the input - // line. - for (; Col <= InputLineWidth; ++Col) - COS << ' '; - COS << ' ' << Note; - } - COS << '\n'; - ++AnnotationItr; - } - } - DumpEllipsisOrElidedLines(OS, ElidedLinesOS.str(), LabelWidth); - - OS << ">>>>>>\n"; -} - -int main(int argc, char **argv) { - // Enable use of ANSI color codes because FileCheck is using them to - // highlight text. - llvm::sys::Process::UseANSIEscapeCodes(true); - - InitLLVM X(argc, argv); - cl::ParseCommandLineOptions(argc, argv, /*Overview*/ "", /*Errs*/ nullptr, - "FILECHECK_OPTS"); - - // Select -dump-input* values. The -help documentation specifies the default - // value and which value to choose if an option is specified multiple times. - // In the latter case, the general rule of thumb is to choose the value that - // provides the most information. - DumpInputValue DumpInput = - DumpInputs.empty() - ? DumpInputFail - : *std::max_element(DumpInputs.begin(), DumpInputs.end()); - DumpInputFilterValue DumpInputFilter; - if (DumpInputFilters.empty()) - DumpInputFilter = DumpInput == DumpInputAlways ? DumpInputFilterAll - : DumpInputFilterError; - else - DumpInputFilter = - *std::max_element(DumpInputFilters.begin(), DumpInputFilters.end()); - unsigned DumpInputContext = DumpInputContexts.empty() - ? 5 - : *std::max_element(DumpInputContexts.begin(), - DumpInputContexts.end()); - - if (DumpInput == DumpInputHelp) { - DumpInputAnnotationHelp(outs()); - return 0; - } - if (CheckFilename.empty()) { - errs() << " not specified\n"; - return 2; - } - - FileCheckRequest Req; - append_range(Req.CheckPrefixes, CheckPrefixes); - - append_range(Req.CommentPrefixes, CommentPrefixes); - - append_range(Req.ImplicitCheckNot, ImplicitCheckNot); - - bool GlobalDefineError = false; - for (StringRef G : GlobalDefines) { - size_t EqIdx = G.find('='); - if (EqIdx == std::string::npos) { - errs() << "Missing equal sign in command-line definition '-D" << G - << "'\n"; - GlobalDefineError = true; - continue; - } - if (EqIdx == 0) { - errs() << "Missing variable name in command-line definition '-D" << G - << "'\n"; - GlobalDefineError = true; - continue; - } - Req.GlobalDefines.push_back(G); - } - if (GlobalDefineError) - return 2; - - Req.AllowEmptyInput = AllowEmptyInput; - Req.AllowUnusedPrefixes = AllowUnusedPrefixes; - Req.EnableVarScope = EnableVarScope; - Req.AllowDeprecatedDagOverlap = AllowDeprecatedDagOverlap; - Req.Verbose = Verbose; - Req.VerboseVerbose = VerboseVerbose; - Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace; - Req.MatchFullLines = MatchFullLines; - Req.IgnoreCase = IgnoreCase; - - if (VerboseVerbose) - Req.Verbose = true; - - FileCheck FC(Req); - if (!FC.ValidateCheckPrefixes()) - return 2; - - Regex PrefixRE = FC.buildCheckPrefixRegex(); - std::string REError; - if (!PrefixRE.isValid(REError)) { - errs() << "Unable to combine check-prefix strings into a prefix regular " - "expression! This is likely a bug in FileCheck's verification of " - "the check-prefix strings. Regular expression parsing failed " - "with the following error: " - << REError << "\n"; - return 2; - } - - SourceMgr SM; - - // Read the expected strings from the check file. - ErrorOr> CheckFileOrErr = - MemoryBuffer::getFileOrSTDIN(CheckFilename, /*IsText=*/true); - if (std::error_code EC = CheckFileOrErr.getError()) { - errs() << "Could not open check file '" << CheckFilename - << "': " << EC.message() << '\n'; - return 2; - } - MemoryBuffer &CheckFile = *CheckFileOrErr.get(); - - SmallString<4096> CheckFileBuffer; - StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer); - - unsigned CheckFileBufferID = - SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( - CheckFileText, CheckFile.getBufferIdentifier()), - SMLoc()); - - std::pair ImpPatBufferIDRange; - if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange)) - return 2; - - // Open the file to check and add it to SourceMgr. - ErrorOr> InputFileOrErr = - MemoryBuffer::getFileOrSTDIN(InputFilename, /*IsText=*/true); - if (InputFilename == "-") - InputFilename = ""; // Overwrite for improved diagnostic messages - if (std::error_code EC = InputFileOrErr.getError()) { - errs() << "Could not open input file '" << InputFilename - << "': " << EC.message() << '\n'; - return 2; - } - MemoryBuffer &InputFile = *InputFileOrErr.get(); - - if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) { - errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; - DumpCommandLine(argc, argv); - return 2; - } - - SmallString<4096> InputFileBuffer; - StringRef InputFileText = FC.CanonicalizeFile(InputFile, InputFileBuffer); - - SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( - InputFileText, InputFile.getBufferIdentifier()), - SMLoc()); - - std::vector Diags; - int ExitCode = FC.checkInput(SM, InputFileText, - DumpInput == DumpInputNever ? nullptr : &Diags) - ? EXIT_SUCCESS - : 1; - if (DumpInput == DumpInputAlways || - (ExitCode == 1 && DumpInput == DumpInputFail)) { - errs() << "\n" - << "Input file: " << InputFilename << "\n" - << "Check file: " << CheckFilename << "\n" - << "\n" - << "-dump-input=help explains the following input dump.\n" - << "\n"; - std::vector Annotations; - unsigned LabelWidth; - BuildInputAnnotations(SM, CheckFileBufferID, ImpPatBufferIDRange, Diags, - Annotations, LabelWidth); - DumpAnnotatedInput(errs(), Req, DumpInputFilter, DumpInputContext, - InputFileText, Annotations, LabelWidth); - } - - return ExitCode; -} diff --git a/utils/gen_gccbuiltins.cpp b/utils/gen_gccbuiltins.cpp index 5c973641dd7..e483abbe26e 100644 --- a/utils/gen_gccbuiltins.cpp +++ b/utils/gen_gccbuiltins.cpp @@ -29,11 +29,7 @@ using namespace std; using namespace llvm; -#if LDC_LLVM_VER >= 1500 #define BUILTIN_NAME_STRING "ClangBuiltinName" -#else -#define BUILTIN_NAME_STRING "GCCBuiltinName" -#endif string dtype(Record* rec, bool readOnlyMem) {