Skip to content

Commit

Permalink
Merge pull request #266 from Xilinx/bump_to_8612fa0d
Browse files Browse the repository at this point in the history
[AutoBump] Merge with 8612fa0 (9)
  • Loading branch information
cferry-AMD authored Aug 16, 2024
2 parents 6c48d6f + 64c0a57 commit 6f96182
Show file tree
Hide file tree
Showing 411 changed files with 34,832 additions and 4,534 deletions.
20 changes: 13 additions & 7 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ clang/test/AST/Interp/ @tbaederr
/mlir/Dialect/*/Transforms/Bufferize.cpp @matthias-springer

# Linalg Dialect in MLIR.
/mlir/include/mlir/Dialect/Linalg @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Linalg @dcaballe @nicolasvasilache
/mlir/include/mlir/Dialect/Linalg/* @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Linalg/* @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @MaheshRavishankar @nicolasvasilache
/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @MaheshRavishankar @nicolasvasilache
/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @MaheshRavishankar @nicolasvasilache
Expand All @@ -77,14 +77,14 @@ clang/test/AST/Interp/ @tbaederr
/mlir/**/*SME* @banach-space @dcaballe @nicolasvasilache
/mlir/**/*SVE* @banach-space @dcaballe @nicolasvasilache
/mlir/**/*VectorInterfaces* @dcaballe @nicolasvasilache
/mlir/**/*VectorToSCF* @banach-space @dcaballe @nicolasvasilache @matthias-springer
/mlir/**/*VectorToSCF* @banach-space @dcaballe @matthias-springer @nicolasvasilache
/mlir/**/*VectorToLLVM* @banach-space @dcaballe @nicolasvasilache
/mlir/**/*X86Vector* @aartbik @dcaballe @nicolasvasilache
/mlir/include/mlir/Dialect/Vector @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Vector @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
/mlir/**/*EmulateNarrowType* @hanhanW
/mlir/include/mlir/Dialect/Vector/* @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Vector/* @dcaballe @nicolasvasilache
/mlir/lib/Dialect/Vector/Transforms/* @hanhanW @nicolasvasilache
/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
/mlir/**/*EmulateNarrowType* @dcaballe @hanhanW

# Presburger library in MLIR
/mlir/**/*Presburger* @Groverkss @Superty
Expand All @@ -96,6 +96,7 @@ clang/test/AST/Interp/ @tbaederr
# Transform Dialect in MLIR.
/mlir/include/mlir/Dialect/Transform/* @ftynse @nicolasvasilache
/mlir/lib/Dialect/Transform/* @ftynse @nicolasvasilache
/mlir/**/*TransformOps* @ftynse @nicolasvasilache

# SPIR-V Dialect in MLIR.
/mlir/**/SPIRV/ @antiagainst @kuhar
Expand All @@ -119,3 +120,8 @@ clang/test/AST/Interp/ @tbaederr

# Bazel build system.
/utils/bazel/ @rupprecht

# InstallAPI and TextAPI
/llvm/**/TextAPI/ @cyndyishida
/clang/**/InstallAPI/ @cyndyishida
/clang/tools/clang-installapi/ @cyndyishida
16 changes: 8 additions & 8 deletions bolt/docs/BAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ binary onto the original binary.
# Usage
`--enable-bat` flag controls the generation of BAT section. Sampled profile
needs to be passed along with the optimized binary containing BAT section to
`perf2bolt` which reads BAT section and produces fdata profile for the original
binary. Note that YAML profile generation is not supported since BAT doesn't
contain the metadata for input functions.
`perf2bolt` which reads BAT section and produces profile for the original
binary.

# Internals
## Section contents
Expand Down Expand Up @@ -91,11 +90,12 @@ current function.
### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Input offsets implicitly start at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
| Entry | Encoding | Description | Branch/BB |
| ------ | ------| ----------- | ------ |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |

`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/MCPlus.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class MCAnnotation {
kOffset, /// Offset in the function.
kLabel, /// MCSymbol pointing to this instruction.
kSize, /// Size of the instruction.
kDynamicBranch, /// Jit instruction patched at runtime.
kGeneric /// First generic annotation.
};

Expand Down
17 changes: 17 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,16 @@ class MCPlusBuilder {
/// Set instruction size.
void setSize(MCInst &Inst, uint32_t Size) const;

/// Check if the branch instruction could be modified at runtime.
bool isDynamicBranch(const MCInst &Inst) const;

/// Return ID for runtime-modifiable instruction.
std::optional<uint32_t> getDynamicBranchID(const MCInst &Inst) const;

/// Mark instruction as a dynamic branch, i.e. a branch that can be
/// overwritten at runtime.
void setDynamicBranch(MCInst &Inst, uint32_t ID) const;

/// Return MCSymbol that represents a target of this instruction at a given
/// operand number \p OpNum. If there's no symbol associated with
/// the operand - return nullptr.
Expand Down Expand Up @@ -1688,6 +1698,13 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}

/// Create long conditional branch with a target-specific conditional code
/// \p CC.
virtual void createLongCondBranch(MCInst &Inst, const MCSymbol *Target,
unsigned CC, MCContext *Ctx) const {
llvm_unreachable("not implemented");
}

/// Reverses the branch condition in Inst and update its taken target to TBB.
///
/// Returns true on success.
Expand Down
10 changes: 9 additions & 1 deletion bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ class BoltAddressTranslation {
/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;

/// Returns BB index by function output address (after BOLT) and basic block
/// input offset.
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }

private:
/// Helper to update \p Map by inserting one or more BAT entries reflecting
/// \p BB for function located at \p FuncAddress. At least one entry will be
Expand Down Expand Up @@ -151,7 +158,8 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

using BBHashMap = std::unordered_map<uint32_t, size_t>;
/// Map basic block input offset to a basic block index and hash pair.
using BBHashMap = std::unordered_map<uint32_t, std::pair<unsigned, size_t>>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;

/// Links outlined cold bocks to their original function
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ class DataAggregator : public DataReader {
/// Dump data structures into a file readable by llvm-bolt
std::error_code writeAggregatedFile(StringRef OutputFilename) const;

/// Dump translated data structures into YAML
std::error_code writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const;

/// Filter out binaries based on PID
void filterBinaryMMapInfo();

Expand Down
8 changes: 7 additions & 1 deletion bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1939,7 +1939,13 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
OS << Endl;
return;
}
InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
if (std::optional<uint32_t> DynamicID =
MIB->getDynamicBranchID(Instruction)) {
OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
<< " # ID: " << DynamicID;
} else {
InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
}
if (MIB->isCall(Instruction)) {
if (MIB->isTailCall(Instruction))
OS << " # TAILCALL ";
Expand Down
17 changes: 16 additions & 1 deletion bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3350,6 +3350,16 @@ void BinaryFunction::fixBranches() {

// Eliminate unnecessary conditional branch.
if (TSuccessor == FSuccessor) {
// FIXME: at the moment, we cannot safely remove static key branches.
if (MIB->isDynamicBranch(*CondBranch)) {
if (opts::Verbosity) {
BC.outs()
<< "BOLT-INFO: unable to remove redundant dynamic branch in "
<< *this << '\n';
}
continue;
}

BB->removeDuplicateConditionalSuccessor(CondBranch);
if (TSuccessor != NextBB)
BB->addBranchInstruction(TSuccessor);
Expand All @@ -3358,8 +3368,13 @@ void BinaryFunction::fixBranches() {

// Reverse branch condition and swap successors.
auto swapSuccessors = [&]() {
if (MIB->isUnsupportedBranch(*CondBranch))
if (MIB->isUnsupportedBranch(*CondBranch)) {
if (opts::Verbosity) {
BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
<< '\n';
}
return false;
}
std::swap(TSuccessor, FSuccessor);
BB->swapConditionalSuccessors();
auto L = BC.scopeLock();
Expand Down
22 changes: 22 additions & 0 deletions bolt/lib/Core/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,28 @@ void MCPlusBuilder::setSize(MCInst &Inst, uint32_t Size) const {
setAnnotationOpValue(Inst, MCAnnotation::kSize, Size);
}

bool MCPlusBuilder::isDynamicBranch(const MCInst &Inst) const {
if (!hasAnnotation(Inst, MCAnnotation::kDynamicBranch))
return false;
assert(isBranch(Inst) && "Branch expected.");
return true;
}

std::optional<uint32_t>
MCPlusBuilder::getDynamicBranchID(const MCInst &Inst) const {
if (std::optional<int64_t> Value =
getAnnotationOpValue(Inst, MCAnnotation::kDynamicBranch)) {
assert(isBranch(Inst) && "Branch expected.");
return static_cast<uint32_t>(*Value);
}
return std::nullopt;
}

void MCPlusBuilder::setDynamicBranch(MCInst &Inst, uint32_t ID) const {
assert(isBranch(Inst) && "Branch expected.");
setAnnotationOpValue(Inst, MCAnnotation::kDynamicBranch, ID);
}

bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const {
return (bool)getAnnotationOpValue(Inst, Index);
}
Expand Down
20 changes: 19 additions & 1 deletion bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ static cl::opt<unsigned>
cl::desc("print statistics about basic block ordering"),
cl::init(0), cl::cat(BoltOptCategory));

static cl::opt<bool> PrintLargeFunctions(
"print-large-functions",
cl::desc("print functions that could not be overwritten due to excessive "
"size"),
cl::init(false), cl::cat(BoltOptCategory));

static cl::list<bolt::DynoStats::Category>
PrintSortedBy("print-sorted-by", cl::CommaSeparated,
cl::desc("print functions sorted by order of dyno stats"),
Expand Down Expand Up @@ -570,8 +576,12 @@ Error CheckLargeFunctions::runOnFunctions(BinaryContext &BC) {
uint64_t HotSize, ColdSize;
std::tie(HotSize, ColdSize) =
BC.calculateEmittedSize(BF, /*FixBranches=*/false);
if (HotSize > BF.getMaxSize())
if (HotSize > BF.getMaxSize()) {
if (opts::PrintLargeFunctions)
BC.outs() << "BOLT-INFO: " << BF << " size exceeds allocated space by "
<< (HotSize - BF.getMaxSize()) << " bytes\n";
BF.setSimple(false);
}
};

ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
Expand Down Expand Up @@ -852,6 +862,10 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryFunction &BF) {
assert(Result && "internal error analyzing conditional branch");
assert(CondBranch && "conditional branch expected");

// Skip dynamic branches for now.
if (BF.getBinaryContext().MIB->isDynamicBranch(*CondBranch))
continue;

// It's possible that PredBB is also a successor to BB that may have
// been processed by a previous iteration of the SCTC loop, in which
// case it may have been marked invalid. We should skip rewriting in
Expand Down Expand Up @@ -1012,6 +1026,10 @@ uint64_t ShortenInstructions::shortenInstructions(BinaryFunction &Function) {
const BinaryContext &BC = Function.getBinaryContext();
for (BinaryBasicBlock &BB : Function) {
for (MCInst &Inst : BB) {
// Skip shortening instructions with Size annotation.
if (BC.MIB->getSize(Inst))
continue;

MCInst OriginalInst;
if (opts::Verbosity > 2)
OriginalInst = Inst;
Expand Down
39 changes: 29 additions & 10 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
getBBIndex(HotFuncAddress, BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
Expand Down Expand Up @@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
}
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
// Output and Input addresses and delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint64_t OutputAddress = KeyVal.first + Address;
Expand All @@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
// Basic block hash
size_t BBHash = FuncHashPair.second[InOffset >> 1];
unsigned BBIndex;
size_t BBHash;
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
OS.write(reinterpret_cast<char *>(&BBHash), 8);
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first,
InOffset >> 1, BBHash));
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
PrevBBIndex = BBIndex;
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
InOffset >> 1, BBHash, BBIndex));
}
}
}
Expand Down Expand Up @@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0;
size_t BBIndex = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
Expand All @@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
}
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
size_t BBHash = 0;
size_t BBIndexDelta = 0;
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
if (!IsBranchEntry) {
BBHash = DE.getU64(&Offset, &Err);
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash);
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
std::pair(BBIndex, BBHash));
}
LLVM_DEBUG({
dbgs() << formatv(
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
if (BBHash)
dbgs() << formatv(" {0:x}", BBHash);
if (!IsBranchEntry) {
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
getULEB128Size(BBIndexDelta));
}
dbgs() << '\n';
});
}
Expand Down Expand Up @@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
FuncHashes[BF.getAddress()].first = BF.computeHash();
BF.computeBlockHashes();
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
BB.getHash());
FuncHashes[BF.getAddress()].second.emplace(
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
}
}

unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first;
}

size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset);
return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second;
}

size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
Expand Down
Loading

0 comments on commit 6f96182

Please sign in to comment.