Merge pull request #346 from Xilinx/bump_to_13d983e7
[AutoBump] Merge with fixes of 9b78ddf (Jun 21, needs torch & onnx bump) (82)
mgehre-amd authored Sep 16, 2024
2 parents ff7d639 + cca70a5 commit b309613
Showing 2,610 changed files with 96,651 additions and 46,990 deletions.
3 changes: 3 additions & 0 deletions .git-blame-ignore-revs
@@ -81,3 +81,6 @@ f6d557ee34b6bbdb1dc32f29e34b4a4a8ad35e81

# [NFC] clang-format utils/TableGen (#80973)
b9079baaddfed5e604fbfaa1d81a7a1c38e78c26

# [libc++][NFC] Run clang-format on libcxx/include again (#95874)
e2c2ffbe7a1b5d9e32a2ce64279475b50c4cba5b
2 changes: 1 addition & 1 deletion .github/workflows/pr-code-format.yml
@@ -54,7 +54,7 @@ jobs:
- name: Install clang-format
uses: aminya/setup-cpp@v1
with:
clangformat: 18.1.1
clangformat: 18.1.7

- name: Setup Python env
uses: actions/setup-python@v5
104 changes: 104 additions & 0 deletions .github/workflows/release-sources.yml
@@ -0,0 +1,104 @@
name: Release Sources

permissions:
contents: read

on:
workflow_dispatch:
inputs:
release-version:
description: Release Version
required: true
type: string
workflow_call:
inputs:
release-version:
description: Release Version
required: true
type: string
# Run on pull_requests for testing purposes.
pull_request:
paths:
- '.github/workflows/release-sources.yml'
types:
- opened
- synchronize
- reopened
# When a PR is closed, we still start this workflow, but then skip
# all the jobs, which makes it effectively a no-op. The reason to
# do this is that it allows us to take advantage of concurrency groups
# to cancel in progress CI jobs whenever the PR is closed.
- closed

concurrency:
group: ${{ github.workflow }}-${{ inputs.release-version || github.event.pull_request.number }}
cancel-in-progress: True

jobs:
inputs:
name: Collect Job Inputs
if: >-
github.repository_owner == 'llvm' &&
github.event.action != 'closed'
outputs:
ref: ${{ steps.inputs.outputs.ref }}
export-args: ${{ steps.inputs.outputs.export-args }}
runs-on: ubuntu-latest
steps:
- id: inputs
run: |
ref=${{ inputs.release-version || github.sha }}
if [ -n "${{ inputs.release-version }}" ]; then
export_args="-release ${{ inputs.release-version }} -final"
else
export_args="-git-ref ${{ github.sha }}"
fi
echo "ref=$ref" >> $GITHUB_OUTPUT
echo "export-args=$export_args" >> $GITHUB_OUTPUT
release-sources:
name: Package Release Sources
if: github.repository_owner == 'llvm'
runs-on: ubuntu-latest
needs:
- inputs
permissions:
id-token: write
attestations: write
steps:
- name: Checkout LLVM
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ needs.inputs.outputs.ref }}
fetch-tags: true
- name: Install Dependencies
run: |
pip install --require-hashes -r ./llvm/utils/git/requirements.txt
- name: Check Permissions
if: github.event_name != 'pull_request'
env:
GITHUB_TOKEN: ${{ github.token }}
USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
run: |
./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user ${{ github.actor }} --user-token "$USER_TOKEN" check-permissions
- name: Create Tarballs
run: |
./llvm/utils/release/export.sh ${{ needs.inputs.outputs.export-args }}
- name: Attest Build Provenance
if: github.event_name != 'pull_request'
id: provenance
uses: actions/attest-build-provenance@897ed5eab6ed058a474202017ada7f40bfa52940 # v1.0.0
with:
subject-path: "*.xz"
- if: github.event_name != 'pull_request'
run: |
mv ${{ steps.provenance.outputs.bundle-path }} .
- name: Create Tarball Artifacts
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 #v4.3.3
with:
path: |
*.xz
attestation.jsonl
11 changes: 11 additions & 0 deletions .github/workflows/release-tasks.yml
@@ -85,3 +85,14 @@ jobs:
with:
release-version: ${{ needs.validate-tag.outputs.release-version }}
upload: true

release-sources:
name: Package Release Sources
permissions:
id-token: write
attestations: write
needs:
- validate-tag
uses: ./.github/workflows/release-sources.yml
with:
release-version: ${{ needs.validate-tag.outputs.release-version }}
7 changes: 6 additions & 1 deletion bolt/docs/CommandLineArgumentReference.md
@@ -802,6 +802,11 @@

The maximum size of a function to consider for inference.

- `--stale-matching-min-matched-block=<uint>`

Minimum percent of exact match block for a function to be considered for
profile inference.

- `--stale-threshold=<uint>`

Maximum percentage of stale functions to tolerate (default: 100)
@@ -1161,4 +1166,4 @@

- `--print-options`

Print non-default options after command line parsing
Print non-default options after command line parsing
1 change: 1 addition & 0 deletions bolt/lib/Core/BinaryFunction.cpp
@@ -45,6 +45,7 @@
#include <functional>
#include <limits>
#include <numeric>
#include <stack>
#include <string>

#define DEBUG_TYPE "bolt"
4 changes: 3 additions & 1 deletion bolt/lib/Passes/BinaryPasses.cpp
@@ -636,7 +636,9 @@ Error LowerAnnotations::runOnFunctions(BinaryContext &BC) {
Error CleanMCState::runOnFunctions(BinaryContext &BC) {
MCContext &Ctx = *BC.Ctx;
for (const auto &SymMapEntry : Ctx.getSymbols()) {
const MCSymbol *S = SymMapEntry.second;
const MCSymbol *S = SymMapEntry.getValue().Symbol;
if (!S)
continue;
if (S->isDefined()) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Symbol \"" << S->getName()
<< "\" is already defined\n");
72 changes: 53 additions & 19 deletions bolt/lib/Profile/StaleProfileMatching.cpp
@@ -51,6 +51,12 @@ cl::opt<bool>
cl::desc("Infer counts from stale profile data."),
cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMinMatchedBlock(
"stale-matching-min-matched-block",
cl::desc("Percentage threshold of matched basic blocks at which stale "
"profile inference is executed."),
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
"stale-matching-max-func-size",
cl::desc("The maximum size of a function to consider for inference."),
@@ -301,21 +307,21 @@ void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
BB->setHash(BlendedHashes[I].combine());
}
}

// TODO: mediate the difference between flow function construction here in BOLT
// and in the compiler by splitting blocks with exception throwing calls at the
// call and adding the landing pad as the successor.
/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
FlowFunction Func;

// Add a special "dummy" source so that there is always a unique entry point.
// Because of the extra source, for all other blocks in FlowFunction it holds
// that Block.Index == BB->getIndex() + 1
FlowBlock EntryBlock;
EntryBlock.Index = 0;
Func.Blocks.push_back(EntryBlock);

// Create FlowBlock for every basic block in the binary function
// Create FlowBlock for every basic block in the binary function.
for (const BinaryBasicBlock *BB : BlockOrder) {
Func.Blocks.emplace_back();
FlowBlock &Block = Func.Blocks.back();
@@ -325,7 +331,12 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
"incorrectly assigned basic block index");
}

// Create FlowJump for each jump between basic blocks in the binary function
// Add a special "dummy" sink block so there is always a unique sink.
FlowBlock SinkBlock;
SinkBlock.Index = Func.Blocks.size();
Func.Blocks.push_back(SinkBlock);

// Create FlowJump for each jump between basic blocks in the binary function.
std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
for (const BinaryBasicBlock *SrcBB : BlockOrder) {
std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
@@ -342,6 +353,16 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
InDegree[Jump.Target]++;
UniqueSuccs.insert(DstBB);
}
// TODO: set jump from exit block to landing pad to Unlikely.
// If the block is an exit, add a dummy edge from it to the sink block.
if (UniqueSuccs.empty()) {
Func.Jumps.emplace_back();
FlowJump &Jump = Func.Jumps.back();
Jump.Source = SrcBB->getIndex() + 1;
Jump.Target = Func.Blocks.size() - 1;
InDegree[Jump.Target]++;
}

// Collect jumps to landing pads
for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
// Ignoring parallel edges
@@ -358,9 +379,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
}

// Add dummy edges to the extra sources. If there are multiple entry blocks,
// add an unlikely edge from 0 to the subsequent ones
// add an unlikely edge from 0 to the subsequent ones. Skips the sink block.
assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
const BinaryBasicBlock *BB = BlockOrder[I - 1];
if (BB->isEntryPoint() || InDegree[I] == 0) {
Func.Jumps.emplace_back();
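
To make the indexing convention in `createFlowFunction` concrete, here is a small, self-contained sketch rather than BOLT code: the `ToyJump`/`ToyCFG` types and the three-block CFG are invented for illustration. It mirrors the wrapper layout described above, where original block `i` becomes flow block `i + 1`, index 0 is the dummy source, the last index is the dummy sink, and exit blocks get a dummy edge to the sink.

```cpp
// Standalone illustration only: mirrors the dummy-source/dummy-sink wrapping
// described above, with invented types (ToyCFG, ToyJump) rather than BOLT's
// FlowFunction. Block i of the original CFG becomes wrapper index i + 1;
// index 0 is the dummy source and the last index is the dummy sink.
#include <cstdint>
#include <iostream>
#include <vector>

struct ToyJump {
  uint64_t Source, Target;
};

struct ToyCFG {
  uint64_t NumBlocks = 0;               // wrapper blocks, incl. source/sink
  std::vector<ToyJump> Jumps;
};

// Successors[i] lists the successors of original block i.
ToyCFG wrapWithSourceAndSink(const std::vector<std::vector<uint64_t>> &Successors) {
  ToyCFG Func;
  const uint64_t N = Successors.size();
  Func.NumBlocks = N + 2;               // dummy source + N blocks + dummy sink
  const uint64_t Sink = N + 1;

  for (uint64_t I = 0; I < N; ++I) {
    for (uint64_t Succ : Successors[I])
      Func.Jumps.push_back({I + 1, Succ + 1});   // shift by one for the source
    if (Successors[I].empty())
      Func.Jumps.push_back({I + 1, Sink});       // exit block -> dummy sink
  }
  // The original entry (block 0) gets an edge from the dummy source.
  Func.Jumps.push_back({0, 1});
  return Func;
}

int main() {
  // Toy CFG: 0 -> 1, 0 -> 2; blocks 1 and 2 are exits.
  ToyCFG Func = wrapWithSourceAndSink({{1, 2}, {}, {}});
  for (const ToyJump &J : Func.Jumps)
    std::cout << J.Source << " -> " << J.Target << '\n';
  // Prints 1->2, 1->3, 2->4, 3->4, 0->1 (4 is the dummy sink).
}
```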
Expand Down Expand Up @@ -391,11 +412,10 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in CFG.
void matchWeightsByHashes(BinaryContext &BC,
const BinaryFunction::BasicBlockOrderType &BlockOrder,
const yaml::bolt::BinaryFunctionProfile &YamlBF,
FlowFunction &Func) {
assert(Func.Blocks.size() == BlockOrder.size() + 1);
size_t matchWeightsByHashes(
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
assert(Func.Blocks.size() == BlockOrder.size() + 2);

std::vector<FlowBlock *> Blocks;
std::vector<BlendedBlockHash> BlendedHashes;
@@ -500,6 +520,8 @@ void matchWeightsByHashes(BinaryContext &BC,
Block.HasUnknownWeight = false;
Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
}

return MatchedBlocks.size();
}

/// The function finds all blocks that are (i) reachable from the Entry block
Expand Down Expand Up @@ -575,13 +597,19 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
bool canApplyInference(const FlowFunction &Func) {
bool canApplyInference(const FlowFunction &Func,
const yaml::bolt::BinaryFunctionProfile &YamlBF,
const uint64_t &MatchedBlocks) {
if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
return false;

bool HasExitBlocks = llvm::any_of(
Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
if (!HasExitBlocks)
if (MatchedBlocks * 100 <
opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
return false;

// Returns false if the artificial sink block has no predecessors meaning
// there are no exit blocks.
if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
return false;

return true;
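
The new gate in `canApplyInference` is an integer-only percentage test: inference proceeds only when `MatchedBlocks * 100 >= StaleMatchingMinMatchedBlock * YamlBF.Blocks.size()`, i.e. at least the requested percent of profiled blocks was matched. A minimal standalone sketch of that check (the helper name `meetsMatchThreshold` is invented):

```cpp
// Standalone sketch of the percentage gate used above: keeping everything in
// integers avoids floating point while expressing "at least MinPercent of the
// profiled blocks must have been matched". Names here are invented.
#include <cassert>
#include <cstdint>
#include <iostream>

bool meetsMatchThreshold(uint64_t MatchedBlocks, uint64_t ProfiledBlocks,
                         uint64_t MinPercent) {
  assert(MinPercent <= 100 && "threshold is a percentage");
  // Equivalent to MatchedBlocks / ProfiledBlocks >= MinPercent / 100,
  // but without any division or rounding.
  return MatchedBlocks * 100 >= MinPercent * ProfiledBlocks;
}

int main() {
  // With 200 blocks in the stale profile and a 50% threshold, at least
  // 100 blocks must be matched for inference to proceed.
  std::cout << meetsMatchThreshold(99, 200, 50) << '\n';   // 0
  std::cout << meetsMatchThreshold(100, 200, 50) << '\n';  // 1
  // The default threshold of 0 accepts every function, even with no matches.
  std::cout << meetsMatchThreshold(0, 200, 0) << '\n';     // 1
}
```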
@@ -618,7 +646,7 @@ void assignProfile(BinaryFunction &BF,
FlowFunction &Func) {
BinaryContext &BC = BF.getBinaryContext();

assert(Func.Blocks.size() == BlockOrder.size() + 1);
assert(Func.Blocks.size() == BlockOrder.size() + 2);
for (uint64_t I = 0; I < BlockOrder.size(); I++) {
FlowBlock &Block = Func.Blocks[I + 1];
BinaryBasicBlock *BB = BlockOrder[I];
Expand All @@ -640,6 +668,9 @@ void assignProfile(BinaryFunction &BF,
if (Jump->Flow == 0)
continue;

// Skips the artificial sink block.
if (Jump->Target == Func.Blocks.size() - 1)
continue;
BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
// Check if the edge corresponds to a regular jump or a landing pad
if (BB->getSuccessor(SuccBB.getLabel())) {
Expand Down Expand Up @@ -725,18 +756,21 @@ bool YAMLProfileReader::inferStaleProfile(
const BinaryFunction::BasicBlockOrderType BlockOrder(
BF.getLayout().block_begin(), BF.getLayout().block_end());

// Tracks the number of matched blocks.

// Create a wrapper flow function to use with the profile inference algorithm.
FlowFunction Func = createFlowFunction(BlockOrder);

// Match as many block/jump counts from the stale profile as possible
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
size_t MatchedBlocks =
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);

// Adjust the flow function by marking unreachable blocks Unlikely so that
// they don't get any counts assigned.
preprocessUnreachableBlocks(Func);

// Check if profile inference can be applied for the instance.
if (!canApplyInference(Func))
if (!canApplyInference(Func, YamlBF, MatchedBlocks))
return false;

// Apply the profile inference algorithm.
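For orientation, here is an outline, with stubbed types and functions rather than BOLT's real signatures, of how the pieces changed in this file now fit together inside `inferStaleProfile`.

```cpp
// Outline only (stubs, not BOLT source): shows the order in which the pieces
// changed in this file are used by inferStaleProfile. The real functions take
// BinaryContext, BlockOrder, etc.; those parameters are omitted for brevity.
#include <cstddef>
#include <iostream>

struct FlowFunction {};           // stub for the wrapper CFG
struct BinaryFunctionProfile {};  // stub for yaml::bolt::BinaryFunctionProfile

FlowFunction createFlowFunction() { return {}; }           // dummy source/sink wrapper
size_t matchWeightsByHashes(FlowFunction &) { return 0; }  // returns #matched blocks
void preprocessUnreachableBlocks(FlowFunction &) {}
bool canApplyInference(const FlowFunction &, const BinaryFunctionProfile &,
                       size_t /*MatchedBlocks*/) { return false; }
void applyInference(FlowFunction &) {}

bool inferStaleProfileOutline(const BinaryFunctionProfile &YamlBF) {
  // 1. Wrap the function's CFG with a dummy source and a dummy sink.
  FlowFunction Func = createFlowFunction();
  // 2. Match block/jump counts from the stale profile and remember how many
  //    blocks were matched, so the threshold check has something to test.
  size_t MatchedBlocks = matchWeightsByHashes(Func);
  // 3. Mark unreachable blocks Unlikely so they receive no counts.
  preprocessUnreachableBlocks(Func);
  // 4. Bail out on huge functions, poorly matched profiles, or CFGs whose
  //    artificial sink has no predecessors (i.e. no exit blocks).
  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
    return false;
  // 5. Run the profile inference algorithm on the wrapper function.
  applyInference(Func);
  return true;
}

int main() {
  std::cout << inferStaleProfileOutline(BinaryFunctionProfile{}) << '\n';  // 0 with these stubs
}
```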
16 changes: 7 additions & 9 deletions bolt/lib/Rewrite/LinuxKernelRewriter.cpp
@@ -273,9 +273,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {

/// Handle alternative instruction info from .altinstructions.
Error readAltInstructions();
void processAltInstructionsPostCFG();
Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
bool AltInstHasPadLen, bool ParseOnly);
Error rewriteAltInstructions();

/// Read .pci_fixup
Error readPCIFixupTable();
@@ -326,6 +326,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = processORCPostCFG())
return E;

processAltInstructionsPostCFG();

return Error::success();
}

@@ -335,9 +337,6 @@
if (Error E = rewriteExceptionTable())
return E;

if (Error E = rewriteAltInstructions())
return E;

if (Error E = rewriteParaInstructions())
return E;

@@ -1486,12 +1485,11 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
return Error::success();
}

Error LinuxKernelRewriter::rewriteAltInstructions() {
// Disable output of functions with alt instructions before the rewrite
// support is complete.
void LinuxKernelRewriter::processAltInstructionsPostCFG() {
// Disable optimization and output of functions with alt instructions before
// the rewrite support is complete. Alt instructions can modify the control
// flow, hence we may end up deleting seemingly unreachable code.
skipFunctionsWithAnnotation("AltInst");

return Error::success();
}

/// When the Linux kernel needs to handle an error associated with a given PCI