Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIE] Solve it. Work In Progress adding a solver-based postpipeliner #255

Draft
wants to merge 4 commits into
base: aie-public
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/cmake/caches/Peano-AIE.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ if(LLVM_BUILD_LLVM_DYLIB)
list(APPEND _llvm_distribution_components LLVM clang-cpp)
endif()

option(LLVM_ENABLE_Z3_SOLVER "" ON)

# there's some bug here where if you list(APPEND ...) to a CACHE variable
# it doesn't work (neither libLLVM nor clang-cpp were being successfully installed)
set(LLVM_DISTRIBUTION_COMPONENTS ${_llvm_distribution_components} CACHE STRING "")
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
// But first try SWP
if (BS.getRegions().size() == 1) {
auto &PostSWP = BS.getPostSWP();
PostSWP.setUseSolver(true);
if (PostSWP.canAccept(*BS.TheBlock)) {
BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
return BS.FixPoint.Stage = SchedulingStage::Pipelining;
Expand Down
124 changes: 123 additions & 1 deletion llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "AIEPostPipeliner.h"
#include "AIESWPSolver.h"
#include "AIESlotCounts.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/CodeGen/ScheduleDAG.h"
Expand All @@ -23,6 +24,7 @@
#define DEBUG_FULL(X) DEBUG_WITH_TYPE("postpipeliner-full", X)

namespace llvm::AIE {
using namespace Solver;

static cl::opt<int>
Heuristic("aie-postpipeliner-heuristic",
Expand Down Expand Up @@ -59,6 +61,8 @@ class PostPipelineDumper : public PipelineScheduleVisitor {
// Construct a PostPipeliner. Initializes the HR and NInstr members from the
// constructor arguments; nothing else is set up here.
PostPipeliner::PostPipeliner(const AIEHazardRecognizer &HR, int NInstr)
: HR(HR), NInstr(NInstr) {}

// Record whether solve() may try the solver; solve() returns false
// immediately while this flag is unset.
void PostPipeliner::setUseSolver(bool Value) {
  UseSolver = Value;
}

bool PostPipeliner::canAccept(MachineBasicBlock &LoopBlock) {
// We leave the single-block loop criterion to our caller. It is fulfilled
// by being a loopaware scheduling candidate.
Expand Down Expand Up @@ -115,11 +119,15 @@ bool PostPipeliner::canAccept(MachineBasicBlock &LoopBlock) {
return true;
}

static SlotCounts getSlotCounts(MachineInstr &MI, const AIEBaseInstrInfo *TII) {
// Return the slot set of MI's slot kind, or 0 when TII has no slot info
// for that kind.
static uint64_t getSlotSet(MachineInstr &MI, const AIEBaseInstrInfo *TII) {
  const auto Kind = TII->getSlotKind(MI.getOpcode());
  if (auto *Info = TII->getSlotInfo(Kind)) {
    return Info->getSlotSet();
  }
  return 0;
}

// Build a SlotCounts from the slot set that getSlotSet() reports for MI.
static SlotCounts getSlotCounts(MachineInstr &MI, const AIEBaseInstrInfo *TII) {
  const uint64_t Slots = getSlotSet(MI, TII);
  return SlotCounts{Slots};
}

int PostPipeliner::getResMII(MachineBasicBlock &LoopBlock) {
// Add up all slot requirements and return the maximum slot count
SlotCounts Counts;
Expand Down Expand Up @@ -668,6 +676,10 @@ bool PostPipeliner::tryHeuristics() {

DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");

if (solve(MinLength / II)) {
return true;
}

int HeuristicIndex = 0;
for (auto &[ExtraStages, TopDown, Rerun, Components] : Strategies) {
if (Heuristic >= 0 && Heuristic != HeuristicIndex++) {
Expand Down Expand Up @@ -702,6 +714,116 @@ bool PostPipeliner::tryHeuristics() {
return false;
}

// This is a strategy that follows a pre-computed schedule. It picks
// instructions in the order of the final schedule and nudges earliest and
// latest so as to have no slack.
// It still checks latencies and resources.
class FixedStrategy : public PostPipelinerStrategy {
  // Pre-computed cycle for each instruction, indexed by SUnit::NodeNum.
  std::vector<int> Schedule;

  // True if the pre-computed schedule has an entry for node N.
  bool hasCycle(const SUnit &N) const { return N.NodeNum < Schedule.size(); }

  // We schedule in strict top-down order, and we leave only one cycle
  // to schedule it in.
  // Nodes without an entry in Schedule are never indexed; they rank after
  // every node that has one. This mirrors the bounds checks in earliest()
  // and latest() and keeps the ordering a strict weak order.
  bool better(const SUnit &A, const SUnit &B) override {
    if (!hasCycle(A)) {
      return false;
    }
    if (!hasCycle(B)) {
      return true;
    }
    return Schedule[A.NodeNum] < Schedule[B.NodeNum];
  }

  // Raise the base strategy's earliest cycle to the pre-computed cycle, if
  // the node has one.
  int earliest(const SUnit &N) override {
    int Result = PostPipelinerStrategy::earliest(N);
    if (hasCycle(N)) {
      Result = std::max(Result, Schedule[N.NodeNum]);
    }
    return Result;
  }

  // Lower the base strategy's latest cycle to the pre-computed cycle, if
  // the node has one.
  int latest(const SUnit &N) override {
    int Result = PostPipelinerStrategy::latest(N);
    if (hasCycle(N)) {
      Result = std::min(Result, Schedule[N.NodeNum]);
    }
    return Result;
  }

public:
  // Schedule holds one cycle per instruction, indexed by node number. The
  // other arguments are forwarded unchanged to PostPipelinerStrategy.
  FixedStrategy(ScheduleDAGInstrs &DAG, std::vector<NodeInfo> &Info, int Length,
                std::vector<int> Schedule)
      : PostPipelinerStrategy(DAG, Info, Length), Schedule(Schedule) {}
  std::string name() override { return "FixedStrategy"; }
};

bool PostPipeliner::solve(int NS) {
if (!UseSolver) {
return false;
}

Z3BinarySolver Solver;
for (int N = 0; N < NInstr; N++) {
SUnit &SU = DAG->SUnits[N];
auto *MI = SU.getInstr();
auto SlotSet = getSlotSet(*MI, TII);

// We assume we only have one slot bit
auto GetBit = [](uint64_t SlotSet) {
assert(SlotSet);
int SlotNo = 1;
while (!(SlotSet & 1)) {
SlotNo++;
SlotSet >>= 1;
}
assert(SlotSet == 1);
return SlotNo;
};
uint64_t MemoryBanks = HR.getMemoryBanks(MI);
unsigned Id = Solver.addInsn(GetBit(SlotSet), MemoryBanks);
assert(Id == SU.NodeNum);
for (auto Dep : SU.Preds) {
int From = Dep.getSUnit()->NodeNum;
if (From < NInstr) {
Solver.addLatency(From, N, Dep.getSignedLatency());
}
}
}

// Add loop-carried dependences to future iterations. The iteration
// distance is taken into account
for (int N = 0; N < NInstr; N++) {
SUnit &SU = DAG->SUnits[N];
for (auto Dep : SU.Succs) {
if (Dep.getKind() != SDep::Data) {
// continue;
}
int To = Dep.getSUnit()->NodeNum;
if (To >= NInstr && To % NInstr != N) {
Solver.addLatency(N, To % NInstr, Dep.getSignedLatency(), To / NInstr);
}
}
}

Solver.setScheduleSize(II, NS);
Solver.genModel();
if (!Solver.solveModel()) {
// Note: If we can't solve it, it doesn't mean the II isn't feasible,
// so we don't need to avoid running the heuristics.
return false;
}
auto Schedule = Solver.getCycles();
DEBUG_SUMMARY(dbgs() << "Solver found "; for (auto C
: Schedule) dbgs()
<< C << ", ";
dbgs() << "\n";);
FixedStrategy S{*DAG, Info, II * 3, Schedule};
resetSchedule(/*FullReset=*/true);
DEBUG_SUMMARY(dbgs() << "--- Strategy " << S.name() << "\n");
if (scheduleFirstIteration(S) && scheduleOtherIterations()) {
DEBUG_SUMMARY(dbgs() << " Strategy " << S.name() << " found II=" << II
<< "\n");
return true;
}

return false;
}

bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) {
NTotalInstrs = TheDAG.SUnits.size();
assert(NTotalInstrs % NInstr == 0);
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AIE/AIEPostPipeliner.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ class PostPipeliner {
// The instruction defining the tripcount
MachineInstr *TripCountDef = nullptr;

bool UseSolver = false;

// Basic modulo scheduling parameters
int NInstr;
int NCopies;
Expand Down Expand Up @@ -187,6 +189,9 @@ class PostPipeliner {
// this length will be a multiple of the InitiationInterval
int computeMinScheduleLength() const;

// try to find a solution using a solver
bool solve(int NS);

/// Try all heuristics, stop at the first that fits the II
/// If it returns true, a valid schedule is laid down in Info.
bool tryHeuristics();
Expand All @@ -210,6 +215,10 @@ class PostPipeliner {
public:
PostPipeliner(const AIEHazardRecognizer &HR, int NInstr);

// Specify whether to use a solver. Maybe for -O3, or pragma driven.
// Default is off
void setUseSolver(bool Value);

/// Check whether this is a suitable loop for the PostPipeliner. It also
/// leaves some useful information.
bool canAccept(MachineBasicBlock &LoopBlock);
Expand Down
Loading
Loading