Skip to content

Commit

Permalink
Merge pull request #16 from yomaytk/improve
Browse files Browse the repository at this point in the history
improve the lifted LLVM IR.
  • Loading branch information
yomaytk authored Feb 27, 2024
2 parents 66bce25 + 4dd572c commit 6e748d6
Show file tree
Hide file tree
Showing 29 changed files with 352 additions and 438 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,5 @@ obj-intel64/*
VERSION

examples/w2c
examples/tests
examples/tests
examples/mnist-neural-network-plain-c
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ echo "deb-src http://apt.llvm.org/${DISTRO_NAME}/ llvm-toolchain-${DISTRO_NAME}-
# Install the build toolchain and development libraries.
# `apt update` runs in the same layer as `apt install` so that a cached, stale
# package index is never reused by the install step, and the package lists are
# removed in that same layer.
RUN apt update && \
    apt install -qqy --no-install-recommends file libtinfo-dev libzstd-dev python3-pip python3-setuptools python-setuptools python3 build-essential \
    clang-${LLVM_VERSION} lld-${LLVM_VERSION} llvm-${LLVM_VERSION} ninja-build pixz xz-utils make rpm curl unzip tar git zip pkg-config vim \
    libc6-dev liblzma-dev zlib1g-dev libselinux1-dev libbsd-dev ccache binutils-dev libelf-dev && \
    apt upgrade --yes && apt clean --yes && \
    rm -rf /var/lib/apt/lists/*
Expand Down
5 changes: 5 additions & 0 deletions backend/remill/include/remill/Arch/Runtime/Definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@
ALWAYS_INLINE __attribute__((flatten)) static Memory *name(Memory *memory, State &state, \
##__VA_ARGS__)

// Define a DEF_SEM variant whose generated function returns `uint64_t`.
// The generated function is `static`, always inlined and flattened, and takes
// `Memory *memory`, `State &state`, followed by any extra arguments supplied
// through `__VA_ARGS__`.
#define DEF_SEM_RETU64(name, ...) \
ALWAYS_INLINE __attribute__((flatten)) static uint64_t name(Memory *memory, State &state, \
##__VA_ARGS__)

template <typename R, typename... Args>
inline static constexpr auto Specialize(R (*)(Args...), R (*b)(Args...)) -> R (*)(Args...) {
return b;
Expand Down
4 changes: 2 additions & 2 deletions backend/remill/include/remill/Arch/Runtime/Intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ extern "C" {

[[gnu::used]] extern Memory *__remill_read_memory_f80(Memory *, addr_t, native_float80_t &);

// 128-bit float memory-read intrinsic: takes a `Memory *` and an `addr_t` and
// returns a `float128_t`. Declared exactly once, carrying `gnu::const` like the
// f32/f64 intrinsics declared next to it.
[[gnu::used, gnu::const]] extern float128_t __remill_read_memory_f128(Memory *, addr_t);

[[gnu::used, gnu::const]] extern Memory *__remill_write_memory_f32(Memory *, addr_t, float32_t);

[[gnu::used, gnu::const]] extern Memory *__remill_write_memory_f64(Memory *, addr_t, float64_t);

[[gnu::used]] extern Memory *__remill_write_memory_f80(Memory *, addr_t, const native_float80_t &);

// 128-bit float memory-write intrinsic: takes a `Memory *`, an `addr_t` and a
// `float128_t`, and returns a `Memory *`. Declared exactly once, carrying
// `gnu::const` like the f32/f64 write intrinsics declared next to it.
[[gnu::used, gnu::const]] extern Memory *__remill_write_memory_f128(Memory *, addr_t, float128_t);

[[gnu::used, gnu::const]] extern uint8_t __remill_undefined_8(void);

Expand Down
1 change: 1 addition & 0 deletions backend/remill/include/remill/BC/TraceLifter.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ class TraceLifter::Impl {
llvm::BasicBlock *indirectbr_block;
llvm::SwitchInst *switch_inst;
std::map<uint64_t, llvm::BasicBlock *> lifted_block_map;
std::vector<std::pair<llvm::BasicBlock *, llvm::Value *>> br_blocks;
bool lift_all_insn;
const size_t max_inst_bytes;
std::string inst_bytes;
Expand Down
24 changes: 14 additions & 10 deletions backend/remill/include/remill/BC/Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,20 @@ void InitFunctionAttributes(llvm::Function *F);

// Create a call from one lifted function to another.
//
// `pc_value` is optional and defaults to `nullptr`, so existing call sites
// that pass only the original arguments keep compiling unchanged.
// NOTE(review): presumably a non-null `pc_value` is used as the program
// counter argument instead of loading `NEXT_PC` -- confirm against the
// definition.
llvm::CallInst *AddCall(llvm::IRBuilder<> &builder, llvm::BasicBlock *source_block,
                        llvm::Value *dest_func, const IntrinsicTable &intrinsics,
                        llvm::Value *pc_value = nullptr);

// Overload that takes only the source block (no explicit builder).
llvm::CallInst *AddCall(llvm::BasicBlock *source_block, llvm::Value *dest_func,
                        const IntrinsicTable &intrinsics, llvm::Value *pc_value = nullptr);

// Create a tail-call from one lifted function to another.
//
// `pc_value` is optional and defaults to `nullptr`, so existing call sites
// that pass only the original arguments keep compiling unchanged.
// NOTE(review): presumably a non-null `pc_value` is used as the program
// counter argument instead of loading `NEXT_PC` -- confirm against the
// definition.
llvm::CallInst *AddTerminatingTailCall(llvm::Function *source_func, llvm::Value *dest_func,
                                       const IntrinsicTable &intrinsics,
                                       llvm::Value *pc_value = nullptr);

// Overload that takes the source basic block instead of the source function.
llvm::CallInst *AddTerminatingTailCall(llvm::BasicBlock *source_block, llvm::Value *dest_func,
                                       const IntrinsicTable &intrinsics,
                                       llvm::Value *pc_value = nullptr);

// Find a local variable defined in the entry block of the function. We use
// this to find register variables.
Expand Down Expand Up @@ -138,12 +141,6 @@ llvm::Value *LoadMemoryPointerRef(llvm::BasicBlock *block);
/* Return a reference to the indirect br addr ref. */
llvm::Value *LoadIndirectBrAddrRef(llvm::BasicBlock *block);

/* Return a reference to the vma start address */
llvm::Value *LoadVMASRef(llvm::BasicBlock *block);

/* Return a reference to the vma end address */
llvm::Value *LoadVMAERef(llvm::BasicBlock *block);

// Return an `llvm::Value *` that is an `i1` (bool type) representing whether
// or not a conditional branch is taken.
llvm::Value *LoadBranchTaken(llvm::IRBuilder<> &builder);
Expand Down Expand Up @@ -210,6 +207,13 @@ llvm::Argument *NthArgument(llvm::Function *func, size_t index);
std::array<llvm::Value *, kNumBlockArgs> LiftedFunctionArgs(llvm::BasicBlock *block,
const IntrinsicTable &intrinsics);

// Return the array of `kNumBlockArgs` arguments to pass to a lifted function,
// where the arguments are derived from `block`.
// This function uses a constant program counter (`pc_value`) instead of
// loading `NEXT_PC`.
std::array<llvm::Value *, kNumBlockArgs>
LiftedFunctionArgsWithPCValue(llvm::BasicBlock *block, const IntrinsicTable &intrinsics,
llvm::Value *pc_value);

// Serialize an LLVM object into a string.
std::string LLVMThingToString(llvm::Value *thing);
std::string LLVMThingToString(llvm::Type *thing);
Expand Down
92 changes: 57 additions & 35 deletions backend/remill/lib/Arch/AArch64/Arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,28 +827,28 @@ bool AArch64Arch::ArchDecodeInstruction(uint64_t address, std::string_view inst_
}

// Control flow operands update the next program counter.
if (inst.IsControlFlow()) {
inst.operands.emplace_back();
auto &dst_ret_pc = inst.operands.back();
dst_ret_pc.type = Operand::kTypeRegister;
dst_ret_pc.action = Operand::kActionWrite;
dst_ret_pc.size = address_size;
dst_ret_pc.reg.name = "NEXT_PC";
dst_ret_pc.reg.size = address_size;
}
// if (inst.IsControlFlow()) {
// inst.operands.emplace_back();
// auto &dst_ret_pc = inst.operands.back();
// dst_ret_pc.type = Operand::kTypeRegister;
// dst_ret_pc.action = Operand::kActionWrite;
// dst_ret_pc.size = address_size;
// dst_ret_pc.reg.name = "NEXT_PC";
// dst_ret_pc.reg.size = address_size;
// }

// The semantics will store the return address in `RETURN_PC`. This is to
// help synchronize program counters when lifting instructions on an ISA
// with delay slots.
if (inst.IsFunctionCall()) {
inst.operands.emplace_back();
auto &dst_ret_pc = inst.operands.back();
dst_ret_pc.type = Operand::kTypeRegister;
dst_ret_pc.action = Operand::kActionWrite;
dst_ret_pc.size = address_size;
dst_ret_pc.reg.name = "RETURN_PC";
dst_ret_pc.reg.size = address_size;
}
// if (inst.IsFunctionCall()) {
// inst.operands.emplace_back();
// auto &dst_ret_pc = inst.operands.back();
// dst_ret_pc.type = Operand::kTypeRegister;
// dst_ret_pc.action = Operand::kActionWrite;
// dst_ret_pc.size = address_size;
// dst_ret_pc.reg.name = "RETURN_PC";
// dst_ret_pc.reg.size = address_size;
// }

return true;
}
Expand Down Expand Up @@ -2884,48 +2884,67 @@ bool TryDecodeSBCS_64_ADDSUB_CARRY(const InstData &data, Instruction &inst) {
return TryDecodeSBC_64_ADDSUB_CARRY(data, inst);
}

// Shared operand setup for the UCVTF decoders: appends a written destination
// register operand (`data.Rd`, class `dest_class`) followed by a read source
// register operand (`data.Rn`, class `src_class`). Always returns true.
static bool TryDecodeUCVTF_Un_FLOAT2INT(const InstData &data, Instruction &inst,
                                        RegClass dest_class, RegClass src_class) {
  // Destination operand first.
  AddRegOperand(inst, kActionWrite, dest_class, kUseAsValue, data.Rd);

  // Then the source operand.
  AddRegOperand(inst, kActionRead, src_class, kUseAsValue, data.Rn);

  constexpr bool kDecoded = true;
  return kDecoded;
}

// UCVTF <Hd>, <Wn>
// Adds the destination (`kRegH`) and source (`kRegW`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_H32_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegH, kRegW);
}

// UCVTF <Sd>, <Wn>
// Adds the destination (`kRegS`) and source (`kRegW`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_S32_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegS, kRegW);
}

// UCVTF <Dd>, <Wn>
// Adds the destination (`kRegD`) and source (`kRegW`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_D32_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegD, kRegW);
}

// UCVTF <Hd>, <Xn>
// Adds the destination (`kRegH`) and source (`kRegX`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_H64_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegH, kRegX);
}

// UCVTF <Sd>, <Xn>
// Adds the destination (`kRegS`) and source (`kRegX`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_S64_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegS, kRegX);
}

// UCVTF <Dd>, <Xn>
// Adds the destination (`kRegD`) and source (`kRegX`) register operands exactly
// once by delegating to the shared UCVTF helper, and returns its result.
bool TryDecodeUCVTF_D64_FLOAT2INT(const InstData &data, Instruction &inst) {
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegD, kRegX);
}

// UCVTF <V><d>, <V><n>
// Appends a `_64` suffix to `inst.function` when `data.sz` is 1 and `_32`
// otherwise, then delegates operand setup to the shared UCVTF helper with
// `kRegV` as both the destination and source register class.
bool TryDecodeUCVTF_ASISDMISC_R(const InstData &data, Instruction &inst) {
  if (1 == data.sz) {
    inst.function += "_64";
  } else {
    inst.function += "_32";
  }
  return TryDecodeUCVTF_Un_FLOAT2INT(data, inst, kRegV, kRegV);
}

// FRINTA <Dd>, <Dn>
// Not decoded yet: the operand setup below is commented out and the function
// unconditionally returns false.
// NOTE(review): presumably a false return tells the caller that this encoding
// is unsupported -- confirm against the decoder's caller.
bool TryDecodeFRINTA_D_FLOATDP1(const InstData &data, Instruction &inst) {
// AddRegOperand(inst, kActionWrite, kRegD, kUseAsValue, data.Rd);
// AddRegOperand(inst, kActionRead, kRegD, kUseAsValue, data.Rn);
return false;
}

bool IsUnallocatedFloatEncoding(const InstData &data) {

// when type `10` UnallocatedEncoding()
Expand Down Expand Up @@ -3514,6 +3533,11 @@ static bool TryDecodeSTR_Vn_LDST_REGOFF(const InstData &data, Instruction &inst,
return true;
}

// STR <St>, [<Xn|SP>, (<Wm>|<Xm>){, <extend> {<amount>}}]
// Register-offset STR for the `kRegS` register class; all of the work is done
// by the class-parameterised decoder, whose result is returned unchanged.
bool TryDecodeSTR_S_LDST_REGOFF(const InstData &data, Instruction &inst) {
  const bool decoded = TryDecodeSTR_Vn_LDST_REGOFF(data, inst, kRegS);
  return decoded;
}

// STR <Qt>, [<Xn|SP>, (<Wm>|<Xm>){, <extend> {<amount>}}]
bool TryDecodeSTR_Q_LDST_REGOFF(const InstData &data, Instruction &inst) {
return TryDecodeSTR_Vn_LDST_REGOFF(data, inst, kRegQ);
Expand Down Expand Up @@ -4536,13 +4560,11 @@ bool TryDecodeSCVTF_D64_FLOAT2INT(const InstData &data, Instruction &inst) {

// SCVTF <V><d>, <V><n>
// Appends a `_64` suffix to `inst.function` when `data.sz` is 1 and `_32`
// otherwise, then delegates operand setup to `TryDecodeSCVTF_Sn_FLOAT2INT` with
// `kRegV` as both the destination and source register class (the same shape
// as the UCVTF scalar decoder in this file).
bool TryDecodeSCVTF_ASISDMISC_R(const InstData &data, Instruction &inst) {
  if (1 == data.sz) {
    inst.function += "_64";
  } else {
    inst.function += "_32";
  }
  return TryDecodeSCVTF_Sn_FLOAT2INT(data, inst, kRegV, kRegV);
}

// BIC <Vd>.<T>, <Vn>.<T>, <Vm>.<T>
Expand Down
114 changes: 0 additions & 114 deletions backend/remill/lib/Arch/AArch64/Decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3344,44 +3344,6 @@ bool TryDecodeUCVTF_ASISDMISCFP16_R(const InstData &, Instruction &) {
return false;
}

// UCVTF UCVTF_asisdmisc_R:
// 0 x Rd 0
// 1 x Rd 1
// 2 x Rd 2
// 3 x Rd 3
// 4 x Rd 4
// 5 x Rn 0
// 6 x Rn 1
// 7 x Rn 2
// 8 x Rn 3
// 9 x Rn 4
// 10 0
// 11 1
// 12 1 opcode 0
// 13 0 opcode 1
// 14 1 opcode 2
// 15 1 opcode 3
// 16 1 opcode 4
// 17 0
// 18 0
// 19 0
// 20 0
// 21 1
// 22 x sz 0
// 23 0
// 24 0
// 25 1
// 26 1
// 27 1
// 28 1
// 29 1 U 0
// 30 1
// 31 0
// UCVTF <V><d>, <V><n>
// Placeholder decoder: both parameters are unnamed and unused, and the
// function unconditionally returns false.
bool TryDecodeUCVTF_ASISDMISC_R(const InstData &, Instruction &) {
return false;
}

// UCVTF UCVTF_asimdmiscfp16_R:
// 0 x Rd 0
// 1 x Rd 1
Expand Down Expand Up @@ -35455,44 +35417,6 @@ bool TryDecodeFRINTA_S_FLOATDP1(const InstData &, Instruction &) {
return false;
}

// FRINTA FRINTA_D_floatdp1:
// 0 x Rd 0
// 1 x Rd 1
// 2 x Rd 2
// 3 x Rd 3
// 4 x Rd 4
// 5 x Rn 0
// 6 x Rn 1
// 7 x Rn 2
// 8 x Rn 3
// 9 x Rn 4
// 10 0
// 11 0
// 12 0
// 13 0
// 14 1
// 15 0 rmode 0
// 16 0 rmode 1
// 17 1 rmode 2
// 18 1
// 19 0
// 20 0
// 21 1
// 22 1 type 0
// 23 0 type 1
// 24 0
// 25 1
// 26 1
// 27 1
// 28 1
// 29 0 S 0
// 30 0
// 31 0 M 0
// FRINTA <Dd>, <Dn>
// Placeholder decoder: both parameters are unnamed and unused, and the
// function unconditionally returns false.
bool TryDecodeFRINTA_D_FLOATDP1(const InstData &, Instruction &) {
return false;
}

// FCSEL FCSEL_H_floatsel:
// 0 x Rd 0
// 1 x Rd 1
Expand Down Expand Up @@ -40243,44 +40167,6 @@ bool TryDecodeSTR_H_LDST_REGOFF(const InstData &, Instruction &) {
return false;
}

// STR STR_S_ldst_regoff:
// 0 x Rt 0
// 1 x Rt 1
// 2 x Rt 2
// 3 x Rt 3
// 4 x Rt 4
// 5 x Rn 0
// 6 x Rn 1
// 7 x Rn 2
// 8 x Rn 3
// 9 x Rn 4
// 10 0
// 11 1
// 12 x S 0
// 13 x option 0
// 14 x option 1
// 15 x option 2
// 16 x Rm 0
// 17 x Rm 1
// 18 x Rm 2
// 19 x Rm 3
// 20 x Rm 4
// 21 1
// 22 0 opc 0
// 23 0 opc 1
// 24 0
// 25 0
// 26 1 V 0
// 27 1
// 28 1
// 29 1
// 30 0 size 0
// 31 1 size 1
// STR <St>, [<Xn|SP>, (<Wm>|<Xm>){, <extend> {<amount>}}]
// Placeholder decoder: both parameters are unnamed and unused, and the
// function unconditionally returns false.
bool TryDecodeSTR_S_LDST_REGOFF(const InstData &, Instruction &) {
return false;
}

// LDCLRAB LDCLRAB_32_memop:
// 0 x Rt 0
// 1 x Rt 1
Expand Down
Loading

0 comments on commit 6e748d6

Please sign in to comment.