From 44bbdb8a814ea5a06a12d7b9ed5a934afad6aaaa Mon Sep 17 00:00:00 2001
From: Bharadwaj Yadavalli <Bharadwaj.Yadavalli@microsoft.com>
Date: Sun, 12 Sep 2021 22:33:53 -0400
Subject: [PATCH] [X86-64] Add support to raise IDIV8[r/m]/DIV8[r/m] to
 complete support for raising signed and unsigned division instructions.

Clean up raising of division instructions.

[Tests] Add tests to verify support added.
---
 X86/X86AdditionalInstrInfo.cpp           |   8 +-
 X86/X86MachineInstructionRaiser.cpp      | 276 ++++++++++++-----------
 X86/X86MachineInstructionRaiserUtils.cpp |  34 ++-
 X86/X86RaisedValueTracker.cpp            |  23 +-
 X86/X86RaisedValueTracker.h              |   7 +-
 test/asm_test/X86/raise-divxxm.c         |  60 ++++-
 test/asm_test/X86/raise-divxxr.c         |  36 +++
 test/smoke_test/division-tests.c         |  85 ++++++-
 test/smoke_test/wrapper-calls.c          |   2 +-
 9 files changed, 373 insertions(+), 158 deletions(-)
 create mode 100644 test/asm_test/X86/raise-divxxr.c

diff --git a/X86/X86AdditionalInstrInfo.cpp b/X86/X86AdditionalInstrInfo.cpp
index 9f4c4ac4..3f781ba7 100644
--- a/X86/X86AdditionalInstrInfo.cpp
+++ b/X86/X86AdditionalInstrInfo.cpp
@@ -629,13 +629,13 @@ static constexpr const_addl_instr_info::value_type mapdata[] = {
     {X86::DEC8m, {1, INPLACE_MEM_OP}},
     {X86::DEC8r, {0, BINARY_OP_WITH_IMM}},
     {X86::DIV16m, {2, DIVIDE_MEM_OP}},
-    {X86::DIV16r, {0, Unknown}},
+    {X86::DIV16r, {0, DIVIDE_REG_OP}},
     {X86::DIV32m, {4, DIVIDE_MEM_OP}},
     {X86::DIV32r, {0, DIVIDE_REG_OP}},
     {X86::DIV64m, {8, DIVIDE_MEM_OP}},
-    {X86::DIV64r, {0, Unknown}},
-    {X86::DIV8m, {0, Unknown}},
-    {X86::DIV8r, {0, Unknown}},
+    {X86::DIV64r, {0, DIVIDE_REG_OP}},
+    {X86::DIV8m, {1, DIVIDE_MEM_OP}},
+    {X86::DIV8r, {0, DIVIDE_REG_OP}},
     {X86::DIVPDrm, {0, Unknown}},
     {X86::DIVPDrr, {0, Unknown}},
     {X86::DIVPSrm, {0, Unknown}},
diff --git a/X86/X86MachineInstructionRaiser.cpp b/X86/X86MachineInstructionRaiser.cpp
index 90da0cd1..3722a5d3 100644
--- a/X86/X86MachineInstructionRaiser.cpp
+++ b/X86/X86MachineInstructionRaiser.cpp
@@ -1497,7 +1497,8 @@ bool X86MachineInstructionRaiser::raiseBinaryOpMemToRegInstr(
   // Cast DestValue to the DestopTy, as for single-precision FP ops
   // DestValue type and DestopTy might be different.
   if (isSSE2Reg(DestPReg)) {
-    DestValue = getRaisedValues()->reinterpretSSERegValue(DestValue, DestopTy, RaisedBB);
+    DestValue = getRaisedValues()->reinterpretSSERegValue(DestValue, DestopTy,
+                                                          RaisedBB);
   } else {
     DestValue = getRaisedValues()->castValue(DestValue, DestopTy, RaisedBB);
   }
@@ -2301,7 +2302,7 @@ bool X86MachineInstructionRaiser::raiseInplaceMemOpInstr(const MachineInstr &MI,
   return true;
 }
 
-// Raise idiv instruction with memory reference value
+// Raise signed or unsigned division instruction with memory reference value
 bool X86MachineInstructionRaiser::raiseDivideFromMemInstr(
     const MachineInstr &MI, Value *MemRefValue) {
   LLVMContext &Ctx(MF.getFunction().getContext());
@@ -2314,7 +2315,8 @@ bool X86MachineInstructionRaiser::raiseDivideFromMemInstr(
   return raiseDivideInstr(MI, SrcValue);
 }
 
-// Raise idiv instruction with source operand with value SrcValue.
+// Raise signed or unsigned division instruction with source operand with value
+// SrcValue.
 bool X86MachineInstructionRaiser::raiseDivideInstr(const MachineInstr &MI,
                                                    Value *SrcValue) {
   const MCInstrDesc &MIDesc = MI.getDesc();
@@ -2324,125 +2326,145 @@ bool X86MachineInstructionRaiser::raiseDivideInstr(const MachineInstr &MI,
   // Raised instruction is added to this BasicBlock.
   BasicBlock *RaisedBB = getRaisedBasicBlock(MI.getParent());
 
-  // idiv uses AX(AH:AL or DX:AX or EDX:EAX or RDX:RAX pairs as dividend and
-  // stores the result in the same pair. Additionally, EFLAGS is an implicit
-  // def.
-  assert(MIDesc.getNumImplicitUses() == 2 && MIDesc.getNumImplicitDefs() == 3 &&
-         MIDesc.hasImplicitDefOfPhysReg(X86::EFLAGS) &&
-         "Unexpected number of implicit uses and defs in div instruction");
-  MCPhysReg UseDefReg_0 = MIDesc.ImplicitUses[0];
-  MCPhysReg UseDefReg_1 = MIDesc.ImplicitUses[1];
-  assert((UseDefReg_0 == MIDesc.ImplicitDefs[0]) &&
-         (UseDefReg_1 == MIDesc.ImplicitDefs[1]) &&
-         "Unexpected use/def registers in div instruction");
-
-  bool isSigned = true;
-  switch (MI.getOpcode()) {
-  case X86::DIV16m:
-  case X86::DIV32m:
-  case X86::DIV64m:
-    isSigned = false;
-    break;
-  default:
-    isSigned = true;
-  }
-
-  Value *DividendLowBytes = getPhysRegValue(MI, UseDefReg_0);
-  Value *DividendHighBytes = getPhysRegValue(MI, UseDefReg_1);
-  if ((DividendLowBytes == nullptr) || (DividendHighBytes == nullptr))
-    return false;
-
-  // Divisor is srcValue.
-  // Create a Value representing the dividend.
-  // TODO: Not sure how the implicit use registers of IDIV8m are encode.
-  // Does the instruction have AX as a single use/def register or does it
-  // have 2 use/def registers, viz., AH:AL pair similar to the other IDIV
-  // instructions? Handle it when it is encountered.
-  assert((DividendLowBytes->getType() == DividendHighBytes->getType()) &&
-         "Unexpected types of dividend registers in idiv instruction");
-  unsigned int UseDefRegSize =
-      DividendLowBytes->getType()->getScalarSizeInBits();
-  // Generate the following code
-  // %h = lshl DividendHighBytes, UseDefRegSize
-  // %f = or %h, DividendLowBytes
-  // %quo = idiv %f, srcValue
-  // %rem = irem %f, srcValue
-  // UseDef_0 = %quo
-  // UseDef_1 = %rem
-
-  // Logical Shift left DividendHighBytes by n-bits (where n is the size of
-  // UseDefRegSize) to get the high bytes and set DefReg_1 to the resulting
-  // value.
-  // DoubleTy type is of type twice the use reg size
-  Type *DoubleTy = Type::getIntNTy(Ctx, UseDefRegSize * 2);
-  Value *ShiftAmountVal =
-      ConstantInt::get(DoubleTy, UseDefRegSize, false /* isSigned */);
-  // Cast DividendHighBytes and DividendLowBytes to types with double the
-  // size.
-  CastInst *DividendLowBytesDT = CastInst::Create(
-      CastInst::getCastOpcode(DividendLowBytes, isSigned, DoubleTy, isSigned),
-      DividendLowBytes, DoubleTy);
-  RaisedBB->getInstList().push_back(DividendLowBytesDT);
-
-  CastInst *DividendHighBytesDT = CastInst::Create(
-      CastInst::getCastOpcode(DividendHighBytes, isSigned, DoubleTy, isSigned),
-      DividendHighBytes, DoubleTy);
-  RaisedBB->getInstList().push_back(DividendHighBytesDT);
-
-  Instruction *LShlInst =
-      BinaryOperator::CreateNUWShl(DividendHighBytesDT, ShiftAmountVal);
-  RaisedBB->getInstList().push_back(LShlInst);
-
-  // Combine the dividend values to get full dividend.
-  // or instruction
-  Instruction *FullDividend =
-      BinaryOperator::CreateOr(LShlInst, DividendLowBytesDT);
-  RaisedBB->getInstList().push_back(FullDividend);
-
-  // Cast divisor (srcValue) to double type
-  CastInst *srcValueDT = CastInst::Create(
-      CastInst::getCastOpcode(SrcValue, isSigned, DoubleTy, isSigned), SrcValue,
-      DoubleTy);
-  RaisedBB->getInstList().push_back(srcValueDT);
-
-  // quotient
-  Instruction *QuotientDT = nullptr;
-  if (isSigned)
-    QuotientDT = BinaryOperator::CreateSDiv(FullDividend, srcValueDT);
-  else
-    QuotientDT = BinaryOperator::CreateUDiv(FullDividend, srcValueDT);
-  RaisedBB->getInstList().push_back(QuotientDT);
-
-  // Cast Quotient back to UseDef reg value type
-  CastInst *Quotient = CastInst::Create(
-      CastInst::getCastOpcode(QuotientDT, isSigned, DividendLowBytes->getType(),
-                              isSigned),
-      QuotientDT, DividendLowBytes->getType());
-
-  RaisedBB->getInstList().push_back(Quotient);
-  // Update ssa val of UseDefReg_0
-  raisedValues->setPhysRegSSAValue(UseDefReg_0, MI.getParent()->getNumber(),
-                                   Quotient);
-
-  // remainder
-  Instruction *RemainderDT = nullptr;
-  if (isSigned)
-    RemainderDT = BinaryOperator::CreateSRem(FullDividend, srcValueDT);
-  else
-    RemainderDT = BinaryOperator::CreateURem(FullDividend, srcValueDT);
-  RaisedBB->getInstList().push_back(RemainderDT);
-
-  // Cast RemainderDT back to UseDef reg value type
-  CastInst *Remainder = CastInst::Create(
-      CastInst::getCastOpcode(RemainderDT, isSigned,
-                              DividendHighBytes->getType(), isSigned),
-      RemainderDT, DividendHighBytes->getType());
+  // div uses AX or DX:AX or EDX:EAX or RDX:RAX registers as dividend and
+  // stores the result in AX(AH:AL) or DX:AX or EDX:EAX or RDX:RAX.
+  // Additionally, EFLAGS is an implicit def.
+  bool is8BOperand =
+      (instrNameStartsWith(MI, "IDIV8") || instrNameStartsWith(MI, "DIV8"));
+
+  // Determine if the instruction is a signed division or not
+  bool isSigned = instrNameStartsWith(MI, "IDIV");
+
+  // Handle 8-bit division differently as it does not use register pairs.
+  if (is8BOperand) {
+    assert(MIDesc.getNumImplicitUses() == 1 &&
+           MIDesc.getNumImplicitDefs() == 3 &&
+           MIDesc.hasImplicitDefOfPhysReg(X86::EFLAGS) &&
+           "Unexpected number of implicit uses and defs in div instruction");
+    // Ensure AX is the implicit use operand
+    MCPhysReg UseDefReg = MIDesc.ImplicitUses[0];
+    assert((UseDefReg == X86::AX) &&
+           "Expected AX operand of div instruction not found");
+    LLVMContext &Ctx(MF.getFunction().getContext());
+    // This is the instruction result type
+    Type *ResultType = Type::getInt16Ty(Ctx);
+    Value *DividendValue = getPhysRegValue(MI, UseDefReg);
+    // Cast SrcValue and DividendValue to ResultType
+    Value *SrcValue16b =
+        getRaisedValues()->castValue(SrcValue, ResultType, RaisedBB, isSigned);
+    Value *DividendValue16b = getRaisedValues()->castValue(
+        DividendValue, ResultType, RaisedBB, isSigned);
+
+    // quotient
+    auto DivOp = (isSigned) ? Instruction::SDiv : Instruction::UDiv;
+    Instruction *Quotient = BinaryOperator::Create(
+        DivOp, DividendValue16b, SrcValue16b, "div8_q", RaisedBB);
+
+    // remainder
+    auto RemOp = (isSigned) ? Instruction::SRem : Instruction::URem;
+    Instruction *Remainder = BinaryOperator::Create(
+        RemOp, DividendValue16b, SrcValue16b, "div8_r", RaisedBB);
+
+    // Construct a value of ResultType holding quotient in AL and remainder in
+    // AH. Clear high-byte of Quotient
+    Instruction *ALValue = BinaryOperator::CreateAnd(
+        Quotient, ConstantInt::get(ResultType, 0xff), "div8_al", RaisedBB);
+    // Shift Remainder by 8
+    Instruction *AHValue = BinaryOperator::CreateShl(
+        Remainder, ConstantInt::get(ResultType, 8), "div8_ah", RaisedBB);
+    // Construct AX value
+    Instruction *AXValue =
+        BinaryOperator::CreateOr(ALValue, AHValue, "div8_ax", RaisedBB);
+    // CF, OF, SF, ZF, AF and PF flags are undefined. So, no need to generate
+    // code to compute any of the status flags. Update value of UseDefReg
+    raisedValues->setPhysRegSSAValue(UseDefReg, MI.getParent()->getNumber(),
+                                     AXValue);
 
-  RaisedBB->getInstList().push_back(Remainder);
-  // CF, OF, SF, ZF, AF and PF flags are undefined. So, no need to generate code
-  // to compute any of the status flags. Update ssa val of UseDefReg_1
-  raisedValues->setPhysRegSSAValue(UseDefReg_1, MI.getParent()->getNumber(),
-                                   Remainder);
+  } else {
+    assert(MIDesc.getNumImplicitUses() == 2 &&
+           MIDesc.getNumImplicitDefs() == 3 &&
+           MIDesc.hasImplicitDefOfPhysReg(X86::EFLAGS) &&
+           "Unexpected number of implicit uses and defs in div instruction");
+    MCPhysReg UseDefReg_0 = MIDesc.ImplicitUses[0];
+    MCPhysReg UseDefReg_1 = MIDesc.ImplicitUses[1];
+    assert((UseDefReg_0 == MIDesc.ImplicitDefs[0]) &&
+           (UseDefReg_1 == MIDesc.ImplicitDefs[1]) &&
+           "Unexpected use/def registers in div instruction");
+
+    Value *DividendLowBytes = getPhysRegValue(MI, UseDefReg_0);
+    Value *DividendHighBytes = getPhysRegValue(MI, UseDefReg_1);
+    if ((DividendLowBytes == nullptr) || (DividendHighBytes == nullptr))
+      return false;
+
+    // Divisor is srcValue.
+    // Create a Value representing the dividend.
+    assert((DividendLowBytes->getType() == DividendHighBytes->getType()) &&
+           "Unexpected types of dividend registers in div instruction");
+    unsigned int UseDefRegSize =
+        DividendLowBytes->getType()->getScalarSizeInBits();
+    // Generate the following code
+    // %h = lshl DividendHighBytes, UseDefRegSize
+    // %f = or %h, DividendLowBytes
+    // %quo = (s/u)div %f, srcValue
+    // %rem = (s/u)rem %f, srcValue
+    // UseDef_0 = %quo
+    // UseDef_1 = %rem
+
+    // Logical Shift left DividendHighBytes by n-bits (where n is the size of
+    // UseDefRegSize) to get the high bytes and set DefReg_1 to the resulting
+    // value.
+    // DoubleTy type is of type twice the use reg size
+    Type *DoubleTy = Type::getIntNTy(Ctx, UseDefRegSize * 2);
+    Value *ShiftAmountVal =
+        ConstantInt::get(DoubleTy, UseDefRegSize, false /* isSigned */);
+    // Cast DividendHighBytes and DividendLowBytes to types with double the
+    // size.
+    Value *DividendLowBytesDT = getRaisedValues()->castValue(
+        DividendLowBytes, DoubleTy, RaisedBB, isSigned);
+
+    Value *DividendHighBytesDT = getRaisedValues()->castValue(
+        DividendHighBytes, DoubleTy, RaisedBB, isSigned);
+
+    Instruction *LShlInst = BinaryOperator::CreateNUWShl(
+        DividendHighBytesDT, ShiftAmountVal, "div_hb_ls", RaisedBB);
+
+    // Combine the dividend values to get full dividend.
+    // or instruction
+    Instruction *FullDividend = BinaryOperator::CreateOr(
+        LShlInst, DividendLowBytesDT, "dividend", RaisedBB);
+
+    // Cast divisor (srcValue) to double type
+    Value *srcValueDT =
+        getRaisedValues()->castValue(SrcValue, DoubleTy, RaisedBB, isSigned);
+    // quotient
+    auto DivOp = (isSigned) ? Instruction::SDiv : Instruction::UDiv;
+    Instruction *QuotientDT = BinaryOperator::Create(
+        DivOp, FullDividend, srcValueDT, "div_q", RaisedBB);
+
+    // Cast Quotient back to UseDef reg value type
+    Value *Quotient = getRaisedValues()->castValue(
+        QuotientDT, DividendLowBytes->getType(), RaisedBB, isSigned);
+
+    // Update ssa val of UseDefReg_0
+    raisedValues->setPhysRegSSAValue(UseDefReg_0, MI.getParent()->getNumber(),
+                                     Quotient);
+
+    // remainder
+    auto RemOp = (isSigned) ? Instruction::SRem : Instruction::URem;
+    Instruction *RemainderDT = BinaryOperator::Create(
+        RemOp, FullDividend, srcValueDT, "div_r", RaisedBB);
+
+    // Cast RemainderDT back to UseDef reg value type
+    CastInst *Remainder = CastInst::Create(
+        CastInst::getCastOpcode(RemainderDT, isSigned,
+                                DividendHighBytes->getType(), isSigned),
+        RemainderDT, DividendHighBytes->getType(), "", RaisedBB);
+
+    // CF, OF, SF, ZF, AF and PF flags are undefined. So, no need to generate
+    // code to compute any of the status flags. Update value of UseDefReg_1
+    raisedValues->setPhysRegSSAValue(UseDefReg_1, MI.getParent()->getNumber(),
+                                     Remainder);
+  }
 
   return true;
 }
@@ -2632,13 +2654,11 @@ bool X86MachineInstructionRaiser::raiseCompareMachineInstr(
   raisedValues->setEflagBoolean(EFLAGS::CF, MBBNo, false);
   // CmpInst is of type Value * to allow for a potential need to pass it to
   // castValue(), if needed.
-  Value *CmpInst = nullptr;
   // If MI is a test instruction, the compare instruction should be an and
   // instruction.
-  if (isTESTInst)
-    CmpInst = BinaryOperator::CreateAnd(OpValues[0], OpValues[1]);
-  else
-    CmpInst = BinaryOperator::CreateSub(OpValues[0], OpValues[1]);
+  Value *CmpInst = (isTESTInst)
+                       ? BinaryOperator::CreateAnd(OpValues[0], OpValues[1])
+                       : BinaryOperator::CreateSub(OpValues[0], OpValues[1]);
   // Casting CmpInst to instruction to be added to the raised basic
   // block is correct since it is known to be specifically of type Instruction.
   RaisedBB->getInstList().push_back(dyn_cast<Instruction>(CmpInst));
@@ -4062,8 +4082,8 @@ bool X86MachineInstructionRaiser::raiseReturnMachineInstr(
   // Ensure RetValue type match RetType
   if (RetValue != nullptr) {
     if (retReg == X86::XMM0) {
-      RetValue =
-          getRaisedValues()->reinterpretSSERegValue(RetValue, RetType, RaisedBB);
+      RetValue = getRaisedValues()->reinterpretSSERegValue(RetValue, RetType,
+                                                           RaisedBB);
     } else {
       RetValue = getRaisedValues()->castValue(RetValue, RetType, RaisedBB);
     }
diff --git a/X86/X86MachineInstructionRaiserUtils.cpp b/X86/X86MachineInstructionRaiserUtils.cpp
index b9cd4a35..fbcfbf4d 100644
--- a/X86/X86MachineInstructionRaiserUtils.cpp
+++ b/X86/X86MachineInstructionRaiserUtils.cpp
@@ -193,8 +193,8 @@ Value *X86MachineInstructionRaiser::loadMemoryRefValue(
     // content and should not be loaded from.
     if (auto GV = dyn_cast<GlobalVariable>(MemRefValue))
       LoadFromMemrefValue = !(GV->hasInitializer());
-      // If it is not a PC-relative constant expression accessed using
-      // GetElementPtrInst, it is memory content and should not be loaded from.
+    // If it is not a PC-relative constant expression accessed using
+    // GetElementPtrInst, it is memory content and should not be loaded from.
     else {
       const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(MemRefValue);
       if (CExpr != nullptr) {
@@ -1503,8 +1503,8 @@ Function *X86MachineInstructionRaiser::getTargetFunctionAtPLTOffset(
         // This is an undefined function symbol. Look through the list of
         // user provided function prototypes and construct a Function
         // accordingly.
-        CalledFunc = IncludedFileInfo::CreateFunction(*CalledFuncSymName,
-                                               *const_cast<ModuleRaiser *>(MR));
+        CalledFunc = IncludedFileInfo::CreateFunction(
+            *CalledFuncSymName, *const_cast<ModuleRaiser *>(MR));
         // Bail out if function prototype is not available
         if (!CalledFunc)
           exit(-1);
@@ -1879,7 +1879,8 @@ X86MachineInstructionRaiser::getGlobalVariableValueAt(const MachineInstr &MI,
             GlobalInit = ConstantInt::get(GlobalValTy, SV);
         }
 
-        // Declare external global variables as external and don't initalize them
+        // Declare external global variables as external and don't initialize
+        // them
         if (IncludedFileInfo::IsExternalVariable(
                 GlobalDataSymNameIndexStrRef.str())) {
           Lnkg = GlobalValue::ExternalLinkage;
@@ -2004,7 +2005,7 @@ X86MachineInstructionRaiser::getMemoryAddressExprValue(const MachineInstr &MI) {
     if (IndexRegVal->getType()->isPointerTy()) {
       Type *LdTy = IndexRegVal->getType()->getPointerElementType();
       LoadInst *LdInst =
-        new LoadInst(LdTy, IndexRegVal, "memload", false, Align());
+          new LoadInst(LdTy, IndexRegVal, "memload", false, Align());
       RaisedBB->getInstList().push_back(LdInst);
       IndexRegVal = LdInst;
     }
@@ -2251,7 +2252,8 @@ Value *X86MachineInstructionRaiser::getRegOperandValue(const MachineInstr &MI,
   const MachineOperand &MO = MI.getOperand(OpIndex);
   Value *PRegValue = nullptr; // Unknown, to start with.
   assert(MO.isReg() && "Register operand expected");
-  PRegValue = getRegOrArgValue(MO.getReg(), MI.getParent()->getNumber());
+  auto PReg = MO.getReg();
+  PRegValue = getRegOrArgValue(PReg, MI.getParent()->getNumber());
 
   if (PRegValue != nullptr) {
     // Cast the value in accordance with the register size of the operand,
@@ -2259,9 +2261,23 @@ Value *X86MachineInstructionRaiser::getRegOperandValue(const MachineInstr &MI,
     Type *PRegTy = getPhysRegOperandType(MI, OpIndex);
     // Get the BasicBlock corresponding to MachineBasicBlock of MI.
     BasicBlock *RaisedBB = getRaisedBasicBlock(MI.getParent());
-    if (isSSE2Reg(MO.getReg())) {
-      PRegValue = getRaisedValues()->reinterpretSSERegValue(PRegValue, PRegTy, RaisedBB);
+    if (isSSE2Reg(PReg)) {
+      PRegValue = getRaisedValues()->reinterpretSSERegValue(PRegValue, PRegTy,
+                                                            RaisedBB);
     } else {
+      // If PReg is one of AH, BH, CH or DH extract and return the high-byte of
+      // PRegValue.
+      if ((PReg == X86::AH) || (PReg == X86::BH) || (PReg == X86::CH) ||
+          (PReg == X86::DH)) {
+        LLVMContext &Ctx(MF.getFunction().getContext());
+        // Cast the value to i16
+        Value *PRegValue16b = getRaisedValues()->castValue(
+            PRegValue, Type::getInt16Ty(Ctx), RaisedBB);
+        // Perform logical right shift of PRegValue16b
+        PRegValue = BinaryOperator::CreateLShr(
+            PRegValue16b, ConstantInt::get(PRegValue16b->getType(), 8), "",
+            RaisedBB);
+      }
       PRegValue = getRaisedValues()->castValue(PRegValue, PRegTy, RaisedBB);
     }
   }
diff --git a/X86/X86RaisedValueTracker.cpp b/X86/X86RaisedValueTracker.cpp
index 556ccd61..14fcafd8 100644
--- a/X86/X86RaisedValueTracker.cpp
+++ b/X86/X86RaisedValueTracker.cpp
@@ -347,8 +347,8 @@ Value *X86RaisedValueTracker::getReachingDef(unsigned int PhysReg, int MBBNo,
     bool HasUnknownReachingDef = false;
     for (auto RD : ReachingDefs) {
       if (RD.second == nullptr) {
-        // we might not have found a incoming edge that tells us if we're dealing
-        // with int or floating point types -> continue for now
+        // we might not have found a incoming edge that tells us if we're
+        // dealing with int or floating point types -> continue for now
         HasUnknownReachingDef = true;
         continue;
       }
@@ -1085,11 +1085,13 @@ Value *X86RaisedValueTracker::getEflagReachingDef(unsigned int FlagBit,
 // Cast SrcVal to the type of DstVal, if their types are different.
 // Return the cast instruction upon inserting it at the end of InsertBlock
 Value *X86RaisedValueTracker::castValue(Value *SrcValue, Type *DstTy,
-                                        BasicBlock *InsertBlock) {
+                                        BasicBlock *InsertBlock,
+                                        bool SrcIsSigned) {
   if (SrcValue->getType() != DstTy) {
-    Instruction *CInst =
-        CastInst::Create(CastInst::getCastOpcode(SrcValue, false, DstTy, false),
-                         SrcValue, DstTy);
+    // If SrcValue is signed, Dst is also signed
+    Instruction *CInst = CastInst::Create(
+        CastInst::getCastOpcode(SrcValue, SrcIsSigned, DstTy, SrcIsSigned),
+        SrcValue, DstTy);
     // Set RODataIndex metadata
     setInstMetadataRODataIndex(SrcValue, CInst);
     // Add the cast instruction RaisedBB.
@@ -1100,9 +1102,10 @@ Value *X86RaisedValueTracker::castValue(Value *SrcValue, Type *DstTy,
   return SrcValue;
 }
 
-Value *X86RaisedValueTracker::reinterpretSSERegValue(Value *SrcVal, Type *DstTy,
-                                                  BasicBlock *InsertBlock,
-                                                  Instruction *InsertBefore) {
+Value *
+X86RaisedValueTracker::reinterpretSSERegValue(Value *SrcVal, Type *DstTy,
+                                              BasicBlock *InsertBlock,
+                                              Instruction *InsertBefore) {
   assert((InsertBlock != nullptr || InsertBefore != nullptr) &&
          "Expected either InsertBlock or InsertBefore to be not null");
 
@@ -1151,7 +1154,7 @@ Value *X86RaisedValueTracker::reinterpretSSERegValue(Value *SrcVal, Type *DstTy,
 }
 
 Type *X86RaisedValueTracker::getSSEInstructionType(const MachineInstr &MI,
-                                                         LLVMContext &Ctx) {
+                                                   LLVMContext &Ctx) {
   uint64_t TSFlags = MI.getDesc().TSFlags;
 
   if ((TSFlags & llvm::X86II::OpPrefixMask) == llvm::X86II::XS) {
diff --git a/X86/X86RaisedValueTracker.h b/X86/X86RaisedValueTracker.h
index 2e47af15..c2a1c1c6 100644
--- a/X86/X86RaisedValueTracker.h
+++ b/X86/X86RaisedValueTracker.h
@@ -70,7 +70,8 @@ class X86RaisedValueTracker {
   unsigned getInBlockPhysRegSize(unsigned int PhysReg, int MBBNo);
   // Cast SrcVal to type DstTy, if the type of SrcVal is different from DstTy.
   // Return the cast instruction upon inserting it at the end of InsertBlock
-  Value *castValue(Value *SrcVal, Type *DstTy, BasicBlock *InsertBlock);
+  Value *castValue(Value *SrcVal, Type *DstTy, BasicBlock *InsertBlock,
+                   bool SrcIsSigned = false);
 
   // Cast SrcVal to type DstTy if the types are different. This function does
   // not change any bits in the value. This allows to interpret SSE register
@@ -81,7 +82,9 @@ class X86RaisedValueTracker {
   // Return type: <0x0, 0x0, 0x0, (bitcast SrcVal as i32)>
   // If the passed value is larger than DstTy, the excess bits are truncated.
   // If the types are of the same size, the value is just bitcast
-  Value *reinterpretSSERegValue(Value *SrcVal, Type *DstTy, BasicBlock *InsertBlock = nullptr, Instruction *InsertBefore = nullptr);
+  Value *reinterpretSSERegValue(Value *SrcVal, Type *DstTy,
+                                BasicBlock *InsertBlock = nullptr,
+                                Instruction *InsertBefore = nullptr);
   // Returns the type of an SSE instruction
   Type *getSSEInstructionType(const MachineInstr &, LLVMContext &);
 
diff --git a/test/asm_test/X86/raise-divxxm.c b/test/asm_test/X86/raise-divxxm.c
index d3740050..8b10fb12 100644
--- a/test/asm_test/X86/raise-divxxm.c
+++ b/test/asm_test/X86/raise-divxxm.c
@@ -3,7 +3,13 @@
 // RUN: llvm-mctoll -d -I /usr/include/stdio.h %t
 // RUN: clang -o %t1 %t-dis.ll
 // RUN: %t1 2>&1 | FileCheck %s
-// CHECK: [Implicit AX/DX]
+// CHECK: [Implicit AH/AL]
+// CHECK-NEXT: Test 0xfa DIV8m 0xf0
+// CHECK-NEXT: Quotient = 0x1, Remainder = 0xa
+// CHECK-NEXT: [Implicit AH/AL]
+// CHECK-NEXT: Test 0xfffffffa IDIV8m 0xfffffff0
+// CHECK-NEXT: Quotient = 0xfffffff1, Remainder = 0xa
+// CHECK-NEXT: [Implicit AX/DX]
 // CHECK-NEXT: Test 0xf1d2 DIV16m 0xf123
 // CHECK-NEXT: Quotient = 0x1, Remainder = 0xaf
 // CHECK-NEXT: [Implicit EAX/EDX]
@@ -111,11 +117,59 @@ test_divm64_rax_rdx(unsigned long int a, unsigned long int b) {
   return 0;
 }
 
+// IDIV8m
+int __attribute__((noinline))
+test_idivm8(char a, char b) {
+  char q = 0;
+  char r = 0;
+  char spill; // spill loc
+
+  printf("[Implicit AH/AL]\nTest 0x%x IDIV8m 0x%x\n", a, b);
+
+  asm("movzbw  %[a], %%ax\n"
+      "mov %[b], %[spill]\n"
+      "idivb  %[spill]\n"
+      "mov  %%ah, %[r]\n"
+      "mov  %%al, %[q]\n"
+      : [q] "=r"(q), [r] "=r"(r),
+	[spill] "=m"(spill)                  /* output operands */
+      : [a] "r"(a), [b] "r"(b)                       /* input operands */
+      : "%ax"                                /* list of clobbered registers */
+  );
+
+  printf("Quotient = 0x%x, Remainder = 0x%x\n", q, r);
+  return 0;
+}
+
+// DIV8m
+int __attribute__((noinline))
+test_divm8(unsigned char a, unsigned char b) {
+  unsigned char q = 0;
+  unsigned char r = 0;
+  unsigned char spill; // spill loc
+
+  printf("[Implicit AH/AL]\nTest 0x%x DIV8m 0x%x\n", a, b);
+
+  asm("movzbw  %[a], %%ax\n"
+      "mov %[b], %[spill]\n"
+      "divb  %[spill]\n"
+      "mov  %%ah, %[r]\n"
+      "mov  %%al, %[q]\n"
+      : [q] "=r"(q), [r] "=r"(r),
+	[spill] "=m"(spill)                  /* output operands */
+      : [a] "r"(a), [b] "r"(b)               /* input operands */
+      : "%ax"                                /* list of clobbered registers */
+  );
+
+  printf("Quotient = 0x%x, Remainder = 0x%x\n", q, r);
+  return 0;
+}
+
 int main() {
+  test_divm8(0xFA, 0xF0);
+  test_idivm8(0xFA, 0xF0);
   test_divm16_ax_dx(0xF1D2, 0xF123);
-
   test_divm32_eax_edx(0x9FEEDDCC, 0xF8);
-
   test_divm64_rax_rdx(0xBBAACCDD12345678, 0xABCDEF);
   return 0;
 }
diff --git a/test/asm_test/X86/raise-divxxr.c b/test/asm_test/X86/raise-divxxr.c
new file mode 100644
index 00000000..607c6cec
--- /dev/null
+++ b/test/asm_test/X86/raise-divxxr.c
@@ -0,0 +1,36 @@
+// REQUIRES: system-linux
+// RUN: clang -o %t %s -O2
+// RUN: llvm-mctoll -d -I /usr/include/stdio.h %t
+// RUN: clang -o %t1 %t-dis.ll
+// RUN: %t1 2>&1 | FileCheck %s
+// CHECK: [Implicit AH/AL]
+// CHECK-NEXT: Test 0xfffffffa IDIV8r 0xfffffff0
+// CHECK-NEXT: Quotient = 0xfffffff1, Remainder = 0xa
+// CHECK-EMPTY
+#include <stdio.h>
+
+// IDIV8r
+int __attribute__((noinline))
+test_idiv8r(char a, char b) {
+  char quotient = 0;
+  char remainder = 0;
+
+  printf("[Implicit AH/AL]\nTest 0x%x IDIV8r 0x%x\n", a, b);
+
+  asm("movzbw  %2, %%ax\n"
+      "idivb  %3\n"
+      "mov  %%ah, %1\n"
+      "mov  %%al, %0\n"
+      : "=r"(quotient), "=r"(remainder)      /* output operands */
+      : "r"(a), "r"(b)                       /* input operands */
+      : "%ax"                                /* list of clobbered registers */
+  );
+
+  printf("Quotient = 0x%x, Remainder = 0x%x\n", quotient, remainder);
+  return 0;
+}
+
+int main() {
+  test_idiv8r(0xFA, 0xF0);
+  return 0;
+}
diff --git a/test/smoke_test/division-tests.c b/test/smoke_test/division-tests.c
index 9c11217d..ddd5e4fa 100644
--- a/test/smoke_test/division-tests.c
+++ b/test/smoke_test/division-tests.c
@@ -2,7 +2,7 @@
 #include <stdint.h>
 #include <stdio.h>
 // REQUIRES: system-linux
-// RUN: clang -o %t %s
+// RUN: clang -o %t %s -O2
 // RUN: llvm-mctoll -d -I /usr/include/stdio.h %t
 // RUN: clang -o %t1 %t-dis.ll
 // RUN: %t1 2>&1 | FileCheck %s
@@ -14,6 +14,78 @@
 // CHECK: Int_1 = 1
 // CHECK: Int_2 = 13
 // CHECK: Int_3 = 7
+// CHECK: q = 0x53 r = 0x1
+// CHECK: q = 0xfffffffe r = 0x0
+// CHECK: q = 0x5f8 r = 0xc
+// CHECK: q = 0xffffffe0 r = 0xfffffffc
+// CHECK: q = 0x5de09 r = 0x2331
+// CHECK: q = 0xffffe079 r = 0xfffff8f1
+// CHECK: q = 0x7d5dfff r = 0x1ff82a2001
+// CHECK: q = 0xffffffffffd5e001 r = 0xffffffe0002a1fff
+
+void __attribute__((noinline)) div8_test(uint8_t dd,
+					 uint8_t dv) {
+  uint8_t  q = dd / dv;
+  uint8_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) idiv8_test(int8_t dd,
+					  int8_t dv) {
+  int16_t  q = dd / dv;
+  int16_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) div16_test(uint16_t dd,
+					  uint16_t dv) {
+  uint16_t  q = dd / dv;
+  uint16_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) idiv16_test(int16_t dd,
+					   int16_t dv) {
+  int16_t  q = dd / dv;
+  int16_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) div32_test(uint32_t dd,
+					  uint32_t dv) {
+  uint32_t  q = dd / dv;
+  uint32_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) idiv32_test(int32_t dd,
+					   int32_t dv) {
+  int32_t  q = dd / dv;
+  int32_t  r = dd % dv;
+  printf("q = 0x%x r = 0x%x\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) div64_test(uint64_t dd,
+					  uint64_t dv) {
+  uint64_t  q = dd / dv;
+  uint64_t  r = dd % dv;
+  printf("q = 0x%lx r = 0x%lx\n", q, r);
+  return;
+}
+
+void __attribute__((noinline)) idiv64_test(int64_t dd,
+					   int64_t dv) {
+  int64_t  q = dd / dv;
+  int64_t  r = dd % dv;
+  printf("q = 0x%lx r = 0x%lx\n", q, r);
+  return;
+}
 
 int main() {
   uint8_t a = 10;
@@ -42,5 +114,16 @@ int main() {
   printf("Int_1 = %d\n", Int_1);
   printf("Int_2 = %d\n", Int_2);
   printf("Int_3 = %d\n", Int_3);
+
+  // More division tests
+  div8_test(0xfa, 0x3);
+  idiv8_test(0xfa, 0x3);
+  div16_test(0xfabc, 0x2a);
+  idiv16_test(0xfabc, 0x2a);
+  div32_test(0xfabcabcd, 0x2abc);
+  idiv32_test(0xfabcabcd, 0x2abc);
+  div64_test(0xfabc000000000000, 0x0000002000000001);
+  idiv64_test(0xfabc000000000000, 0x0000002000000001);
+
   return 0;
 }
diff --git a/test/smoke_test/wrapper-calls.c b/test/smoke_test/wrapper-calls.c
index b31ad6b4..455bd45d 100644
--- a/test/smoke_test/wrapper-calls.c
+++ b/test/smoke_test/wrapper-calls.c
@@ -22,7 +22,7 @@
         that returns the tail call return is not possible.
 */
 
-#include <stdlib.h>
+#include <stddef.h>
 #include <string.h>
 
 void * __attribute__((noinline))