summaryrefslogtreecommitdiffstats
path: root/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp1460
1 files changed, 939 insertions, 521 deletions
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a2daa890943..3cfcb1e09f0 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -65,6 +65,13 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+// Disabled for causing self-hosting failures once returned-attribute inference
+// was enabled.
+static cl::opt<bool>
+EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden,
+ cl::desc("Directly forward this return"),
+ cl::init(false));
+
namespace {
class ARMCCState : public CCState {
public:
@@ -240,7 +247,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Set the correct calling convention for ARMv7k WatchOS. It's just
// AAPCS_VFP for functions as simple as libcalls.
- if (Subtarget->isTargetWatchOS()) {
+ if (Subtarget->isTargetWatchABI()) {
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
@@ -254,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// RTLIB
if (Subtarget->isAAPCS_ABI() &&
(Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
- Subtarget->isTargetAndroid())) {
+ Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -406,17 +413,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
- // The half <-> float conversion functions are always soft-float, but are
- // needed for some targets which use a hard-float calling convention by
- // default.
- if (Subtarget->isAAPCS_ABI()) {
- setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
- } else {
- setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
- setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
- setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ // The half <-> float conversion functions are always soft-float on
+ // non-watchos platforms, but are needed for some targets which use a
+ // hard-float calling convention by default.
+ if (!Subtarget->isTargetWatchABI()) {
+ if (Subtarget->isAAPCS_ABI()) {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ }
}
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
@@ -577,6 +586,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// NEON does not have single instruction CTTZ for vectors.
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
@@ -708,6 +722,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
+ } else {
+ // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}
setOperationAction(ISD::SADDO, MVT::i32, Custom);
@@ -754,10 +772,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- // These just redirect to CTTZ and CTLZ on ARM.
- setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
-
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
// FIXME: Technically there are older ARM CPUs that have
@@ -769,8 +783,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
- !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
+ bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
+ : Subtarget->hasDivideInARMMode();
+ if (!hasDivide) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
setOperationAction(ISD::SDIV, MVT::i32, LibCall);
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
@@ -787,9 +802,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
- if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
+ HasStandaloneRem = false;
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
@@ -811,6 +828,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
@@ -837,21 +856,21 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
- // the default expansion. If we are targeting a single threaded system,
- // then set them all for expand so we can lower them later into their
- // non-atomic form.
- if (TM.Options.ThreadModel == ThreadModel::Single)
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // the default expansion.
+ InsertFencesForAtomic = false;
+ if (Subtarget->hasAnyDataBarrier() &&
+ (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (!Subtarget->isThumb() || !Subtarget->isMClass())
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
- if (!Subtarget->hasV8Ops()) {
+ if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ InsertFencesForAtomic = true;
}
} else {
// If there's anything we can use as a barrier, go through custom lowering
@@ -913,6 +932,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ // Thumb-1 cannot currently select ARMISD::SUBE.
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::SETCCE, MVT::i32, Custom);
+
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
@@ -960,7 +983,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->isTargetWatchOS()) {
+ if (Subtarget->isTargetWatchABI()) {
setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
}
@@ -1043,7 +1066,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setMinStackArgumentAlignment(4);
// Prefer likely predicted branches to selects on out-of-order cores.
- PredictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1110,7 +1133,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
- case ARMISD::tCALL: return "ARMISD::tCALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
@@ -1127,6 +1149,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::SSAT: return "ARMISD::SSAT";
+
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1203,6 +1227,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
+ case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@@ -1377,7 +1402,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::ARM_APCS:
case CallingConv::GHC:
return CC;
+ case CallingConv::PreserveMost:
+ return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::Swift:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
@@ -1419,18 +1447,18 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
+ case CallingConv::PreserveMost:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
-SDValue
-ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- bool isThisReturn, SDValue ThisVal) const {
+SDValue ARMTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
+ SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1446,7 +1474,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
- if (i == 0 && isThisReturn) {
+ if (i == 0 && isThisReturn && EnableThisRetForwarding) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
@@ -1510,23 +1538,21 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
}
/// LowerMemOpCallTo - Store the argument to the stack.
-SDValue
-ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
- SDValue StackPtr, SDValue Arg,
- SDLoc dl, SelectionDAG &DAG,
- const CCValAssign &VA,
- ISD::ArgFlagsTy Flags) const {
+SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg, const SDLoc &dl,
+ SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
- false, false, 0);
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
-void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
@@ -1708,7 +1734,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false,
DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
@@ -1784,20 +1809,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool isDirect = false;
- bool isARMFunc = false;
+
+ const TargetMachine &TM = getTargetMachine();
+ const Module *Mod = MF.getFunction()->getParent();
+ const GlobalValue *GV = nullptr;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ GV = G->getGlobal();
+ bool isStub =
+ !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
+
+ bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
- assert((Subtarget->isTargetWindows() ||
- getTargetMachine().getRelocationModel() == Reloc::Static) &&
- "long-calls with non-static relocation model!");
+ assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
+ "long-calls codegen is not position independent!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
+ if (isa<GlobalAddressSDNode>(Callee)) {
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
@@ -1808,8 +1840,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1823,54 +1854,55 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
- }
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- isDirect = true;
- bool isDef = GV->isStrongDefinitionForLinker();
- bool isStub = (!isDef && Subtarget->isTargetMachO()) &&
- getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
- // ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
- // tBX takes a register source operand.
- if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
- Callee = DAG.getNode(
- ARMISD::WrapperPIC, dl, PtrVt,
- DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, true, 0);
- } else if (Subtarget->isTargetCOFF()) {
- assert(Subtarget->isTargetWindows() &&
- "Windows is the only supported COFF target");
- unsigned TargetFlags = GV->hasDLLImportStorageClass()
- ? ARMII::MO_DLLIMPORT
- : ARMII::MO_NO_FLAG;
- Callee =
- DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
- if (GV->hasDLLImportStorageClass())
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
+ } else if (isa<GlobalAddressSDNode>(Callee)) {
+ // If we're optimizing for minimum size and the function is called three or
+ // more times in this block, we can improve codesize by calling indirectly
+ // as BLXr has a 16-bit encoding.
+ auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ auto *BB = CLI.CS->getParent();
+ bool PreferIndirect =
+ Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
+ std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
+ return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
+ }) > 2;
+
+ if (!PreferIndirect) {
+ isDirect = true;
+ bool isDef = GV->isStrongDefinitionForLinker();
+
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
+ // tBX takes a register source operand.
+ if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
+ Callee = DAG.getNode(
+ ARMISD::WrapperPIC, dl, PtrVt,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee =
- DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
- DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
- } else {
- // On ELF targets for PIC code, direct calls should go through the PLT
- unsigned OpFlags = 0;
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
- OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags);
+ /* Alignment = */ 0, MachineMemOperand::MOInvariant);
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
+ TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee =
+ DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ } else {
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
+ }
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
- bool isStub = Subtarget->isTargetMachO() &&
- getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
@@ -1882,17 +1914,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
- unsigned OpFlags = 0;
- // On ELF targets for PIC code, direct calls should go through the PLT
- if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
- OpFlags = ARMII::MO_PLT;
- Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags);
+ Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
}
}
@@ -1902,11 +1928,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
- CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ CallOpc = ARMISD::CALL;
} else {
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
- else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+ else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
!MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
@@ -2046,7 +2072,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if (!Def)
return false;
if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(Def, FI))
+ if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
@@ -2086,9 +2112,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- const Function *CallerF = DAG.getMachineFunction().getFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
- bool CCMatch = CallerCC == CalleeCC;
assert(Subtarget->supportsTailCall());
@@ -2126,41 +2152,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // If the calling conventions do not match, then we'd better make sure the
- // results are returned in the same way as what the caller expects.
- if (!CCMatch) {
- SmallVector<CCValAssign, 16> RVLocs1;
- ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
- *DAG.getContext(), Call);
- CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
-
- SmallVector<CCValAssign, 16> RVLocs2;
- ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
- *DAG.getContext(), Call);
- CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
-
- if (RVLocs1.size() != RVLocs2.size())
+ // Check that the call results are passed in the same way.
+ LLVMContext &C = *DAG.getContext();
+ if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
+ CCAssignFnForNode(CalleeCC, true, isVarArg),
+ CCAssignFnForNode(CallerCC, true, isVarArg)))
+ return false;
+ // The callee has to preserve all registers the caller needs to preserve.
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+ if (CalleeCC != CallerCC) {
+ const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
- for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
- if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
- return false;
- if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
- return false;
- if (RVLocs1[i].isRegLoc()) {
- if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
- return false;
- } else {
- if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
- return false;
- }
- }
}
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
- const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
- getInfo<ARMFunctionInfo>();
+ const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
if (AFI_Caller->getArgRegsSaveSize())
return false;
@@ -2170,13 +2180,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Call);
+ ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
- MachineFunction &MF = DAG.getMachineFunction();
-
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2213,6 +2220,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
}
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+ return false;
}
return true;
@@ -2230,7 +2241,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
- SDLoc DL, SelectionDAG &DAG) {
+ const SDLoc &DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
@@ -2263,11 +2274,11 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
}
SDValue
-ARMTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const {
+ const SDLoc &dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2525,9 +2536,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDLoc DL(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
- if (RelocM == Reloc::Static) {
+ bool IsPositionIndependent = isPositionIndependent();
+ if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -2538,11 +2549,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
- SDValue Result =
- DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, 0);
- if (RelocM == Reloc::Static)
+ SDValue Result = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ if (!IsPositionIndependent)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
@@ -2588,7 +2598,8 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SDValue FuncTLVGet =
DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, true, true, 4);
+ /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
@@ -2614,6 +2625,61 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
+
+ SDValue Chain = DAG.getEntryNode();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+
+ // Load the current TEB (thread environment block)
+ SDValue Ops[] = {Chain,
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(2, DL, MVT::i32)};
+ SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+
+ SDValue TEB = CurrentTEB.getValue(0);
+ Chain = CurrentTEB.getValue(1);
+
+ // Load the ThreadLocalStoragePointer from the TEB
+ // A pointer to the TLS array is located at offset 0x2c from the TEB.
+ SDValue TLSArray =
+ DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
+ TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
+
+ // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
+ // offset into the TLSArray.
+
+ // Load the TLS index from the C runtime
+ SDValue TLSIndex =
+ DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
+ TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
+ TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
+
+ SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
+ DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
+ MachinePointerInfo());
+
+ // Get the offset of the start of the .tls section (section base)
+ const auto *GA = cast<GlobalAddressSDNode>(Op);
+ auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
+ SDValue Offset = DAG.getLoad(
+ PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
+ DAG.getTargetConstantPool(CPV, PtrVT, 4)),
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2629,10 +2695,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, 0);
+ Argument = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2649,8 +2714,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
- 0);
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2684,8 +2748,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2693,8 +2756,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else {
// local exec model
assert(model == TLSModel::LocalExec);
@@ -2704,8 +2766,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
// The address of the thread local variable is the add of the thread
@@ -2718,6 +2779,9 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
+ if (Subtarget->isTargetWindows())
+ return LowerGlobalTLSAddressWindows(Op, DAG);
+
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -2742,9 +2806,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- bool UseGOT_PREL =
- !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+ const TargetMachine &TM = getTargetMachine();
+ if (isPositionIndependent()) {
+ bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2760,15 +2824,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Result.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
if (UseGOT_PREL)
- Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ Result =
+ DAG.getLoad(PtrVT, dl, Chain, Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2785,8 +2848,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
}
@@ -2795,7 +2857,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
@@ -2803,15 +2864,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into multiple nodes
unsigned Wrapper =
- RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper;
+ isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
- if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ if (Subtarget->isGVIndirectSymbol(GV))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2837,8 +2897,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
@@ -2877,7 +2936,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
"RBIT intrinsic must have i32 type!");
return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
}
- case Intrinsic::arm_thread_pointer: {
+ case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
@@ -2886,10 +2945,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
- unsigned PCAdj = (RelocM != Reloc::PIC_)
- ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ bool IsPositionIndependent = isPositionIndependent();
+ unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
@@ -2897,10 +2955,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, 0);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -2966,7 +3023,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
- } else if (Subtarget->isSwift() && Ord == Release) {
+ } else if (Subtarget->preferISHSTBarriers() &&
+ Ord == AtomicOrdering::Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
@@ -3016,13 +3074,14 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
- MachinePointerInfo(SV), false, false, 0);
+ MachinePointerInfo(SV));
}
-SDValue
-ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
- SDValue &Root, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
+ CCValAssign &NextVA,
+ SDValue &Root,
+ SelectionDAG &DAG,
+ const SDLoc &dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -3045,8 +3104,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(
MVT::i32, dl, Root, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -3064,13 +3122,11 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
// Return: The frame index registers were stored into.
-int
-ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned InRegsParamRecordIdx,
- int ArgOffset,
- unsigned ArgSize) const {
+int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ int ArgOffset, unsigned ArgSize) const {
// Currently, two use-cases possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
@@ -3108,9 +3164,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
unsigned VReg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
@@ -3121,17 +3176,16 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
}
// Setup stack frame, the va_list pointer will start from.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc dl, SDValue &Chain,
- unsigned ArgOffset,
- unsigned TotalArgRegsSaveSize,
- bool ForceMutable) const {
+void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain,
+ unsigned ArgOffset,
+ unsigned TotalArgRegsSaveSize,
+ bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Try to store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
+ // to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
// If there is no regs to be stored, just point address after last
// argument passed via stack.
@@ -3141,14 +3195,10 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
AFI->setVarArgsFrameIndex(FrameIndex);
}
-SDValue
-ARMTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
+SDValue ARMTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3230,10 +3280,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(
- MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI));
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -3326,10 +3375,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(
- VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI)));
}
lastInsIndex = index;
}
@@ -3373,10 +3421,9 @@ static bool isFloatingPointZero(SDValue Op) {
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
-SDValue
-ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMcc, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMcc, SelectionDAG &DAG,
+ const SDLoc &dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
@@ -3432,9 +3479,8 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-SDValue
-ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- SDLoc dl) const {
+SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const SDLoc &dl) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
@@ -3651,7 +3697,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
-SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
@@ -3677,14 +3723,150 @@ SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
}
}
+static bool isGTorGE(ISD::CondCode CC) {
+ return CC == ISD::SETGT || CC == ISD::SETGE;
+}
+
+static bool isLTorLE(ISD::CondCode CC) {
+ return CC == ISD::SETLT || CC == ISD::SETLE;
+}
+
+// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
+// All of these conditions (and their <= and >= counterparts) will do:
+// x < k ? k : x
+// x > k ? x : k
+// k < x ? x : k
+// k > x ? k : x
+static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
+ const SDValue TrueVal, const SDValue FalseVal,
+ const ISD::CondCode CC, const SDValue K) {
+ return (isGTorGE(CC) &&
+ ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
+ (isLTorLE(CC) &&
+ ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
+}
+
+// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
+static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
+ const SDValue TrueVal, const SDValue FalseVal,
+ const ISD::CondCode CC, const SDValue K) {
+ return (isGTorGE(CC) &&
+ ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
+ (isLTorLE(CC) &&
+ ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
+}
+
+// Check if two chained conditionals could be converted into SSAT.
+//
+// SSAT can replace a set of two conditional selectors that bound a number to an
+// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
+//
+// x < -k ? -k : (x > k ? k : x)
+// x < -k ? -k : (x < k ? x : k)
+// x > -k ? (x > k ? k : x) : -k
+// x < k ? (x < -k ? -k : x) : k
+// etc.
+//
+// It returns true if the conversion can be done, false otherwise.
+// Additionally, the variable is returned in parameter V and the constant in K.
+static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
+ uint64_t &K) {
+
+ SDValue LHS1 = Op.getOperand(0);
+ SDValue RHS1 = Op.getOperand(1);
+ SDValue TrueVal1 = Op.getOperand(2);
+ SDValue FalseVal1 = Op.getOperand(3);
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
+ if (Op2.getOpcode() != ISD::SELECT_CC)
+ return false;
+
+ SDValue LHS2 = Op2.getOperand(0);
+ SDValue RHS2 = Op2.getOperand(1);
+ SDValue TrueVal2 = Op2.getOperand(2);
+ SDValue FalseVal2 = Op2.getOperand(3);
+ ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
+
+ // Find out which are the constants and which are the variables
+ // in each conditional
+ SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
+ ? &RHS1
+ : NULL;
+ SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
+ ? &RHS2
+ : NULL;
+ SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
+ SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
+ SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
+ SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
+
+ // We must detect cases where the original operations worked with 16- or
+ // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
+ // must work with sign-extended values but the select operations return
+ // the original non-extended value.
+ SDValue V2TmpReg = V2Tmp;
+ if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ V2TmpReg = V2Tmp->getOperand(0);
+
+ // Check that the registers and the constants have the correct values
+ // in both conditionals
+ if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
+ V2TmpReg != V2)
+ return false;
+
+ // Figure out which conditional is saturating the lower/upper bound.
+ const SDValue *LowerCheckOp =
+ isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+ ? &Op
+ : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
+ : NULL;
+ const SDValue *UpperCheckOp =
+ isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+ ? &Op
+ : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
+ : NULL;
+
+ if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
+ return false;
+
+ // Check that the constant in the lower-bound check is
+ // the opposite of the constant in the upper-bound check
+ // in 1's complement.
+ int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
+ int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
+ int64_t PosVal = std::max(Val1, Val2);
+
+ if (((Val1 > Val2 && UpperCheckOp == &Op) ||
+ (Val1 < Val2 && UpperCheckOp == &Op2)) &&
+ Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
+
+ V = V2;
+ K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+ return true;
+ }
+
+ return false;
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+
+ // Try to convert two saturating conditional selects into a single SSAT
+ SDValue SatValue;
+ uint64_t SatConstant;
+ if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
+ isSaturatingConditional(Op, SatValue, SatConstant))
+ return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
+ DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
@@ -3785,10 +3967,9 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(0, SDLoc(Op), MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, SDLoc(Op),
- Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
+ return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
}
@@ -3805,21 +3986,17 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
- RetVal1 = DAG.getLoad(MVT::i32, dl,
- Ld->getChain(), Ptr,
- Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
+ RetVal1 =
+ DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+ Ld->getAlignment(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
- RetVal2 = DAG.getLoad(MVT::i32, dl,
- Ld->getChain(), NewPtr,
- Ld->getPointerInfo().getWithOffset(4),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), NewAlign);
+ RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
+ Ld->getPointerInfo().getWithOffset(4), NewAlign,
+ Ld->getMemOperand()->getFlags());
return;
}
@@ -3912,8 +4089,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
- SDValue Result = OptimizeVFPBrcond(Op, DAG);
- if (Result.getNode())
+ if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
return Result;
}
@@ -3954,19 +4130,17 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ if (isPositionIndependent()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr =
DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
- false, false, false, 0);
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
@@ -4160,7 +4334,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, false, 0);
+ MachinePointerInfo());
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -4182,8 +4356,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
- MachinePointerInfo(),
- false, false, false, 0);
+ MachinePointerInfo());
return FrameAddr;
}
@@ -4326,7 +4499,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
-static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
+static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
@@ -4830,12 +5003,36 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
+static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Carry = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(3);
+ SDLoc DL(Op);
+
+ assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+
+ assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+ SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
+
+ SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
+ SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(
+ IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
+ Cmp.getValue(1), SDValue());
+ return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
+ CCR, Chain.getValue(1));
+}
+
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- SDLoc dl, EVT &VT, bool is128Bits,
+ const SDLoc &dl, EVT &VT, bool is128Bits,
NEONModImmType type) {
unsigned OpCmode, Imm;
@@ -4983,7 +5180,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try splatting with a VMOV.f32...
- APFloat FPVal = CFP->getValueAPF();
+ const APFloat &FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
@@ -5425,7 +5622,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
- const ARMSubtarget *ST, SDLoc dl) {
+ const ARMSubtarget *ST, const SDLoc &dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
@@ -5506,7 +5703,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
if (i > 0)
isOnlyLowElement = false;
@@ -5589,7 +5786,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+ SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -5639,7 +5836,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vec = DAG.getUNDEF(VT);
for (unsigned i = 0 ; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
@@ -5685,7 +5882,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
// A shuffle can only come from building a vector from various
@@ -5812,7 +6009,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF)
+ if (Entry.isUndef())
continue;
auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
@@ -5849,7 +6046,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
- ShuffleOps[1], &Mask[0]);
+ ShuffleOps[1], Mask);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
@@ -5899,7 +6096,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- SDLoc dl) {
+ const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
@@ -5986,12 +6183,12 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
- if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+ DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+ DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
@@ -6028,7 +6225,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 32) {
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
@@ -6044,7 +6241,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
!isa<ConstantSDNode>(V1.getOperand(0))) {
bool IsScalarToVector = true;
for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
- if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ if (!V1.getOperand(i).isUndef()) {
IsScalarToVector = false;
break;
}
@@ -6071,8 +6268,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
- if (V2->getOpcode() == ISD::UNDEF &&
- isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
@@ -6107,8 +6303,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// ->
// concat(VZIP(v1, v2):0, :1)
//
- if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
- V2->getOpcode() == ISD::UNDEF) {
+ if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
SDValue SubV1 = V1->getOperand(0);
SDValue SubV2 = V1->getOperand(1);
EVT SubVT = SubV1.getValueType();
@@ -6179,11 +6374,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
- if (VT == MVT::v8i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
- if (NewOp.getNode())
+ if (VT == MVT::v8i8)
+ if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
- }
return SDValue();
}
@@ -6222,11 +6415,11 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
+ if (!Op0.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
DAG.getIntPtrConstant(0, dl));
- if (Op1.getOpcode() != ISD::UNDEF)
+ if (!Op1.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
DAG.getIntPtrConstant(1, dl));
@@ -6355,17 +6548,16 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getAlignment(), LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a zext/zext node because LowerMUL is also run during normal
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->getAlignment(),
+ LD->getMemOperand()->getFlags());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -6391,8 +6583,9 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
- BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ return DAG.getBuildVector(
+ MVT::v2i32, SDLoc(N),
+ {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
@@ -6409,8 +6602,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::getVectorVT(TruncVT, NumElts), Ops);
+ return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -6510,8 +6702,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
+ SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
// Convert to float
@@ -6532,8 +6724,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 0xb000);
X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
- Y = DAG.getConstant(0xb000, dl, MVT::i32);
- Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
// Convert back to short.
@@ -6542,8 +6733,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
return X;
}
-static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
+ SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
SDValue N2;
@@ -6571,8 +6762,7 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 0x89);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(0x89, dl, MVT::i32);
- N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
@@ -6683,8 +6873,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// float4 result = as_float4(as_int4(xf*recip) + 2);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
- N1 = DAG.getConstant(2, dl, MVT::i32);
- N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N1 = DAG.getConstant(2, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
@@ -6770,21 +6959,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
- .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setCallee(CC, RetTy, Callee, std::move(Args))
.setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
if (!ShouldUseSRet)
return CallResult.first;
- SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue LoadSin =
+ DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
// Address of cos field.
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
- SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue LoadCos =
+ DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
@@ -6823,7 +7012,7 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
- ES, std::move(Args), 0);
+ ES, std::move(Args));
return LowerCallTo(CLI).first;
}
@@ -6871,13 +7060,13 @@ void ARMTargetLowering::ExpandDIV_Windows(
}
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
- // Monotonic load/store is legal for all targets
- if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
- return Op;
+ if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
+ // Acquire/Release load/store is not legal for targets without a dmb or
+ // equivalent available.
+ return SDValue();
- // Acquire/Release load/store is not legal for targets without a
- // dmb or equivalent available.
- return SDValue();
+ // Monotonic load/store is legal for all targets.
+ return Op;
}
static void ReplaceREADCYCLECOUNTER(SDNode *N,
@@ -6903,6 +7092,46 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
Results.push_back(Cycles32.getValue(1));
}
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
+ SDLoc dl(V.getNode());
+ SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
+ SDValue VHi = DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
+ dl, MVT::i32);
+ SDValue RegClass =
+ DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+ SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ return SDValue(
+ DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_64Results(SDNode *N,
+ SmallVectorImpl<SDValue> & Results,
+ SelectionDAG &DAG) {
+ assert(N->getValueType(0) == MVT::i64 &&
+ "AtomicCmpSwap on types less than 64 should be legal");
+ SDValue Ops[] = {N->getOperand(1),
+ createGPRPairNode(DAG, N->getOperand(2)),
+ createGPRPairNode(DAG, N->getOperand(3)),
+ N->getOperand(0)};
+ SDNode *CmpSwap = DAG.getMachineNode(
+ ARM::CMP_SWAP_64, SDLoc(N),
+ DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+ Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(SDValue(CmpSwap, 2));
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -6952,6 +7181,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -7015,6 +7245,13 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::UREM:
Res = LowerREM(N, DAG);
break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ Res = LowerDivRem(SDValue(N, 0), DAG);
+ assert(Res.getNumOperands() == 2 && "DivRem needs two values");
+ Results.push_back(Res.getValue(0));
+ Results.push_back(Res.getValue(1));
+ return;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
@@ -7023,6 +7260,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
Results);
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceCMP_SWAP_64Results(N, Results, DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -7034,11 +7274,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
-void ARMTargetLowering::
-SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB, int FI) const {
+void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DispatchBB,
+ int FI) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineConstantPool *MCP = MF->getConstantPool();
@@ -7149,10 +7390,10 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
}
}
-void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineFrameInfo *MFI = MF->getFrameInfo();
@@ -7192,7 +7433,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock*> LPadList;
- SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+ SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
@@ -7210,7 +7451,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -7254,6 +7494,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// registers being marked as clobbered.
MIB.addRegMask(RI.getNoPreservedMask());
+ bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
@@ -7367,7 +7608,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addMemOperand(JTMMOLd));
unsigned NewVReg6 = NewVReg5;
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
.addReg(ARM::CPSR, RegState::Define)
@@ -7450,7 +7691,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addImm(0)
.addMemOperand(JTMMOLd));
- if (RelocM == Reloc::PIC_) {
+ if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg4)
@@ -7534,7 +7775,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
(*I)->setIsEHPad(false);
// The instruction is gone now.
- MI->eraseFromParent();
+ MI.eraseFromParent();
}
static
@@ -7586,8 +7827,8 @@ static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
-static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
- const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned LdSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
@@ -7618,8 +7859,8 @@ static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
-static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
- const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+ const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned StSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
@@ -7647,7 +7888,7 @@ static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
}
MachineBasicBlock *
-ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ARMTargetLowering::EmitStructByval(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
@@ -7656,11 +7897,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI->getOperand(0).getReg();
- unsigned src = MI->getOperand(1).getReg();
- unsigned SizeVal = MI->getOperand(2).getImm();
- unsigned Align = MI->getOperand(3).getImm();
- DebugLoc dl = MI->getDebugLoc();
+ unsigned dest = MI.getOperand(0).getReg();
+ unsigned src = MI.getOperand(1).getReg();
+ unsigned SizeVal = MI.getOperand(2).getImm();
+ unsigned Align = MI.getOperand(3).getImm();
+ DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7732,7 +7973,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
srcIn = srcOut;
destIn = destOut;
}
- MI->eraseFromParent(); // The instruction is gone now.
+ MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
@@ -7858,7 +8099,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Add epilogue to handle BytesLeft.
BB = exitMBB;
- MachineInstr *StartOfExit = exitMBB->begin();
+ auto StartOfExit = exitMBB->begin();
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
@@ -7876,16 +8117,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
destIn = destOut;
}
- MI->eraseFromParent(); // The instruction is gone now.
+ MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
assert(Subtarget->isTargetWindows() &&
"__chkstk is only supported on Windows");
@@ -7940,21 +8181,23 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
ARM::SP)
- .addReg(ARM::SP).addReg(ARM::R4)));
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)));
- MI->eraseFromParent();
+ MI.eraseFromParent();
return MBB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
- MF->push_back(ContBB);
+ MF->insert(++MBB->getIterator(), ContBB);
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
@@ -7965,75 +8208,89 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
MBB->addSuccessor(TrapBB);
BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
- .addReg(MI->getOperand(0).getReg())
+ .addReg(MI.getOperand(0).getReg())
.addMBB(TrapBB);
+ AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB));
MBB->addSuccessor(ContBB);
- MI->eraseFromParent();
+ MI.eraseFromParent();
return ContBB;
}
MachineBasicBlock *
-ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default: {
- MI->dump();
+ MI.dump();
llvm_unreachable("Unexpected instr type to insert");
}
+
+ // Thumb1 post-indexed loads are really just single-register LDMs.
+ case ARM::tLDR_postidx: {
+ BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
+ .addOperand(MI.getOperand(1)) // Rn_wb
+ .addOperand(MI.getOperand(2)) // Rn
+ .addOperand(MI.getOperand(3)) // PredImm
+ .addOperand(MI.getOperand(4)) // PredReg
+ .addOperand(MI.getOperand(0)); // Rt
+ MI.eraseFromParent();
+ return BB;
+ }
+
// The Thumb2 pre-indexed stores have the same MI operands, they just
// define them differently in the .td files from the isel patterns, so
// they need pseudos.
case ARM::t2STR_preidx:
- MI->setDesc(TII->get(ARM::t2STR_PRE));
+ MI.setDesc(TII->get(ARM::t2STR_PRE));
return BB;
case ARM::t2STRB_preidx:
- MI->setDesc(TII->get(ARM::t2STRB_PRE));
+ MI.setDesc(TII->get(ARM::t2STRB_PRE));
return BB;
case ARM::t2STRH_preidx:
- MI->setDesc(TII->get(ARM::t2STRH_PRE));
+ MI.setDesc(TII->get(ARM::t2STRH_PRE));
return BB;
case ARM::STRi_preidx:
case ARM::STRBi_preidx: {
- unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
- ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+ unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
+ : ARM::STRB_PRE_IMM;
// Decode the offset.
- unsigned Offset = MI->getOperand(4).getImm();
+ unsigned Offset = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
Offset = ARM_AM::getAM2Offset(Offset);
if (isSub)
Offset = -Offset;
- MachineMemOperand *MMO = *MI->memoperands_begin();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
- .addOperand(MI->getOperand(0)) // Rn_wb
- .addOperand(MI->getOperand(1)) // Rt
- .addOperand(MI->getOperand(2)) // Rn
- .addImm(Offset) // offset (skip GPR==zero_reg)
- .addOperand(MI->getOperand(5)) // pred
- .addOperand(MI->getOperand(6))
- .addMemOperand(MMO);
- MI->eraseFromParent();
+ .addOperand(MI.getOperand(0)) // Rn_wb
+ .addOperand(MI.getOperand(1)) // Rt
+ .addOperand(MI.getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .addOperand(MI.getOperand(5)) // pred
+ .addOperand(MI.getOperand(6))
+ .addMemOperand(MMO);
+ MI.eraseFromParent();
return BB;
}
case ARM::STRr_preidx:
case ARM::STRBr_preidx:
case ARM::STRH_preidx: {
unsigned NewOpc;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected opcode!");
case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
- MI->eraseFromParent();
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i)
+ MIB.addOperand(MI.getOperand(i));
+ MI.eraseFromParent();
return BB;
}
@@ -8066,8 +8323,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
- .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, dl, TII->get(ARM::tBcc))
+ .addMBB(sinkMBB)
+ .addImm(MI.getOperand(3).getImm())
+ .addReg(MI.getOperand(4).getReg());
// copy0MBB:
// %FalseValue = ...
@@ -8081,12 +8340,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(*BB, BB->begin(), dl,
- TII->get(ARM::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(copy0MBB)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(thisMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -8097,10 +8357,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Compare both parts that make up the double comparison separately for
// equality.
- bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+ bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
- unsigned LHS1 = MI->getOperand(1).getReg();
- unsigned LHS2 = MI->getOperand(2).getReg();
+ unsigned LHS1 = MI.getOperand(1).getReg();
+ unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
AddDefaultPred(BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
@@ -8109,8 +8369,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
- unsigned RHS1 = MI->getOperand(3).getReg();
- unsigned RHS2 = MI->getOperand(4).getReg();
+ unsigned RHS1 = MI.getOperand(3).getReg();
+ unsigned RHS2 = MI.getOperand(4).getReg();
AddDefaultPred(BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS1).addReg(RHS1));
@@ -8119,9 +8379,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
}
- MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
- if (MI->getOperand(0).getImm() == ARMCC::NE)
+ if (MI.getOperand(0).getImm() == ARMCC::NE)
std::swap(destMBB, exitMBB);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -8131,7 +8391,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else
BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -8168,9 +8428,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Fn->insert(BBI, RSBBB);
Fn->insert(BBI, SinkBB);
- unsigned int ABSSrcReg = MI->getOperand(1).getReg();
- unsigned int ABSDstReg = MI->getOperand(0).getReg();
- bool ABSSrcKIll = MI->getOperand(1).isKill();
+ unsigned int ABSSrcReg = MI.getOperand(1).getReg();
+ unsigned int ABSDstReg = MI.getOperand(0).getReg();
+ bool ABSSrcKIll = MI.getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
@@ -8215,7 +8475,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
- MI->eraseFromParent();
+ MI.eraseFromParent();
// return last added BB
return SinkBB;
@@ -8234,38 +8494,38 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
- MachineInstr *MI, const SDNode *Node) {
+ MachineInstr &MI, const SDNode *Node) {
bool isThumb1 = Subtarget->isThumb1Only();
- DebugLoc DL = MI->getDebugLoc();
- MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB(*MF, MI);
// If the new dst/src is unused mark it as dead.
if (!Node->hasAnyUseOfValue(0)) {
- MI->getOperand(0).setIsDead(true);
+ MI.getOperand(0).setIsDead(true);
}
if (!Node->hasAnyUseOfValue(1)) {
- MI->getOperand(1).setIsDead(true);
+ MI.getOperand(1).setIsDead(true);
}
// The MEMCPY both defines and kills the scratch registers.
- for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
-void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
- if (MI->getOpcode() == ARM::MEMCPY) {
+ if (MI.getOpcode() == ARM::MEMCPY) {
attachMEMCPYScratchRegs(Subtarget, MI, Node);
return;
}
- const MCInstrDesc *MCID = &MI->getDesc();
+ const MCInstrDesc *MCID = &MI.getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -8274,24 +8534,24 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
- unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+ unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
- assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
"converted opcode should be the same except for cc_out");
- MI->setDesc(*MCID);
+ MI.setDesc(*MCID);
// Add the optional cc_out operand
- MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
+ MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
unsigned ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -8299,14 +8559,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
- for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
+ ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
- MI->RemoveOperand(i);
+ MI.RemoveOperand(i);
break;
}
}
@@ -8316,14 +8576,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
}
assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
if (deadCPSR) {
- assert(!MI->getOperand(ccOutIdx).getReg() &&
+ assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
return;
}
// If this instruction was defined with an optional CPSR def and its dag node
// had a live implicit CPSR def, then activate the optional CPSR def.
- MachineOperand &MO = MI->getOperand(ccOutIdx);
+ MachineOperand &MO = MI.getOperand(ccOutIdx);
MO.setReg(ARM::CPSR);
MO.setIsDef(true);
}
@@ -8453,16 +8713,12 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N0.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
- if (Result.getNode())
+ if (N0.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
return Result;
- }
- if (N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
- if (Result.getNode())
+ if (N1.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
return Result;
- }
return SDValue();
}
@@ -8544,7 +8800,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
-
+
EVT inputLaneType = Vec.getValueType().getVectorElementType();
switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
@@ -8570,11 +8826,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- if (Subtarget->isThumb1Only()) return SDValue();
-
- // Only perform the checks after legalize when the pattern is available.
- if (DCI.isBeforeLegalize()) return SDValue();
-
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -8702,14 +8953,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
return resNode;
}
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // UMAAL is similar to UMLAL except that it adds two unsigned values.
+ // While trying to combine for the other MLAL nodes, first search for the
+ // chance to use UMAAL. Check if Addc uses another addc node which can first
+ // be combined into a UMLAL. The other pattern is AddcNode being combined
+ // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
+
+ if (!Subtarget->hasV6Ops())
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ SDNode *PrevAddc = nullptr;
+ if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(0).getNode();
+ else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
+ PrevAddc = AddcNode->getOperand(1).getNode();
+
+ // If there's no addc chains, just return a search for any MLAL.
+ if (PrevAddc == nullptr)
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Try to convert the addc operand to an MLAL and if that fails try to
+ // combine AddcNode.
+ SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
+ if (MLAL != SDValue(PrevAddc, 0))
+ return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+ // Find the converted UMAAL or quit if it doesn't exist.
+ SDNode *UmlalNode = nullptr;
+ SDValue AddHi;
+ if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(0).getNode();
+ AddHi = AddcNode->getOperand(1);
+ } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
+ UmlalNode = AddcNode->getOperand(1).getNode();
+ AddHi = AddcNode->getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
+ // the ADDC as well as Zero.
+ auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
+
+ if (!Zero || Zero->getZExtValue() != 0)
+ return SDValue();
+
+ // Check that we have a glued ADDC node.
+ if (AddcNode->getValueType(1) != MVT::Glue)
+ return SDValue();
+
+ // Look for the glued ADDE.
+ SDNode* AddeNode = AddcNode->getGluedUser();
+ if (!AddeNode)
+ return SDValue();
+
+ if ((AddeNode->getOperand(0).getNode() == Zero &&
+ AddeNode->getOperand(1).getNode() == UmlalNode) ||
+ (AddeNode->getOperand(0).getNode() == UmlalNode &&
+ AddeNode->getOperand(1).getNode() == Zero)) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
+ UmlalNode->getOperand(2), AddHi };
+ SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+ // Replace the ADDs' nodes uses by the UMAAL node's values.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
+
+ // Return original node to notify the driver to stop replacing.
+ return SDValue(AddcNode, 0);
+ }
+ return SDValue();
+}
+
/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
static SDValue PerformADDCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
- return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+ if (Subtarget->isThumb1Only()) return SDValue();
+
+ // Only perform the checks after legalize when the pattern is available.
+ if (DCI.isBeforeLegalize()) return SDValue();
+ return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
@@ -8721,15 +9055,13 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const ARMSubtarget *Subtarget){
// Attempt to create vpaddl for this add.
- SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
- if (Result.getNode())
+ if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
- if (N0.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
- if (Result.getNode()) return Result;
- }
+ if (N0.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
+ return Result;
return SDValue();
}
@@ -8742,8 +9074,7 @@ static SDValue PerformADDCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
- SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
- if (Result.getNode())
+ if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
// If that didn't work, try again with the operands commuted.
@@ -8758,10 +9089,9 @@ static SDValue PerformSUBCombine(SDNode *N,
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
- if (N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
+ if (N1.getNode()->hasOneUse())
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
+ return Result;
return SDValue();
}
@@ -8931,8 +9261,7 @@ static SDValue PerformANDCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;
}
@@ -8974,8 +9303,7 @@ static SDValue PerformORCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
@@ -9148,8 +9476,7 @@ static SDValue PerformXORCombine(SDNode *N,
if (!Subtarget->isThumb1Only()) {
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
- SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
- if (Result.getNode())
+ if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
@@ -9311,17 +9638,15 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
- SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
- LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ SDValue NewLD1 =
+ DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
+ LD->getAlignment(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
- SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
- LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- std::min(4U, LD->getAlignment() / 2));
+ SDValue NewLD2 = DAG.getLoad(
+ MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
+ std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
@@ -9375,11 +9700,9 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
// into a pair of GPRs, which is fine when the value is used as a scalar,
// but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
SelectionDAG &DAG = DCI.DAG;
- if (N->getNumOperands() == 2) {
- SDValue RV = PerformVMOVDRRCombine(N, DAG);
- if (RV.getNode())
+ if (N->getNumOperands() == 2)
+ if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
return RV;
- }
// Load i64 elements as f64 values so that type legalization does not split
// them up into i32 values.
@@ -9396,7 +9719,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
+ SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -9445,7 +9768,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// Assume only bit cast to i32 will go away.
if (Elt->getOperand(0).getValueType() == MVT::i32)
++NumOfBitCastedElts;
- } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+ } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
// Constants are statically casted, thus do not count them as
// relevant operands.
--NumOfRelevantElts;
@@ -9472,7 +9795,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc dl(N);
for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
SDValue V = N->getOperand(Idx);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
if (V.getOpcode() == ISD::BITCAST &&
V->getOperand(0).getValueType() == MVT::i32)
@@ -9540,8 +9863,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDValue Concat0Op1 = Op0.getOperand(1);
SDValue Concat1Op1 = Op1.getOperand(1);
- if (Concat0Op1.getOpcode() != ISD::UNDEF ||
- Concat1Op1.getOpcode() != ISD::UNDEF)
+ if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
return SDValue();
// Skip the transformation if any of the types are illegal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9568,7 +9890,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
NewMask.push_back(NewElt);
}
return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
- DAG.getUNDEF(VT), NewMask.data());
+ DAG.getUNDEF(VT), NewMask);
}
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
@@ -9964,7 +10286,7 @@ static SDValue PerformSTORECombine(SDNode *N,
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
DAG.getUNDEF(WideVec.getValueType()),
- ShuffleVec.data());
+ ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
@@ -9995,8 +10317,8 @@ static SDValue PerformSTORECombine(SDNode *N,
StoreType, ShuffWide,
DAG.getIntPtrConstant(I, DL));
SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
Increment);
Chains.push_back(Ch);
@@ -10015,18 +10337,18 @@ static SDValue PerformSTORECombine(SDNode *N,
bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
- BasePtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ SDValue NewST1 = DAG.getStore(
+ St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
+ BasePtr, St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
- OffsetPtr, St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(),
- std::min(4U, St->getAlignment() / 2));
+ OffsetPtr, St->getPointerInfo(),
+ std::min(4U, St->getAlignment() / 2),
+ St->getMemOperand()->getFlags());
}
if (StVal.getValueType() == MVT::i64 &&
@@ -10049,9 +10371,8 @@ static SDValue PerformSTORECombine(SDNode *N,
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment(),
- St->getAAInfo());
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags(), St->getAAInfo());
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
@@ -10077,7 +10398,8 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Op = N->getOperand(0);
- if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+ if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+ Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
@@ -10134,7 +10456,7 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
- if (!N->getValueType(0).isVector() ||
+ if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
@@ -10475,7 +10797,7 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- APInt Mask = CI->getAPIntValue();
+ const APInt &Mask = CI->getAPIntValue();
KnownZero &= Mask;
KnownOne &= Mask;
return;
@@ -10533,7 +10855,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
} else {
assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
-
+
if (Op1->getOpcode() != ISD::OR)
return SDValue();
@@ -10563,7 +10885,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDLoc dl(X);
EVT VT = X.getValueType();
unsigned BitInX = AndC->getAPIntValue().logBase2();
-
+
if (BitInX != 0) {
// We must shift X first.
X = DAG.getNode(ISD::SRL, dl, VT, X,
@@ -10584,6 +10906,46 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
return V;
}
+/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
+SDValue
+ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue BB = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
+ // -> (brcond Chain BB CC CPSR Cmp)
+ if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
+ LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
+ LHS->getOperand(0)->hasOneUse()) {
+ auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
+ auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if ((LHS00C && LHS00C->getZExtValue() == 0) &&
+ (LHS01C && LHS01C->getZExtValue() == 1) &&
+ (LHS1C && LHS1C->getZExtValue() == 1) &&
+ (RHSC && RHSC->getZExtValue() == 0)) {
+ return DAG.getNode(
+ ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+ LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
+ }
+ }
+
+ return SDValue();
+}
+
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -10637,6 +10999,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
N->getOperand(3), NewCmp);
}
+ // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
+ // -> (cmov F T CC CPSR Cmp)
+ if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
+ auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if ((LHS0C && LHS0C->getZExtValue() == 0) &&
+ (LHS1C && LHS1C->getZExtValue() == 1) &&
+ (RHSC && RHSC->getZExtValue() == 0)) {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ LHS->getOperand(2), LHS->getOperand(3),
+ LHS->getOperand(4));
+ }
+ }
+
if (Res.getNode()) {
APInt KnownZero, KnownOne;
DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
@@ -10687,6 +11064,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
@@ -11209,22 +11587,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
- if (Subtarget->isThumb1Only())
- return false;
-
EVT VT;
SDValue Ptr;
- bool isSEXTLoad = false;
+ bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
+ isNonExt = !ST->isTruncatingStore();
} else
return false;
+ if (Subtarget->isThumb1Only()) {
+ // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
+ // must be non-extending/truncating, i32, with an offset of 4.
+ assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
+ if (Op->getOpcode() != ISD::ADD || !isNonExt)
+ return false;
+ auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!RHS || RHS->getZExtValue() != 4)
+ return false;
+
+ Offset = Op->getOperand(1);
+ Base = Op->getOperand(0);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
@@ -11333,6 +11726,26 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
+const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+ // At this point, we have to lower this constraint to something else, so we
+ // lower it to an "r" or "w". However, by doing this we will force the result
+ // to be in register, while the X constraint is much more permissive.
+ //
+ // Although we are correct (we are free to emit anything, without
+ // constraints), we might break use cases that would expect us to be more
+ // efficient and emit something else.
+ if (!Subtarget->hasVFP2())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "w";
+ if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
+ (ConstraintVT.getSizeInBits() == 64 ||
+ ConstraintVT.getSizeInBits() == 128))
+ return "w";
+
+ return "r";
+}
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
@@ -11651,7 +12064,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
- assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
@@ -11675,7 +12089,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -11713,7 +12127,7 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
- .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -11961,23 +12375,20 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
- if (!getInsertFencesForAtomic())
- return nullptr;
-
switch (Ord) {
- case NotAtomic:
- case Unordered:
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/non-atomic");
- case Monotonic:
- case Acquire:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Acquire:
return nullptr; // Nothing to do
- case SequentiallyConsistent:
+ case AtomicOrdering::SequentiallyConsistent:
if (!IsStore)
return nullptr; // Nothing to do
/*FALLTHROUGH*/
- case Release:
- case AcquireRelease:
- if (Subtarget->isSwift())
+ case AtomicOrdering::Release:
+ case AtomicOrdering::AcquireRelease:
+ if (Subtarget->preferISHSTBarriers())
return makeDMB(Builder, ARM_MB::ISHST);
// FIXME: add a comment with a link to documentation justifying this.
else
@@ -11989,19 +12400,16 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
- if (!getInsertFencesForAtomic())
- return nullptr;
-
switch (Ord) {
- case NotAtomic:
- case Unordered:
+ case AtomicOrdering::NotAtomic:
+ case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/not-atomic");
- case Monotonic:
- case Release:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
return nullptr; // Nothing to do
- case Acquire:
- case AcquireRelease:
- case SequentiallyConsistent:
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitTrailingFence");
@@ -12042,7 +12450,17 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
- return true;
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement cmpxchg without spilling. If the address being exchanged is also
+ // on the stack and close enough to the spill slot, this can lead to a
+ // situation where the monitor always gets cleared and the atomic operation
+ // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+ return getTargetMachine().getOptLevel() != 0;
+}
+
+bool ARMTargetLowering::shouldInsertFencesForAtomic(
+ const Instruction *I) const {
+ return InsertFencesForAtomic;
}
// This has so far only been implemented for MachO.
@@ -12091,7 +12509,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire = isAtLeastAcquire(Ord);
+ bool IsAcquire = isAcquireOrStronger(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
@@ -12135,7 +12553,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease = isAtLeastRelease(Ord);
+ bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form