Diffstat (limited to 'gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--   gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp   1460
1 file changed, 939 insertions, 521 deletions
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp index a2daa890943..3cfcb1e09f0 100644 --- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -65,6 +65,13 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +// Disabled for causing self-hosting failures once returned-attribute inference +// was enabled. +static cl::opt<bool> +EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden, + cl::desc("Directly forward this return"), + cl::init(false)); + namespace { class ARMCCState : public CCState { public: @@ -240,7 +247,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Set the correct calling convention for ARMv7k WatchOS. It's just // AAPCS_VFP for functions as simple as libcalls. - if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); } @@ -254,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // RTLIB if (Subtarget->isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || - Subtarget->isTargetAndroid())) { + Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; @@ -406,17 +413,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } - // The half <-> float conversion functions are always soft-float, but are - // needed for some targets which use a hard-float calling convention by - // default. - if (Subtarget->isAAPCS_ABI()) { - setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); - } else { - setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); - setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); - setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + // The half <-> float conversion functions are always soft-float on + // non-watchos platforms, but are needed for some targets which use a + // hard-float calling convention by default. + if (!Subtarget->isTargetWatchABI()) { + if (Subtarget->isAAPCS_ABI()) { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); + } else { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + } } // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have @@ -577,6 +586,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v1i64, Expand); + setOperationAction(ISD::CTPOP, MVT::v2i64, Expand); + + setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); + setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); // NEON does not have single instruction CTTZ for vectors. 
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); @@ -708,6 +722,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); } + } else { + // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. + setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } setOperationAction(ISD::SADDO, MVT::i32, Custom); @@ -754,10 +772,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); - // These just redirect to CTTZ and CTLZ on ARM. - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); - // @llvm.readcyclecounter requires the Performance Monitors extension. // Default to the 0 expansion on unsupported platforms. // FIXME: Technically there are older ARM CPUs that have @@ -769,8 +783,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && - !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + : Subtarget->hasDivideInARMMode(); + if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, LibCall); setOperationAction(ISD::UDIV, MVT::i32, LibCall); @@ -787,9 +802,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) - if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) { + if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || + Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); + HasStandaloneRem = false; setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); @@ -811,6 +828,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } else { setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); @@ -837,21 +856,21 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. If we are targeting a single threaded system, - // then set them all for expand so we can lower them later into their - // non-atomic form. - if (TM.Options.ThreadModel == ThreadModel::Single) - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // the default expansion. + InsertFencesForAtomic = false; + if (Subtarget->hasAnyDataBarrier() && + (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. 
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (!Subtarget->isThumb() || !Subtarget->isMClass()) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. - if (!Subtarget->hasV8Ops()) { + if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering @@ -913,6 +932,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + // Thumb-1 cannot currently select ARMISD::SUBE. + if (!Subtarget->isThumb1Only()) + setOperationAction(ISD::SETCCE, MVT::i32, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); @@ -960,7 +983,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetWatchOS()) { + if (Subtarget->isTargetWatchABI()) { setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); } @@ -1043,7 +1066,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setMinStackArgumentAlignment(4); // Prefer likely predicted branches to selects on out-of-order cores. - PredictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -1110,7 +1133,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; - case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; @@ -1127,6 +1149,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::SSAT: return "ARMISD::SSAT"; + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1203,6 +1227,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -1377,7 +1402,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::ARM_APCS: case CallingConv::GHC: return CC; + case CallingConv::PreserveMost: + return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: + case CallingConv::Swift: return isVarArg ? 
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) @@ -1419,18 +1447,18 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); + case CallingConv::PreserveMost: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. -SDValue -ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals, - bool isThisReturn, SDValue ThisVal) const { +SDValue ARMTargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, + SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1446,7 +1474,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference - if (i == 0 && isThisReturn) { + if (i == 0 && isThisReturn && EnableThisRetForwarding) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); @@ -1510,23 +1538,21 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } /// LowerMemOpCallTo - Store the argument to the stack. -SDValue -ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, - SDValue StackPtr, SDValue Arg, - SDLoc dl, SelectionDAG &DAG, - const CCValAssign &VA, - ISD::ArgFlagsTy Flags) const { +SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, + SDValue Arg, const SDLoc &dl, + SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); return DAG.getStore( Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), - false, false, 0); + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } -void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, +void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, @@ -1708,7 +1734,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, false, DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); @@ -1784,20 +1809,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
bool isDirect = false; - bool isARMFunc = false; + + const TargetMachine &TM = getTargetMachine(); + const Module *Mod = MF.getFunction()->getParent(); + const GlobalValue *GV = nullptr; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + GV = G->getGlobal(); + bool isStub = + !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO(); + + bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); auto PtrVt = getPointerTy(DAG.getDataLayout()); if (Subtarget->genLongCalls()) { - assert((Subtarget->isTargetWindows() || - getTargetMachine().getRelocationModel() == Reloc::Static) && - "long-calls with non-static relocation model!"); + assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && + "long-calls codegen is not position independent!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); + if (isa<GlobalAddressSDNode>(Callee)) { // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = @@ -1808,8 +1840,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1823,54 +1854,55 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); - } - } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = G->getGlobal(); - isDirect = true; - bool isDef = GV->isStrongDefinitionForLinker(); - bool isStub = (!isDef && Subtarget->isTargetMachO()) && - getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); - // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); - // tBX takes a register source operand. - if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode( - ARMISD::WrapperPIC, dl, PtrVt, - DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, true, 0); - } else if (Subtarget->isTargetCOFF()) { - assert(Subtarget->isTargetWindows() && - "Windows is the only supported COFF target"); - unsigned TargetFlags = GV->hasDLLImportStorageClass() - ? 
ARMII::MO_DLLIMPORT - : ARMII::MO_NO_FLAG; - Callee = - DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); - if (GV->hasDLLImportStorageClass()) + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + } + } else if (isa<GlobalAddressSDNode>(Callee)) { + // If we're optimizing for minimum size and the function is called three or + // more times in this block, we can improve codesize by calling indirectly + // as BLXr has a 16-bit encoding. + auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + auto *BB = CLI.CS->getParent(); + bool PreferIndirect = + Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) { + return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; + }) > 2; + + if (!PreferIndirect) { + isDirect = true; + bool isDef = GV->isStrongDefinitionForLinker(); + + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); + // tBX takes a register source operand. + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = - DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); - } else { - // On ELF targets for PIC code, direct calls should go through the PLT - unsigned OpFlags = 0; - if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) - OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); + /* Alignment = */ 0, MachineMemOperand::MOInvariant); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } else { + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); + } } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; - bool isStub = Subtarget->isTargetMachO() && - getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // tBX takes a register source operand. 
const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { @@ -1882,17 +1914,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { - unsigned OpFlags = 0; - // On ELF targets for PIC code, direct calls should go through the PLT - if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) - OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); } } @@ -1902,11 +1928,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else - CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + CallOpc = ARMISD::CALL; } else { if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && Subtarget->hasRAS() && + else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP @@ -2046,7 +2072,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if (!Def) return false; if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(Def, FI)) + if (!TII->isLoadFromStackSlot(*Def, FI)) return false; } else { return false; @@ -2086,9 +2112,9 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - const Function *CallerF = DAG.getMachineFunction().getFunction(); + MachineFunction &MF = DAG.getMachineFunction(); + const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; assert(Subtarget->supportsTailCall()); @@ -2126,41 +2152,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // If the calling conventions do not match, then we'd better make sure the - // results are returned in the same way as what the caller expects. - if (!CCMatch) { - SmallVector<CCValAssign, 16> RVLocs1; - ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, - *DAG.getContext(), Call); - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); - - SmallVector<CCValAssign, 16> RVLocs2; - ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, - *DAG.getContext(), Call); - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); - - if (RVLocs1.size() != RVLocs2.size()) + // Check that the call results are passed in the same way. + LLVMContext &C = *DAG.getContext(); + if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, + CCAssignFnForNode(CalleeCC, true, isVarArg), + CCAssignFnForNode(CallerCC, true, isVarArg))) + return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; - for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { - if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) - return false; - if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) - return false; - if (RVLocs1[i].isRegLoc()) { - if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) - return false; - } else { - if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) - return false; - } - } } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. - const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). - getInfo<ARMFunctionInfo>(); + const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); if (AFI_Caller->getArgRegsSaveSize()) return false; @@ -2170,13 +2180,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Call); + ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC, false, isVarArg)); if (CCInfo.getNextStackOffset()) { - MachineFunction &MF = DAG.getMachineFunction(); - // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2213,6 +2220,10 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } } + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) + return false; } return true; @@ -2230,7 +2241,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, } static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, - SDLoc DL, SelectionDAG &DAG) { + const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); @@ -2263,11 +2274,11 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, } SDValue -ARMTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, +ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -2525,9 +2536,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; - if (RelocM == Reloc::Static) { + bool IsPositionIndependent = isPositionIndependent(); + if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -2538,11 +2549,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); - SDValue Result = - DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, 0); - if (RelocM == Reloc::Static) + SDValue Result = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + if (!IsPositionIndependent) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); @@ -2588,7 +2598,8 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SDValue FuncTLVGet = DAG.getLoad(MVT::i32, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, true, true, 4); + /* Alignment = */ 4, MachineMemOperand::MONonTemporal | + MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); MachineFunction &F = DAG.getMachineFunction(); @@ -2614,6 +2625,61 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); } +SDValue +ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); + + SDValue Chain = DAG.getEntryNode(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(Op); + + // Load the current TEB (thread environment block) + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getConstant(15, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(13, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(2, DL, MVT::i32)}; + SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + + SDValue TEB = CurrentTEB.getValue(0); + Chain = CurrentTEB.getValue(1); + + // Load the ThreadLocalStoragePointer from the TEB + // A pointer to the TLS array is located at offset 0x2c from the TEB. + SDValue TLSArray = + DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); + TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); + + // The pointer to the thread's TLS data area is at the TLS Index scaled by 4 + // offset into the TLSArray. 
+ + // Load the TLS index from the C runtime + SDValue TLSIndex = + DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); + TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); + TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); + + SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, + DAG.getConstant(2, DL, MVT::i32)); + SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, + DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), + MachinePointerInfo()); + + // Get the offset of the start of the .tls section (section base) + const auto *GA = cast<GlobalAddressSDNode>(Op); + auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); + SDValue Offset = DAG.getLoad( + PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, + DAG.getTargetConstantPool(CPV, PtrVT, 4)), + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -2629,10 +2695,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, 0); + Argument = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2649,8 +2714,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), - DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), - 0); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -2684,8 +2748,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2693,8 +2756,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2704,8 +2766,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } // The address of the thread local variable is the add of the thread @@ -2718,6 +2779,9 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, 
SelectionDAG &DAG) const { if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); + if (Subtarget->isTargetWindows()) + return LowerGlobalTLSAddressWindows(Op, DAG); + // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); @@ -2742,9 +2806,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - bool UseGOT_PREL = - !(GV->hasHiddenVisibility() || GV->hasLocalLinkage()); + const TargetMachine &TM = getTargetMachine(); + if (isPositionIndependent()) { + bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2760,15 +2824,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Result.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); if (UseGOT_PREL) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + Result = + DAG.getLoad(PtrVT, dl, Chain, Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2785,8 +2848,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } @@ -2795,7 +2857,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; @@ -2803,15 +2864,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into multiple nodes unsigned Wrapper = - RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper; + isPositionIndependent() ? 
ARMISD::WrapperPIC : ARMISD::Wrapper; SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) + if (Subtarget->isGVIndirectSymbol(GV)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2837,8 +2897,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, TargetFlags)); if (GV->hasDLLImportStorageClass()) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } @@ -2877,7 +2936,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, "RBIT intrinsic must have i32 type!"); return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); } - case Intrinsic::arm_thread_pointer: { + case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } @@ -2886,10 +2945,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; - unsigned PCAdj = (RelocM != Reloc::PIC_) - ? 0 : (Subtarget->isThumb() ? 4 : 8); + bool IsPositionIndependent = isPositionIndependent(); + unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); @@ -2897,10 +2955,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, 0); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); - if (RelocM == Reloc::PIC_) { + if (IsPositionIndependent) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -2966,7 +3023,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; - } else if (Subtarget->isSwift() && Ord == Release) { + } else if (Subtarget->preferISHSTBarriers() && + Ord == AtomicOrdering::Release) { // Swift happens to implement ISHST barriers in a way that's compatible with // Release semantics but weaker than ISH so we'd be fools not to use // it. Beware: other processors probably don't! 
@@ -3016,13 +3074,14 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), - MachinePointerInfo(SV), false, false, 0); + MachinePointerInfo(SV)); } -SDValue -ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, + CCValAssign &NextVA, + SDValue &Root, + SelectionDAG &DAG, + const SDLoc &dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -3045,8 +3104,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad( MVT::i32, dl, Root, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -3064,13 +3122,11 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. // Return: The frame index registers were stored into. -int -ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned InRegsParamRecordIdx, - int ArgOffset, - unsigned ArgSize) const { +int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + int ArgOffset, unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; @@ -3108,9 +3164,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { unsigned VReg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(OrigArg, 4 * i), false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(OrigArg, 4 * i)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } @@ -3121,17 +3176,16 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, } // Setup stack frame, the va_list pointer will start from. 
-void -ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc dl, SDValue &Chain, - unsigned ArgOffset, - unsigned TotalArgRegsSaveSize, - bool ForceMutable) const { +void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, + unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, + bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // Try to store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing + // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. @@ -3141,14 +3195,10 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, AFI->setVarArgsFrameIndex(FrameIndex); } -SDValue -ARMTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) - const { +SDValue ARMTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -3230,10 +3280,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValue2 = DAG.getLoad( - MVT::f64, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI)); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -3326,10 +3375,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad( - VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI))); } lastInsIndex = index; } @@ -3373,10 +3421,9 @@ static bool isFloatingPointZero(SDValue Op) { /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. -SDValue -ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMcc, SelectionDAG &DAG, + const SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -3432,9 +3479,8 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
-SDValue -ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - SDLoc dl) const { +SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const SDLoc &dl) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) @@ -3651,7 +3697,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } -SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, +SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const { if (Subtarget->isFPOnlySP() && VT == MVT::f64) { @@ -3677,14 +3723,150 @@ SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, } } +static bool isGTorGE(ISD::CondCode CC) { + return CC == ISD::SETGT || CC == ISD::SETGE; +} + +static bool isLTorLE(ISD::CondCode CC) { + return CC == ISD::SETLT || CC == ISD::SETLE; +} + +// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. +// All of these conditions (and their <= and >= counterparts) will do: +// x < k ? k : x +// x > k ? x : k +// k < x ? x : k +// k > x ? k : x +static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, + const SDValue TrueVal, const SDValue FalseVal, + const ISD::CondCode CC, const SDValue K) { + return (isGTorGE(CC) && + ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || + (isLTorLE(CC) && + ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); +} + +// Similar to isLowerSaturate(), but checks for upper-saturating conditions. +static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, + const SDValue TrueVal, const SDValue FalseVal, + const ISD::CondCode CC, const SDValue K) { + return (isGTorGE(CC) && + ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) || + (isLTorLE(CC) && + ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); +} + +// Check if two chained conditionals could be converted into SSAT. +// +// SSAT can replace a set of two conditional selectors that bound a number to an +// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: +// +// x < -k ? -k : (x > k ? k : x) +// x < -k ? -k : (x < k ? x : k) +// x > -k ? (x > k ? k : x) : -k +// x < k ? (x < -k ? -k : x) : k +// etc. +// +// It returns true if the conversion can be done, false otherwise. +// Additionally, the variable is returned in parameter V and the constant in K. +static bool isSaturatingConditional(const SDValue &Op, SDValue &V, + uint64_t &K) { + + SDValue LHS1 = Op.getOperand(0); + SDValue RHS1 = Op.getOperand(1); + SDValue TrueVal1 = Op.getOperand(2); + SDValue FalseVal1 = Op.getOperand(3); + ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + + const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1; + if (Op2.getOpcode() != ISD::SELECT_CC) + return false; + + SDValue LHS2 = Op2.getOperand(0); + SDValue RHS2 = Op2.getOperand(1); + SDValue TrueVal2 = Op2.getOperand(2); + SDValue FalseVal2 = Op2.getOperand(3); + ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); + + // Find out which are the constants and which are the variables + // in each conditional + SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1) + ? &RHS1 + : NULL; + SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2) + ? &RHS2 + : NULL; + SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? 
TrueVal2 : FalseVal2; + SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; + SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; + SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2; + + // We must detect cases where the original operations worked with 16- or + // 8-bit values. In such case, V2Tmp != V2 because the comparison operations + // must work with sign-extended values but the select operations return + // the original non-extended value. + SDValue V2TmpReg = V2Tmp; + if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG) + V2TmpReg = V2Tmp->getOperand(0); + + // Check that the registers and the constants have the correct values + // in both conditionals + if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp || + V2TmpReg != V2) + return false; + + // Figure out which conditional is saturating the lower/upper bound. + const SDValue *LowerCheckOp = + isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) + ? &Op + : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 + : NULL; + const SDValue *UpperCheckOp = + isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) + ? &Op + : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 + : NULL; + + if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) + return false; + + // Check that the constant in the lower-bound check is + // the opposite of the constant in the upper-bound check + // in 1's complement. + int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue(); + int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue(); + int64_t PosVal = std::max(Val1, Val2); + + if (((Val1 > Val2 && UpperCheckOp == &Op) || + (Val1 < Val2 && UpperCheckOp == &Op2)) && + Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { + + V = V2; + K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + return true; + } + + return false; +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc dl(Op); + + // Try to convert two saturating conditional selects into a single SSAT + SDValue SatValue; + uint64_t SatConstant; + if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && + isSaturatingConditional(Op, SatValue, SatConstant)) + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - SDLoc dl(Op); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, @@ -3785,10 +3967,9 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(0, SDLoc(Op), MVT::i32); if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, SDLoc(Op), - Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), Ld->getAlignment()); + return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); } @@ -3805,21 +3986,17 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { SDValue Ptr = Ld->getBasePtr(); - RetVal1 = DAG.getLoad(MVT::i32, dl, - Ld->getChain(), Ptr, - Ld->getPointerInfo(), - 
Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), Ld->getAlignment()); + RetVal1 = + DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), + Ld->getAlignment(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); - RetVal2 = DAG.getLoad(MVT::i32, dl, - Ld->getChain(), NewPtr, - Ld->getPointerInfo().getWithOffset(4), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->isInvariant(), NewAlign); + RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, + Ld->getPointerInfo().getWithOffset(4), NewAlign, + Ld->getMemOperand()->getFlags()); return; } @@ -3912,8 +4089,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { - SDValue Result = OptimizeVFPBrcond(Op, DAG); - if (Result.getNode()) + if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } @@ -3954,19 +4130,17 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (isPositionIndependent()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } @@ -4160,7 +4334,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); + MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. @@ -4182,8 +4356,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); + MachinePointerInfo()); return FrameAddr; } @@ -4326,7 +4499,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. -static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! 
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); @@ -4830,12 +5003,36 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } +static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry); + + SDValue FVal = DAG.getConstant(0, DL, MVT::i32); + SDValue TVal = DAG.getConstant(1, DL, MVT::i32); + SDValue ARMcc = DAG.getConstant( + IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, + Cmp.getValue(1), SDValue()); + return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, + CCR, Chain.getValue(1)); +} + /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - SDLoc dl, EVT &VT, bool is128Bits, + const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; @@ -4983,7 +5180,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try splatting with a VMOV.f32... - APFloat FPVal = CFP->getValueAPF(); + const APFloat &FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { @@ -5425,7 +5622,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) { // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. 
@@ -4983,7 +5180,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
     return SDValue();

   // Try splatting with a VMOV.f32...
-  APFloat FPVal = CFP->getValueAPF();
+  const APFloat &FPVal = CFP->getValueAPF();
   int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);

   if (ImmVal != -1) {
@@ -5425,7 +5622,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) {
 // instruction, return an SDValue of such a constant (will become a MOV
 // instruction).  Otherwise return null.
 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
-                                     const ARMSubtarget *ST, SDLoc dl) {
+                                     const ARMSubtarget *ST, const SDLoc &dl) {
   uint64_t Val;
   if (!isa<ConstantSDNode>(N))
     return SDValue();
@@ -5506,7 +5703,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   SDValue Value;
   for (unsigned i = 0; i < NumElts; ++i) {
     SDValue V = Op.getOperand(i);
-    if (V.getOpcode() == ISD::UNDEF)
+    if (V.isUndef())
       continue;
     if (i > 0)
       isOnlyLowElement = false;
@@ -5589,7 +5786,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i)));
     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
-    SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+    SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
     Val = LowerBUILD_VECTOR(Val, DAG, ST);
     if (Val.getNode())
       return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -5639,7 +5836,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     SDValue Vec = DAG.getUNDEF(VT);
     for (unsigned i = 0 ; i < NumElts; ++i) {
       SDValue V = Op.getOperand(i);
-      if (V.getOpcode() == ISD::UNDEF)
+      if (V.isUndef())
         continue;
       SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
@@ -5685,7 +5882,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
   SmallVector<ShuffleSourceInfo, 2> Sources;
   for (unsigned i = 0; i < NumElts; ++i) {
     SDValue V = Op.getOperand(i);
-    if (V.getOpcode() == ISD::UNDEF)
+    if (V.isUndef())
       continue;
     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
       // A shuffle can only come from building a vector from various
@@ -5812,7 +6009,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
   int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
   for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
     SDValue Entry = Op.getOperand(i);
-    if (Entry.getOpcode() == ISD::UNDEF)
+    if (Entry.isUndef())
       continue;

     auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
@@ -5849,7 +6046,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
     ShuffleOps[i] = Sources[i].ShuffleVec;

   SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
-                                         ShuffleOps[1], &Mask[0]);
+                                         ShuffleOps[1], Mask);
   return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
 }
@@ -5899,7 +6096,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
 /// the specified operations to build the shuffle.
 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                       SDValue RHS, SelectionDAG &DAG,
-                                      SDLoc dl) {
+                                      const SDLoc &dl) {
   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
@@ -5986,12 +6183,12 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
     VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));

-  if (V2.getNode()->getOpcode() == ISD::UNDEF)
+  if (V2.getNode()->isUndef())
     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
-                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+                       DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));

   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
-                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
+                     DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
 }

 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
@@ -6028,7 +6225,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   if (EltSize <= 32) {
-    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+    if (SVN->isSplat()) {
       int Lane = SVN->getSplatIndex();
       // If this is undef splat, generate it via "just" vdup, if possible.
       if (Lane == -1) Lane = 0;
@@ -6044,7 +6241,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
           !isa<ConstantSDNode>(V1.getOperand(0))) {
         bool IsScalarToVector = true;
         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
-          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+          if (!V1.getOperand(i).isUndef()) {
             IsScalarToVector = false;
             break;
           }
@@ -6071,8 +6268,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   if (isVREVMask(ShuffleMask, VT, 16))
     return DAG.getNode(ARMISD::VREV16, dl, VT, V1);

-  if (V2->getOpcode() == ISD::UNDEF &&
-      isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+  if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
     return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
                        DAG.getConstant(Imm, dl, MVT::i32));
   }
@@ -6107,8 +6303,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   // ->
   //   concat(VZIP(v1, v2):0, :1)
   //
-  if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
-      V2->getOpcode() == ISD::UNDEF) {
+  if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
     SDValue SubV1 = V1->getOperand(0);
     SDValue SubV2 = V1->getOperand(1);
     EVT SubVT = SubV1.getValueType();
@@ -6179,11 +6374,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);

-  if (VT == MVT::v8i8) {
-    SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
-    if (NewOp.getNode())
+  if (VT == MVT::v8i8)
+    if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
       return NewOp;
-  }

   return SDValue();
 }
@@ -6222,11 +6415,11 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   SDValue Val = DAG.getUNDEF(MVT::v2f64);
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
-  if (Op0.getOpcode() != ISD::UNDEF)
+  if (!Op0.isUndef())
     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
                       DAG.getIntPtrConstant(0, dl));
-  if (Op1.getOpcode() != ISD::UNDEF)
+  if (!Op1.isUndef())
     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
                       DAG.getIntPtrConstant(1, dl));
@@ -6355,17 +6548,16 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
   // The load already has the right type.
   if (ExtendedTy == LD->getMemoryVT())
     return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
-                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
-                       LD->isNonTemporal(), LD->isInvariant(),
-                       LD->getAlignment());
+                       LD->getBasePtr(), LD->getPointerInfo(),
+                       LD->getAlignment(), LD->getMemOperand()->getFlags());

   // We need to create a zextload/sextload. We cannot just create a load
   // followed by a zext/zext node because LowerMUL is also run during normal
   // operation legalization where we can't create illegal types.
   return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
                         LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
-                        LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
-                        LD->isNonTemporal(), LD->getAlignment());
+                        LD->getMemoryVT(), LD->getAlignment(),
+                        LD->getMemOperand()->getFlags());
 }

 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -6391,8 +6583,9 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
     assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
            BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
     unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32,
-                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+    return DAG.getBuildVector(
+        MVT::v2i32, SDLoc(N),
+        {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
   }
   // Construct a new BUILD_VECTOR with elements truncated to half the size.
   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
@@ -6409,8 +6602,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
     // The values are implicitly truncated so sext vs. zext doesn't matter.
     Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
   }
-  return DAG.getNode(ISD::BUILD_VECTOR, dl,
-                     MVT::getVectorVT(TruncVT, NumElts), Ops);
+  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
 }

 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -6510,8 +6702,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
 }

-static SDValue
-LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
+                              SelectionDAG &DAG) {
   // TODO: Should this propagate fast-math-flags?

   // Convert to float
@@ -6532,8 +6724,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
   // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
   X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
-  Y = DAG.getConstant(0xb000, dl, MVT::i32);
-  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
   X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
   // Convert back to short.
@@ -6542,8 +6733,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
   return X;
 }

-static SDValue
-LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
+                               SelectionDAG &DAG) {
   // TODO: Should this propagate fast-math-flags?
   SDValue N2;
@@ -6571,8 +6762,7 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
   // float4 result = as_float4(as_int4(xf*recip) + 0x89);
   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
-  N1 = DAG.getConstant(0x89, dl, MVT::i32);
-  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   // Convert back to integer and return.
@@ -6683,8 +6873,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   // float4 result = as_float4(as_int4(xf*recip) + 2);
   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
-  N1 = DAG.getConstant(2, dl, MVT::i32);
-  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+  N1 = DAG.getConstant(2, dl, MVT::v4i32);
   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   // Convert back to integer and return.
@@ -6770,21 +6959,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl)
       .setChain(DAG.getEntryNode())
-      .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+      .setCallee(CC, RetTy, Callee, std::move(Args))
       .setDiscardResult(ShouldUseSRet);
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

   if (!ShouldUseSRet)
     return CallResult.first;

-  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
-                                MachinePointerInfo(), false, false, false, 0);
+  SDValue LoadSin =
+      DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());

   // Address of cos field.
   SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
                             DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
-  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
-                                MachinePointerInfo(), false, false, false, 0);
+  SDValue LoadCos =
+      DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());

   SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
   return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
@@ -6823,7 +7012,7 @@ SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
   CLI.setDebugLoc(dl)
       .setChain(Chain)
       .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
-                 ES, std::move(Args), 0);
+                 ES, std::move(Args));

   return LowerCallTo(CLI).first;
 }
@@ -6871,13 +7060,13 @@ void ARMTargetLowering::ExpandDIV_Windows(
 }

 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
-  // Monotonic load/store is legal for all targets
-  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
-    return Op;
+  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
+    // Acquire/Release load/store is not legal for targets without a dmb or
+    // equivalent available.
+    return SDValue();

-  // Acquire/Release load/store is not legal for targets without a
-  // dmb or equivalent available.
-  return SDValue();
+  // Monotonic load/store is legal for all targets.
+  return Op;
 }

 static void ReplaceREADCYCLECOUNTER(SDNode *N,
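For reference, a minimal standalone sketch (not part of the patch) of what the ordering distinction in LowerAtomicLoadStore above means at the source level: monotonic corresponds to std::memory_order_relaxed and needs no barrier, so the node can be returned unchanged, while acquire/release accesses are rejected here and end up expanded elsewhere with DMB barriers.

#include <atomic>

int load_relaxed(const std::atomic<int> &a) {
  return a.load(std::memory_order_relaxed);  // legal as a plain load
}

int load_acquire(const std::atomic<int> &a) {
  return a.load(std::memory_order_acquire);  // needs a barrier on ARM
}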
@@ -6903,6 +7092,46 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
   Results.push_back(Cycles32.getValue(1));
 }

+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
+  SDLoc dl(V.getNode());
+  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
+  SDValue VHi = DAG.getAnyExtOrTrunc(
+      DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
+      dl, MVT::i32);
+  SDValue RegClass =
+      DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+  return SDValue(
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_64Results(SDNode *N,
+                                      SmallVectorImpl<SDValue> & Results,
+                                      SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i64 &&
+         "AtomicCmpSwap on types less than 64 should be legal");
+  SDValue Ops[] = {N->getOperand(1),
+                   createGPRPairNode(DAG, N->getOperand(2)),
+                   createGPRPairNode(DAG, N->getOperand(3)),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = DAG.getMachineNode(
+      ARM::CMP_SWAP_64, SDLoc(N),
+      DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(SDValue(CmpSwap, 2));
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -6952,6 +7181,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
   case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
+  case ISD::SETCCE:        return LowerSETCCE(Op, DAG);
   case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -7015,6 +7245,13 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::UREM:
     Res = LowerREM(N, DAG);
     break;
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    Res = LowerDivRem(SDValue(N, 0), DAG);
+    assert(Res.getNumOperands() == 2 && "DivRem needs two values");
+    Results.push_back(Res.getValue(0));
+    Results.push_back(Res.getValue(1));
+    return;
   case ISD::READCYCLECOUNTER:
     ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
     return;
@@ -7023,6 +7260,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
     return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
                              Results);
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceCMP_SWAP_64Results(N, Results, DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -7034,11 +7274,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
 /// registers the function context.
-void ARMTargetLowering::
-SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
-                       MachineBasicBlock *DispatchBB, int FI) const {
+void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
+                                               MachineBasicBlock *MBB,
+                                               MachineBasicBlock *DispatchBB,
+                                               int FI) const {
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-  DebugLoc dl = MI->getDebugLoc();
+  DebugLoc dl = MI.getDebugLoc();
   MachineFunction *MF = MBB->getParent();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
   MachineConstantPool *MCP = MF->getConstantPool();
@@ -7149,10 +7390,10 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
   }
 }

-void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
+void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-  DebugLoc dl = MI->getDebugLoc();
+  DebugLoc dl = MI.getDebugLoc();
   MachineFunction *MF = MBB->getParent();
   MachineRegisterInfo *MRI = &MF->getRegInfo();
   MachineFrameInfo *MFI = MF->getFrameInfo();
@@ -7192,7 +7433,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,

   // Get an ordered list of the machine basic blocks for the jump table.
   std::vector<MachineBasicBlock*> LPadList;
-  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+  SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
   LPadList.reserve(CallSiteNumToLPad.size());
   for (unsigned I = 1; I <= MaxCSNum; ++I) {
     SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
@@ -7210,7 +7451,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   MachineJumpTableInfo *JTI =
     MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
   unsigned MJTI = JTI->createJumpTableIndex(LPadList);
-  Reloc::Model RelocM = getTargetMachine().getRelocationModel();

   // Create the MBBs for the dispatch code.
@@ -7254,6 +7494,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   // registers being marked as clobbered.
   MIB.addRegMask(RI.getNoPreservedMask());

+  bool IsPositionIndependent = isPositionIndependent();
   unsigned NumLPads = LPadList.size();
   if (Subtarget->isThumb2()) {
     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
@@ -7367,7 +7608,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
                    .addMemOperand(JTMMOLd));

     unsigned NewVReg6 = NewVReg5;
-    if (RelocM == Reloc::PIC_) {
+    if (IsPositionIndependent) {
       NewVReg6 = MRI->createVirtualRegister(TRC);
       AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
                      .addReg(ARM::CPSR, RegState::Define)
@@ -7450,7 +7691,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
                    .addImm(0)
                    .addMemOperand(JTMMOLd));

-    if (RelocM == Reloc::PIC_) {
+    if (IsPositionIndependent) {
       BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
         .addReg(NewVReg5, RegState::Kill)
         .addReg(NewVReg4)
@@ -7534,7 +7775,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
     (*I)->setIsEHPad(false);

   // The instruction is gone now.
-  MI->eraseFromParent();
+  MI.eraseFromParent();
 }

 static
@@ -7586,8 +7827,8 @@ static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {

 /// Emit a post-increment load operation with given size. The instructions
 /// will be added to BB at Pos.
-static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
-                       const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+                       const TargetInstrInfo *TII, const DebugLoc &dl,
                        unsigned LdSize, unsigned Data, unsigned AddrIn,
                        unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
   unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
@@ -7618,8 +7859,8 @@ static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,

 /// Emit a post-increment store operation with given size. The instructions
 /// will be added to BB at Pos.
-static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
-                       const TargetInstrInfo *TII, DebugLoc dl,
+static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
+                       const TargetInstrInfo *TII, const DebugLoc &dl,
                        unsigned StSize, unsigned Data, unsigned AddrIn,
                        unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
   unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
@@ -7647,7 +7888,7 @@ static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
 }

 MachineBasicBlock *
-ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ARMTargetLowering::EmitStructByval(MachineInstr &MI,
                                    MachineBasicBlock *BB) const {
   // This pseudo instruction has 3 operands: dst, src, size
   // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
@@ -7656,11 +7897,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   MachineFunction::iterator It = ++BB->getIterator();

-  unsigned dest = MI->getOperand(0).getReg();
-  unsigned src = MI->getOperand(1).getReg();
-  unsigned SizeVal = MI->getOperand(2).getImm();
-  unsigned Align = MI->getOperand(3).getImm();
-  DebugLoc dl = MI->getDebugLoc();
+  unsigned dest = MI.getOperand(0).getReg();
+  unsigned src = MI.getOperand(1).getReg();
+  unsigned SizeVal = MI.getOperand(2).getImm();
+  unsigned Align = MI.getOperand(3).getImm();
+  DebugLoc dl = MI.getDebugLoc();

   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -7732,7 +7973,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
       srcIn = srcOut;
       destIn = destOut;
     }
-    MI->eraseFromParent();   // The instruction is gone now.
+    MI.eraseFromParent();   // The instruction is gone now.
     return BB;
   }
@@ -7858,7 +8099,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,

   // Add epilogue to handle BytesLeft.
   BB = exitMBB;
-  MachineInstr *StartOfExit = exitMBB->begin();
+  auto StartOfExit = exitMBB->begin();

   //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
   //   [destOut] = STRB_POST(scratch, destLoop, 1)
@@ -7876,16 +8117,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
     destIn = destOut;
   }

-  MI->eraseFromParent();   // The instruction is gone now.
+  MI.eraseFromParent();   // The instruction is gone now.
   return BB;
 }

 MachineBasicBlock *
-ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const {
   const TargetMachine &TM = getTargetMachine();
   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();

   assert(Subtarget->isTargetWindows() &&
          "__chkstk is only supported on Windows");
@@ -7940,21 +8181,23 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
   AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
                                       ARM::SP)
-                              .addReg(ARM::SP).addReg(ARM::R4)));
+                              .addReg(ARM::SP, RegState::Kill)
+                              .addReg(ARM::R4, RegState::Kill)
+                              .setMIFlags(MachineInstr::FrameSetup)));

-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return MBB;
 }

 MachineBasicBlock *
-ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const {
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI.getDebugLoc();
   MachineFunction *MF = MBB->getParent();
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();

   MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
-  MF->push_back(ContBB);
+  MF->insert(++MBB->getIterator(), ContBB);
   ContBB->splice(ContBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
   ContBB->transferSuccessorsAndUpdatePHIs(MBB);
@@ -7965,75 +8208,89 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
   MBB->addSuccessor(TrapBB);

   BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
-      .addReg(MI->getOperand(0).getReg())
+      .addReg(MI.getOperand(0).getReg())
       .addMBB(TrapBB);
+  AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB));
   MBB->addSuccessor(ContBB);

-  MI->eraseFromParent();
+  MI.eraseFromParent();
   return ContBB;
 }

 MachineBasicBlock *
-ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
-  DebugLoc dl = MI->getDebugLoc();
+  DebugLoc dl = MI.getDebugLoc();
   bool isThumb2 = Subtarget->isThumb2();
-  switch (MI->getOpcode()) {
+  switch (MI.getOpcode()) {
   default: {
-    MI->dump();
+    MI.dump();
     llvm_unreachable("Unexpected instr type to insert");
   }
+
+  // Thumb1 post-indexed loads are really just single-register LDMs.
+  case ARM::tLDR_postidx: {
+    BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
+        .addOperand(MI.getOperand(1)) // Rn_wb
+        .addOperand(MI.getOperand(2)) // Rn
+        .addOperand(MI.getOperand(3)) // PredImm
+        .addOperand(MI.getOperand(4)) // PredReg
+        .addOperand(MI.getOperand(0)); // Rt
+    MI.eraseFromParent();
+    return BB;
+  }
+
   // The Thumb2 pre-indexed stores have the same MI operands, they just
   // define them differently in the .td files from the isel patterns, so
   // they need pseudos.
   case ARM::t2STR_preidx:
-    MI->setDesc(TII->get(ARM::t2STR_PRE));
+    MI.setDesc(TII->get(ARM::t2STR_PRE));
     return BB;
   case ARM::t2STRB_preidx:
-    MI->setDesc(TII->get(ARM::t2STRB_PRE));
+    MI.setDesc(TII->get(ARM::t2STRB_PRE));
     return BB;
   case ARM::t2STRH_preidx:
-    MI->setDesc(TII->get(ARM::t2STRH_PRE));
+    MI.setDesc(TII->get(ARM::t2STRH_PRE));
     return BB;

   case ARM::STRi_preidx:
   case ARM::STRBi_preidx: {
-    unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
-      ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
+    unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
+                                                         : ARM::STRB_PRE_IMM;
     // Decode the offset.
-    unsigned Offset = MI->getOperand(4).getImm();
+    unsigned Offset = MI.getOperand(4).getImm();
     bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
     Offset = ARM_AM::getAM2Offset(Offset);
     if (isSub)
       Offset = -Offset;

-    MachineMemOperand *MMO = *MI->memoperands_begin();
+    MachineMemOperand *MMO = *MI.memoperands_begin();
     BuildMI(*BB, MI, dl, TII->get(NewOpc))
-      .addOperand(MI->getOperand(0))  // Rn_wb
-      .addOperand(MI->getOperand(1))  // Rt
-      .addOperand(MI->getOperand(2))  // Rn
-      .addImm(Offset)                 // offset (skip GPR==zero_reg)
-      .addOperand(MI->getOperand(5))  // pred
-      .addOperand(MI->getOperand(6))
-      .addMemOperand(MMO);
-    MI->eraseFromParent();
+        .addOperand(MI.getOperand(0)) // Rn_wb
+        .addOperand(MI.getOperand(1)) // Rt
+        .addOperand(MI.getOperand(2)) // Rn
+        .addImm(Offset)               // offset (skip GPR==zero_reg)
+        .addOperand(MI.getOperand(5)) // pred
+        .addOperand(MI.getOperand(6))
+        .addMemOperand(MMO);
+    MI.eraseFromParent();
     return BB;
   }
   case ARM::STRr_preidx:
   case ARM::STRBr_preidx:
   case ARM::STRH_preidx: {
     unsigned NewOpc;
-    switch (MI->getOpcode()) {
+    switch (MI.getOpcode()) {
     default: llvm_unreachable("unexpected opcode!");
     case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
     case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
     case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
     }
     MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
-    for (unsigned i = 0; i < MI->getNumOperands(); ++i)
-      MIB.addOperand(MI->getOperand(i));
-    MI->eraseFromParent();
+    for (unsigned i = 0; i < MI.getNumOperands(); ++i)
+      MIB.addOperand(MI.getOperand(i));
+    MI.eraseFromParent();
     return BB;
   }
@@ -8066,8 +8323,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BB->addSuccessor(copy0MBB);
     BB->addSuccessor(sinkMBB);

-    BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
-      .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+    BuildMI(BB, dl, TII->get(ARM::tBcc))
+        .addMBB(sinkMBB)
+        .addImm(MI.getOperand(3).getImm())
+        .addReg(MI.getOperand(4).getReg());

     //  copy0MBB:
     //   %FalseValue = ...
     //   ...
@@ -8081,12 +8340,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
     //  ...
     BB = sinkMBB;
-    BuildMI(*BB, BB->begin(), dl,
-            TII->get(ARM::PHI), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
-      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
+        .addReg(MI.getOperand(1).getReg())
+        .addMBB(copy0MBB)
+        .addReg(MI.getOperand(2).getReg())
+        .addMBB(thisMBB);

-    MI->eraseFromParent();   // The pseudo instruction is gone now.
+    MI.eraseFromParent(); // The pseudo instruction is gone now.
     return BB;
   }
@@ -8097,10 +8357,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     // Compare both parts that make up the double comparison separately for
     // equality.
-    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+    bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;

-    unsigned LHS1 = MI->getOperand(1).getReg();
-    unsigned LHS2 = MI->getOperand(2).getReg();
+    unsigned LHS1 = MI.getOperand(1).getReg();
+    unsigned LHS2 = MI.getOperand(2).getReg();
     if (RHSisZero) {
       AddDefaultPred(BuildMI(BB, dl,
                              TII->get(isThumb2 ?
                                        ARM::t2CMPri : ARM::CMPri))
@@ -8109,8 +8369,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                      .addReg(LHS2).addImm(0)
                      .addImm(ARMCC::EQ).addReg(ARM::CPSR);
     } else {
-      unsigned RHS1 = MI->getOperand(3).getReg();
-      unsigned RHS2 = MI->getOperand(4).getReg();
+      unsigned RHS1 = MI.getOperand(3).getReg();
+      unsigned RHS2 = MI.getOperand(4).getReg();
       AddDefaultPred(BuildMI(BB, dl,
                              TII->get(isThumb2 ?
                                       ARM::t2CMPrr : ARM::CMPrr))
                      .addReg(LHS1).addReg(RHS1));
@@ -8119,9 +8379,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
     }

-    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+    MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
     MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
-    if (MI->getOperand(0).getImm() == ARMCC::NE)
+    if (MI.getOperand(0).getImm() == ARMCC::NE)
       std::swap(destMBB, exitMBB);

     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -8131,7 +8391,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     else
       BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);

-    MI->eraseFromParent();   // The pseudo instruction is gone now.
+    MI.eraseFromParent(); // The pseudo instruction is gone now.
     return BB;
   }
@@ -8168,9 +8428,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     Fn->insert(BBI, RSBBB);
     Fn->insert(BBI, SinkBB);

-    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
-    unsigned int ABSDstReg = MI->getOperand(0).getReg();
-    bool ABSSrcKIll = MI->getOperand(1).isKill();
+    unsigned int ABSSrcReg = MI.getOperand(1).getReg();
+    unsigned int ABSDstReg = MI.getOperand(0).getReg();
+    bool ABSSrcKIll = MI.getOperand(1).isKill();
     bool isThumb2 = Subtarget->isThumb2();
     MachineRegisterInfo &MRI = Fn->getRegInfo();
     // In Thumb mode S must not be specified if source register is the SP or
@@ -8215,7 +8475,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
       .addReg(ABSSrcReg).addMBB(BB);

     // remove ABS instruction
-    MI->eraseFromParent();
+    MI.eraseFromParent();

     // return last added BB
     return SinkBB;
@@ -8234,38 +8494,38 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
 static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
-                                    MachineInstr *MI, const SDNode *Node) {
+                                    MachineInstr &MI, const SDNode *Node) {
   bool isThumb1 = Subtarget->isThumb1Only();

-  DebugLoc DL = MI->getDebugLoc();
-  MachineFunction *MF = MI->getParent()->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MI.getParent()->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineInstrBuilder MIB(*MF, MI);

   // If the new dst/src is unused mark it as dead.
   if (!Node->hasAnyUseOfValue(0)) {
-    MI->getOperand(0).setIsDead(true);
+    MI.getOperand(0).setIsDead(true);
   }
   if (!Node->hasAnyUseOfValue(1)) {
-    MI->getOperand(1).setIsDead(true);
+    MI.getOperand(1).setIsDead(true);
   }

   // The MEMCPY both defines and kills the scratch registers.
-  for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
     unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ?
                                                &ARM::tGPRRegClass : &ARM::GPRRegClass);
     MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
   }
 }

-void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
                                                       SDNode *Node) const {
-  if (MI->getOpcode() == ARM::MEMCPY) {
+  if (MI.getOpcode() == ARM::MEMCPY) {
     attachMEMCPYScratchRegs(Subtarget, MI, Node);
     return;
   }

-  const MCInstrDesc *MCID = &MI->getDesc();
+  const MCInstrDesc *MCID = &MI.getDesc();
   // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
   // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
   // operand is still set to noreg. If needed, set the optional operand's
@@ -8274,24 +8534,24 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
   // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).

   // Rename pseudo opcodes.
-  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
   if (NewOpc) {
     const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
     MCID = &TII->get(NewOpc);

-    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+    assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
            "converted opcode should be the same except for cc_out");

-    MI->setDesc(*MCID);
+    MI.setDesc(*MCID);

     // Add the optional cc_out operand
-    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
+    MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
   }
   unsigned ccOutIdx = MCID->getNumOperands() - 1;

   // Any ARM instruction that sets the 's' bit should specify an optional
   // "cc_out" operand in the last operand position.
-  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+  if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
     assert(!NewOpc && "Optional cc_out operand required");
     return;
   }
@@ -8299,14 +8559,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
   // since we already have an optional CPSR def.
   bool definesCPSR = false;
   bool deadCPSR = false;
-  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
-       i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
+       ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
     if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
       definesCPSR = true;
       if (MO.isDead())
         deadCPSR = true;
-      MI->RemoveOperand(i);
+      MI.RemoveOperand(i);
       break;
     }
   }
@@ -8316,14 +8576,14 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
   }
   assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
   if (deadCPSR) {
-    assert(!MI->getOperand(ccOutIdx).getReg() &&
+    assert(!MI.getOperand(ccOutIdx).getReg() &&
            "expect uninitialized optional cc_out operand");
     return;
   }

   // If this instruction was defined with an optional CPSR def and its dag node
   // had a live implicit CPSR def, then activate the optional CPSR def.
-  MachineOperand &MO = MI->getOperand(ccOutIdx);
+  MachineOperand &MO = MI.getOperand(ccOutIdx);
   MO.setReg(ARM::CPSR);
   MO.setIsDef(true);
 }
@@ -8453,16 +8713,12 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
                                        TargetLowering::DAGCombinerInfo &DCI) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  if (N0.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
-    if (Result.getNode())
+  if (N0.getNode()->hasOneUse())
+    if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
       return Result;
-  }
-  if (N1.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
-    if (Result.getNode())
+  if (N1.getNode()->hasOneUse())
+    if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
      return Result;
-  }
  return SDValue();
 }
@@ -8544,7 +8800,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
   // Get widened type and narrowed type.
   MVT widenType;
   unsigned numElem = VT.getVectorNumElements();
-  
+
   EVT inputLaneType = Vec.getValueType().getVectorElementType();
   switch (inputLaneType.getSimpleVT().SimpleTy) {
     case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
@@ -8570,11 +8826,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const ARMSubtarget *Subtarget) {
-  if (Subtarget->isThumb1Only()) return SDValue();
-
-  // Only perform the checks after legalize when the pattern is available.
-  if (DCI.isBeforeLegalize()) return SDValue();
-
   // Look for multiply add opportunities.
   // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
   // each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -8702,14 +8953,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
   return resNode;
 }

+static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const ARMSubtarget *Subtarget) {
+  // UMAAL is similar to UMLAL except that it adds two unsigned values.
+  // While trying to combine for the other MLAL nodes, first search for the
+  // chance to use UMAAL. Check if Addc uses another addc node which can first
+  // be combined into a UMLAL. The other pattern is AddcNode being combined
+  // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
+
+  if (!Subtarget->hasV6Ops())
+    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+  SDNode *PrevAddc = nullptr;
+  if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
+    PrevAddc = AddcNode->getOperand(0).getNode();
+  else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
+    PrevAddc = AddcNode->getOperand(1).getNode();
+
+  // If there's no addc chains, just return a search for any MLAL.
+  if (PrevAddc == nullptr)
+    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+  // Try to convert the addc operand to an MLAL and if that fails try to
+  // combine AddcNode.
+  SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
+  if (MLAL != SDValue(PrevAddc, 0))
+    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+
+  // Find the converted UMAAL or quit if it doesn't exist.
+  SDNode *UmlalNode = nullptr;
+  SDValue AddHi;
+  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
+    UmlalNode = AddcNode->getOperand(0).getNode();
+    AddHi = AddcNode->getOperand(1);
+  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
+    UmlalNode = AddcNode->getOperand(1).getNode();
+    AddHi = AddcNode->getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
+  // the ADDC as well as Zero.
+  auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
+
+  if (!Zero || Zero->getZExtValue() != 0)
+    return SDValue();
+
+  // Check that we have a glued ADDC node.
+  if (AddcNode->getValueType(1) != MVT::Glue)
+    return SDValue();
+
+  // Look for the glued ADDE.
+  SDNode* AddeNode = AddcNode->getGluedUser();
+  if (!AddeNode)
+    return SDValue();
+
+  if ((AddeNode->getOperand(0).getNode() == Zero &&
+       AddeNode->getOperand(1).getNode() == UmlalNode) ||
+      (AddeNode->getOperand(0).getNode() == UmlalNode &&
+       AddeNode->getOperand(1).getNode() == Zero)) {
+
+    SelectionDAG &DAG = DCI.DAG;
+    SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
+                      UmlalNode->getOperand(2), AddHi };
+    SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
+                                DAG.getVTList(MVT::i32, MVT::i32), Ops);
+
+    // Replace the ADDs' nodes uses by the UMAAL node's values.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
+    DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
+
+    // Return original node to notify the driver to stop replacing.
+    return SDValue(AddcNode, 0);
+  }
+  return SDValue();
+}
+
 /// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
+/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
 static SDValue PerformADDCCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const ARMSubtarget *Subtarget) {
-  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+  if (Subtarget->isThumb1Only()) return SDValue();
+
+  // Only perform the checks after legalize when the pattern is available.
+  if (DCI.isBeforeLegalize()) return SDValue();
+
+  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
 }

 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
@@ -8721,15 +9055,13 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                           const ARMSubtarget *Subtarget){
   // Attempt to create vpaddl for this add.
-  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
-  if (Result.getNode())
+  if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
     return Result;

   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
-  if (N0.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
-    if (Result.getNode()) return Result;
-  }
+  if (N0.getNode()->hasOneUse())
+    if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
+      return Result;
   return SDValue();
 }
@@ -8742,8 +9074,7 @@ static SDValue PerformADDCombine(SDNode *N,
   SDValue N1 = N->getOperand(1);

   // First try with the default operand order.
-  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
-  if (Result.getNode())
+  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
     return Result;

   // If that didn't work, try again with the operands commuted.
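For reference, a minimal standalone sketch (not part of the patch, function name invented) of the source-level shape the ADDC/ADDE-to-UMAAL combine above is after. UMAAL computes RdHi:RdLo = Rn * Rm + RdHi + RdLo, i.e. a 32x32->64 multiply plus two independent 32-bit addends folded into the 64-bit result:

#include <cstdint>

uint64_t umaal_shape(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
  // One UMAAL can cover the whole expression: the widening multiply plus
  // both 32-bit addends accumulated into the 64-bit result.
  return (uint64_t)a * b + c + d;
}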
@@ -8758,10 +9089,9 @@ static SDValue PerformSUBCombine(SDNode *N,
   SDValue N1 = N->getOperand(1);

   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
-  if (N1.getNode()->hasOneUse()) {
-    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
-    if (Result.getNode()) return Result;
-  }
+  if (N1.getNode()->hasOneUse())
+    if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
+      return Result;

   return SDValue();
 }
@@ -8931,8 +9261,7 @@ static SDValue PerformANDCombine(SDNode *N,

   if (!Subtarget->isThumb1Only()) {
     // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
-    SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
-    if (Result.getNode())
+    if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
       return Result;
   }
@@ -8974,8 +9303,7 @@ static SDValue PerformORCombine(SDNode *N,

   if (!Subtarget->isThumb1Only()) {
     // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
-    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
-    if (Result.getNode())
+    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
       return Result;
   }
@@ -9148,8 +9476,7 @@ static SDValue PerformXORCombine(SDNode *N,

   if (!Subtarget->isThumb1Only()) {
     // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
-    SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
-    if (Result.getNode())
+    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
       return Result;
   }
@@ -9311,17 +9638,15 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
     SelectionDAG &DAG = DCI.DAG;
     SDLoc DL(LD);
     SDValue BasePtr = LD->getBasePtr();
-    SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
-                                 LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(), LD->isInvariant(),
-                                 LD->getAlignment());
+    SDValue NewLD1 =
+        DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
+                    LD->getAlignment(), LD->getMemOperand()->getFlags());

     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, DL, MVT::i32));
-    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
-                                 LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(), LD->isInvariant(),
-                                 std::min(4U, LD->getAlignment() / 2));
+    SDValue NewLD2 = DAG.getLoad(
+        MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
+        std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());

     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
     if (DCI.DAG.getDataLayout().isBigEndian())
@@ -9375,11 +9700,9 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
   // into a pair of GPRs, which is fine when the value is used as a scalar,
   // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
   SelectionDAG &DAG = DCI.DAG;
-  if (N->getNumOperands() == 2) {
-    SDValue RV = PerformVMOVDRRCombine(N, DAG);
-    if (RV.getNode())
+  if (N->getNumOperands() == 2)
+    if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
       return RV;
-  }

   // Load i64 elements as f64 values so that type legalization does not split
   // them up into i32 values.
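For reference, a minimal standalone sketch (not part of the patch, names invented) of the algebraic identity behind the combineSelectAndUse folds referenced in the comments above, shown for the AND case with an all-ones arm:

static bool fold_and_select_holds(bool cc, unsigned c, unsigned x) {
  unsigned original = (cc ? ~0u : c) & x;  // (and (select cc, -1, c), x)
  unsigned folded   = cc ? x : (x & c);    // (select cc, x, (and x, c))
  return original == folded;               // holds for every input: x & -1 == x
}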
@@ -9396,7 +9719,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
     DCI.AddToWorklist(V.getNode());
   }
   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
-  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
+  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
 }
@@ -9445,7 +9768,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
       // Assume only bit cast to i32 will go away.
       if (Elt->getOperand(0).getValueType() == MVT::i32)
         ++NumOfBitCastedElts;
-    } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt))
+    } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
       // Constants are statically casted, thus do not count them as
       // relevant operands.
       --NumOfRelevantElts;
@@ -9472,7 +9795,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   SDLoc dl(N);
   for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
     SDValue V = N->getOperand(Idx);
-    if (V.getOpcode() == ISD::UNDEF)
+    if (V.isUndef())
       continue;
     if (V.getOpcode() == ISD::BITCAST &&
         V->getOperand(0).getValueType() == MVT::i32)
@@ -9540,8 +9863,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
   SDValue Concat0Op1 = Op0.getOperand(1);
   SDValue Concat1Op1 = Op1.getOperand(1);
-  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
-      Concat1Op1.getOpcode() != ISD::UNDEF)
+  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
     return SDValue();
   // Skip the transformation if any of the types are illegal.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9568,7 +9890,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
     NewMask.push_back(NewElt);
   }
   return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
-                              DAG.getUNDEF(VT), NewMask.data());
+                              DAG.getUNDEF(VT), NewMask);
 }

 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
@@ -9964,7 +10286,7 @@ static SDValue PerformSTORECombine(SDNode *N,
     SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
                                 DAG.getUNDEF(WideVec.getValueType()),
-                                ShuffleVec.data());
+                                ShuffleVec);
     // At this point all of the data is stored at the bottom of the
     // register. We now need to save it to mem.
@@ -9995,8 +10317,8 @@ static SDValue PerformSTORECombine(SDNode *N,
                                    StoreType, ShuffWide,
                                    DAG.getIntPtrConstant(I, DL));
       SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
-                                St->getPointerInfo(), St->isVolatile(),
-                                St->isNonTemporal(), St->getAlignment());
+                                St->getPointerInfo(), St->getAlignment(),
+                                St->getMemOperand()->getFlags());
       BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                             Increment);
       Chains.push_back(Ch);
@@ -10015,18 +10337,18 @@ static SDValue PerformSTORECombine(SDNode *N,
     bool isBigEndian = DAG.getDataLayout().isBigEndian();
     SDLoc DL(St);
     SDValue BasePtr = St->getBasePtr();
-    SDValue NewST1 = DAG.getStore(St->getChain(), DL,
-                                  StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
-                                  BasePtr, St->getPointerInfo(), St->isVolatile(),
-                                  St->isNonTemporal(), St->getAlignment());
+    SDValue NewST1 = DAG.getStore(
+        St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
+        BasePtr, St->getPointerInfo(), St->getAlignment(),
+        St->getMemOperand()->getFlags());

     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, DL, MVT::i32));
     return DAG.getStore(NewST1.getValue(0), DL,
                         StVal.getNode()->getOperand(isBigEndian ?
                                                    0 : 1),
-                        OffsetPtr, St->getPointerInfo(), St->isVolatile(),
-                        St->isNonTemporal(),
-                        std::min(4U, St->getAlignment() / 2));
+                        OffsetPtr, St->getPointerInfo(),
+                        std::min(4U, St->getAlignment() / 2),
+                        St->getMemOperand()->getFlags());
   }

   if (StVal.getValueType() == MVT::i64 &&
@@ -10049,9 +10371,8 @@ static SDValue PerformSTORECombine(SDNode *N,
     DCI.AddToWorklist(ExtElt.getNode());
     DCI.AddToWorklist(V.getNode());
     return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
-                        St->getPointerInfo(), St->isVolatile(),
-                        St->isNonTemporal(), St->getAlignment(),
-                        St->getAAInfo());
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags(), St->getAAInfo());
   }

   // If this is a legal vector store, try to combine it into a VST1_UPD.
@@ -10077,7 +10398,8 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();

   SDValue Op = N->getOperand(0);
-  if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+      Op.getOpcode() != ISD::FMUL)
     return SDValue();

   SDValue ConstVec = Op->getOperand(1);
@@ -10134,7 +10456,7 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
   SDValue Op = N->getOperand(0);
   unsigned OpOpcode = Op.getNode()->getOpcode();
-  if (!N->getValueType(0).isVector() ||
+  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
       (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
     return SDValue();
@@ -10475,7 +10797,7 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
     // The operand to BFI is already a mask suitable for removing the bits it
     // sets.
     ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
-    APInt Mask = CI->getAPIntValue();
+    const APInt &Mask = CI->getAPIntValue();
     KnownZero &= Mask;
     KnownOne &= Mask;
     return;
@@ -10533,7 +10855,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
   } else {
     assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
   }
-  
+
   if (Op1->getOpcode() != ISD::OR)
     return SDValue();
@@ -10563,7 +10885,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
   SDLoc dl(X);
   EVT VT = X.getValueType();
   unsigned BitInX = AndC->getAPIntValue().logBase2();
-  
+
   if (BitInX != 0) {
     // We must shift X first.
     X = DAG.getNode(ISD::SRL, dl, VT, X,
@@ -10584,6 +10906,46 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
   return V;
 }

+/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
+SDValue
+ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
+  SDValue Cmp = N->getOperand(4);
+  if (Cmp.getOpcode() != ARMISD::CMPZ)
+    // Only looking at NE cases.
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc dl(N);
+  SDValue LHS = Cmp.getOperand(0);
+  SDValue RHS = Cmp.getOperand(1);
+  SDValue Chain = N->getOperand(0);
+  SDValue BB = N->getOperand(1);
+  SDValue ARMcc = N->getOperand(2);
+  ARMCC::CondCodes CC =
+      (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+  // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
+  // -> (brcond Chain BB CC CPSR Cmp)
+  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
+      LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
+      LHS->getOperand(0)->hasOneUse()) {
+    auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
+    auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
+    auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+    auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+    if ((LHS00C && LHS00C->getZExtValue() == 0) &&
+        (LHS01C && LHS01C->getZExtValue() == 1) &&
+        (LHS1C && LHS1C->getZExtValue() == 1) &&
+        (RHSC && RHSC->getZExtValue() == 0)) {
+      return DAG.getNode(
+          ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+          LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -10637,6 +10999,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
                        N->getOperand(3), NewCmp);
   }

+  // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
+  // -> (cmov F T CC CPSR Cmp)
+  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
+    auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
+    auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+    auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+    if ((LHS0C && LHS0C->getZExtValue() == 0) &&
+        (LHS1C && LHS1C->getZExtValue() == 1) &&
+        (RHSC && RHSC->getZExtValue() == 0)) {
+      return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+                         LHS->getOperand(2), LHS->getOperand(3),
+                         LHS->getOperand(4));
+    }
+  }
+
   if (Res.getNode()) {
     APInt KnownZero, KnownOne;
     DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
@@ -10687,6 +11064,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+  case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
   case ISD::LOAD:       return PerformLOADCombine(N, DCI);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
@@ -11209,22 +11587,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
-  if (Subtarget->isThumb1Only())
-    return false;
-
   EVT VT;
   SDValue Ptr;
-  bool isSEXTLoad = false;
+  bool isSEXTLoad = false, isNonExt;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     VT = LD->getMemoryVT();
     Ptr = LD->getBasePtr();
     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     VT = ST->getMemoryVT();
     Ptr = ST->getBasePtr();
+    isNonExt = !ST->isTruncatingStore();
   } else
     return false;

+  if (Subtarget->isThumb1Only()) {
+    // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
+    // must be non-extending/truncating, i32, with an offset of 4.
+    assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
+    if (Op->getOpcode() != ISD::ADD || !isNonExt)
+      return false;
+    auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+    if (!RHS || RHS->getZExtValue() != 4)
+      return false;
+
+    Offset = Op->getOperand(1);
+    Base = Op->getOperand(0);
+    AM = ISD::POST_INC;
+    return true;
+  }
+
   bool isInc;
   bool isLegal = false;
   if (Subtarget->isThumb2())
@@ -11333,6 +11726,26 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
   return false;
 }

+const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+  // At this point, we have to lower this constraint to something else, so we
+  // lower it to an "r" or "w". However, by doing this we will force the result
+  // to be in register, while the X constraint is much more permissive.
+  //
+  // Although we are correct (we are free to emit anything, without
+  // constraints), we might break use cases that would expect us to be more
+  // efficient and emit something else.
+  if (!Subtarget->hasVFP2())
+    return "r";
+  if (ConstraintVT.isFloatingPoint())
+    return "w";
+  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
+     (ConstraintVT.getSizeInBits() == 64 ||
+      ConstraintVT.getSizeInBits() == 128))
+    return "w";
+
+  return "r";
+}
+
 /// getConstraintType - Given a constraint letter, return the type of
 /// constraint it is for this target.
 ARMTargetLowering::ConstraintType
@@ -11651,7 +12064,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
 }

 SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
-  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
+          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) &&
          "Register-based DivRem lowering only");
   unsigned Opcode = Op->getOpcode();
   assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
@@ -11675,7 +12089,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);

   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(dl).setChain(InChain)
-    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
     .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);

   std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -11713,7 +12127,7 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
   // Lower call
   CallLoweringInfo CLI(DAG);
   CLI.setChain(InChain)
-     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
      .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -11961,23 +12375,20 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
 Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
-  if (!getInsertFencesForAtomic())
-    return nullptr;
-
   switch (Ord) {
-  case NotAtomic:
-  case Unordered:
+  case AtomicOrdering::NotAtomic:
+  case AtomicOrdering::Unordered:
     llvm_unreachable("Invalid fence: unordered/non-atomic");
-  case Monotonic:
-  case Acquire:
+  case AtomicOrdering::Monotonic:
+  case AtomicOrdering::Acquire:
     return nullptr; // Nothing to do
-  case SequentiallyConsistent:
+  case AtomicOrdering::SequentiallyConsistent:
    if (!IsStore)
      return nullptr; // Nothing to do
     /*FALLTHROUGH*/
-  case Release:
-  case AcquireRelease:
-    if (Subtarget->isSwift())
+  case AtomicOrdering::Release:
+  case AtomicOrdering::AcquireRelease:
+    if (Subtarget->preferISHSTBarriers())
       return makeDMB(Builder, ARM_MB::ISHST);
     // FIXME: add a comment with a link to documentation justifying this.
     else
@@ -11989,19 +12400,16 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
 Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                           AtomicOrdering Ord, bool IsStore,
                                           bool IsLoad) const {
-  if (!getInsertFencesForAtomic())
-    return nullptr;
-
   switch (Ord) {
-  case NotAtomic:
-  case Unordered:
+  case AtomicOrdering::NotAtomic:
+  case AtomicOrdering::Unordered:
     llvm_unreachable("Invalid fence: unordered/not-atomic");
-  case Monotonic:
-  case Release:
+  case AtomicOrdering::Monotonic:
+  case AtomicOrdering::Release:
     return nullptr; // Nothing to do
-  case Acquire:
-  case AcquireRelease:
-  case SequentiallyConsistent:
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
     return makeDMB(Builder, ARM_MB::ISH);
   }
   llvm_unreachable("Unknown fence ordering in emitTrailingFence");
@@ -12042,7 +12450,17 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  return true;
+  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+  // implement cmpxchg without spilling. If the address being exchanged is also
+  // on the stack and close enough to the spill slot, this can lead to a
+  // situation where the monitor always gets cleared and the atomic operation
+  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  return getTargetMachine().getOptLevel() != 0;
+}
+
+bool ARMTargetLowering::shouldInsertFencesForAtomic(
+    const Instruction *I) const {
+  return InsertFencesForAtomic;
 }

 // This has so far only been implemented for MachO.
@@ -12091,7 +12509,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
-  bool IsAcquire = isAtLeastAcquire(Ord);
+  bool IsAcquire = isAcquireOrStronger(Ord);

   // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
   // intrinsic must return {i32, i32} and we have to recombine them into a
@@ -12135,7 +12553,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                                Value *Addr,
                                                AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  bool IsRelease = isAtLeastRelease(Ord);
+  bool IsRelease = isReleaseOrStronger(Ord);

   // Since the intrinsics must have legal type, the i64 intrinsics take two
   // parameters: "i32, i32". We must marshal Val into the appropriate form
