| author | 2017-03-14 08:07:52 +0000 |
|---|---|
| committer | 2017-03-14 08:07:52 +0000 |
| commit | 1cb66ada17adf0954eaadba4d02ec2470365a3ac (patch) |
| tree | 521159d8f39562a43fffd680147eb5a71709b9b1 /gnu/llvm/lib/Target |
| parent | Mark the sshd_config UsePrivilegeSeparation option as deprecated, (diff) |
| download | wireguard-openbsd-1cb66ada17adf0954eaadba4d02ec2470365a3ac.tar.xz, wireguard-openbsd-1cb66ada17adf0954eaadba4d02ec2470365a3ac.zip |
Import LLVM 4.0.0 release including clang and lld.
Diffstat (limited to 'gnu/llvm/lib/Target')
49 files changed, 713 insertions, 572 deletions
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64.td b/gnu/llvm/lib/Target/AArch64/AArch64.td
index 740766b151b..91c335fac32 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64.td
@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
 def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
     "Misaligned128StoreIsSlow", "true",
     "Misaligned 128 bit stores are slow">;
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
-    "AvoidQuadLdStPairs", "true",
-    "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+    "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
     "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M1 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2/M3 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9058617768d..938779d2369 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
 
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
 
   // Big endian vectors must be passed as if they were 1-element vectors so that
   // their lanes are in a consistent order.
@@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
 
-  // A SwiftError is passed in X19.
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+  // A SwiftError is passed in X21.
+  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
 
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
 def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
 
 def CSR_AArch64_AAPCS_SwiftError
-    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>;
+    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
 
 // The function used by Darwin to obtain the address of a thread-local variable
 // guarantees more than a normal AAPCS function. x16 and x17 are used on the
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5c8acba26aa..4c789926e3e 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
     return false;
 
   // On some CPUs quad load/store pairs are slower than two single load/stores.
-  if (Subtarget.avoidQuadLdStPairs()) {
+  if (Subtarget.isPaired128Slow()) {
     switch (MI.getOpcode()) {
     default:
       break;
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8a76c42b589..8e312dcf276 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -687,9 +687,30 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   MachineInstrBuilder MIB;
   DebugLoc DL = I->getDebugLoc();
   MachineBasicBlock *MBB = I->getParent();
+  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
+  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+  // Kill flags may become invalid when moving stores for pairing.
+  if (RegOp0.isUse()) {
+    if (!MergeForward) {
+      // Clear kill flags on store if moving upwards. Example:
+      //   STRWui %w0, ...
+      //   USE %w1
+      //   STRWui kill %w1   ; need to clear kill flag when moving STRWui upwards
+      RegOp0.setIsKill(false);
+      RegOp1.setIsKill(false);
+    } else {
+      // Clear kill flags of the first stores register. Example:
+      //   STRWui %w1, ...
+      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
+      //   STRW %w0
+      unsigned Reg = getLdStRegOp(*I).getReg();
+      for (MachineInstr &MI : make_range(std::next(I), Paired))
+        MI.clearRegisterKills(Reg, TRI);
+    }
+  }
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
-            .addOperand(getLdStRegOp(*RtMI))
-            .addOperand(getLdStRegOp(*Rt2MI))
+            .addOperand(RegOp0)
+            .addOperand(RegOp1)
             .addOperand(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -832,9 +853,11 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
           .addImm(Imms);
     }
   }
-  StoreI->clearRegisterKills(StRt, TRI);
-  (void)BitExtMI;
+  // Clear kill flags between store and load.
+  for (MachineInstr &MI : make_range(StoreI->getIterator(),
+                                     BitExtMI->getIterator()))
+    MI.clearRegisterKills(StRt, TRI);
 
   DEBUG(dbgs() << "Promoting load by replacing :\n    ");
   DEBUG(StoreI->print(dbgs()));
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0b2badff7cc..13022009af1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
   "Enable SI Machine Scheduler"
 >;
 
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 4acd55eb612..974e79fff3d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
-  if (STM.isAmdCodeObjectV2()) {
+  if (STM.isAmdCodeObjectV2(*MF)) {
     getSIProgramInfo(KernelInfo, *MF);
     EmitAmdKernelCodeT(*MF, KernelInfo);
   }
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+  if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
     AMDGPUTargetStreamer *TS =
         static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
     SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
   // FIXME: Should use getKernArgSize
   header.kernarg_segment_byte_size =
-      STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+      STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2b4fc5397b1..5bf347e4865 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
   unsigned Opc = (VT == MVT::f64) ?
     AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 
-  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
-  // omod
-  SDValue Ops[8];
-
-  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
-  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
-  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e48c1943cb0..54caa2c5dfa 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   SDLoc SL(N);
   switch (Opc) {
   case ISD::FADD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
     SDValue LHS = N0.getOperand(0);
     SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   }
   case ISD::FMA:
   case ISD::FMAD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
     SDValue LHS = N0.getOperand(0);
     SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(KILL)
+  NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(SENDMSGHALT)
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 69567aa5f71..f6adceac6f1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,16 @@ protected:
 public:
   AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
 
+  bool mayIgnoreSignedZero(SDValue Op) const {
+    if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+      return true;
+
+    if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+      return BO->Flags.hasNoSignedZeros();
+
+    return false;
+  }
+
   bool isFAbsFree(EVT VT) const override;
   bool isFNegFree(EVT VT) const override;
   bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
   INTERP_P2,
   PC_ADD_REL_OFFSET,
   KILL,
+  DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index f079c8d0c70..d7fa28bdc00 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
 
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
 def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
 def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 74851aedbb2..c35a67de1d7 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
-  unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+                                            unsigned ExplicitArgBytes) const {
+  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
 
   if (ImplicitBytes == 0)
     return ExplicitArgBytes;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 51ba501bddd..0e3cb7dc1f8 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -311,22 +311,31 @@ public:
     return EnableXNACK;
   }
 
-  bool isAmdCodeObjectV2() const {
-    return isAmdHsaOS() || isMesa3DOS();
+  bool isMesaKernel(const MachineFunction &MF) const {
+    return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  // Covers VS/PS/CS graphics shaders
+  bool isMesaGfxShader(const MachineFunction &MF) const {
+    return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+    return isAmdHsaOS() || isMesaKernel(MF);
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
-  unsigned getExplicitKernelArgOffset() const {
-    return isAmdCodeObjectV2() ? 0 : 36;
+  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+    return isAmdCodeObjectV2(MF) ? 0 : 36;
   }
 
   unsigned getAlignmentForImplicitArgPtr() const {
     return isAmdHsaOS() ? 8 : 4;
   }
 
-  unsigned getImplicitArgNumBytes() const {
-    if (isMesa3DOS())
+  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+    if (isMesaKernel(MF))
       return 16;
     if (isAmdHsaOS() && isOpenCLEnv())
       return 32;
@@ -585,7 +594,7 @@ public:
     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
   }
 
-  unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index de7ce5cb9e4..77fee4356b6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
     llvm_unreachable("Unsupported private trunc store");
   }
 
-  SDValue Chain = Store->getChain();
+  SDValue OldChain = Store->getChain();
+  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+  // Skip dummy
+  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
   SDValue BasePtr = Store->getBasePtr();
   SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
 
   // Store dword
   // TODO: Can we be smarter about MachinePointerInfo?
-  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+  // If we are part of expanded vector, make our neighbors depend on this store
+  if (VectorTrunc) {
+    // Make all other vector elements depend on this store
+    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+  }
+  return NewStore;
 }
 
 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Neither LOCAL nor PRIVATE can do vectors at the moment
   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
       VT.isVector()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+      // Add an extra level of chain to isolate this vector
+      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+      // TODO: can the chain be replaced without creating a new store?
+      SDValue NewStore = DAG.getTruncStore(
+          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+          MemVT, StoreNode->getAlignment(),
+          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+      StoreNode = cast<StoreSDNode>(NewStore);
+    }
+
     return scalarizeVectorStore(StoreNode, DAG);
   }
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Put the mask in correct place
   SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
 
-  // Put the mask in correct place
+  // Put the value bits in correct place
   SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
 
-    unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
index 19795bdde64..9210e66b0fe 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
 
 def MOV : R600_1OP <0x19, "MOV", []>;
 
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak in some to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+  (outs),
+  (ins),
+  "DUMMY_CHAIN",
+  [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
 
 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d0a69eafc58..0b571551588 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
     PreloadedPrivateBufferReg = TRI->getPreloadedValue(
       MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
   }
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-    assert(ST.isAmdCodeObjectV2());
+    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
     MRI.addLiveIn(PreloadedPrivateBufferReg);
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   bool CopyBuffer = ResourceRegUsed &&
     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+    ST.isAmdCodeObjectV2(MF) &&
     ScratchRsrcReg != PreloadedPrivateBufferReg;
 
   // This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
-    assert(!ST.isAmdCodeObjectV2());
+  if (ResourceRegUsed && (ST.isMesaGfxShader(MF) ||
+      (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+    assert(!ST.isAmdCodeObjectV2(MF));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 
-    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
-    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 
     // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
-    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    if (MFI->hasPrivateMemoryInputPtr()) {
+      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+      if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+        BuildMI(MBB, I, DL, Mov64, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      } else {
+        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+        PointerType *PtrTy =
+          PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+                           AMDGPUAS::CONSTANT_ADDRESS);
+        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+        auto MMO = MF.getMachineMemOperand(PtrInfo,
+                                           MachineMemOperand::MOLoad |
+                                           MachineMemOperand::MOInvariant |
+                                           MachineMemOperand::MODereferenceable,
+                                           0, 0);
+        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addImm(0) // offset
+          .addImm(0) // glc
+          .addMemOperand(MMO)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      }
+    } else {
+      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+    }
 
     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
       .addImm(Rsrc23 & 0xffffffff)
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9140fe6cd14..b98f9f400ee 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (!AMDGPU::isShader(CallConv)) {
     assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
   } else {
-    assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+    assert(!Info->hasDispatchPtr() &&
            !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
            !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
            !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
            !Info->hasWorkItemIDZ());
   }
 
+  if (Info->hasPrivateMemoryInputPtr()) {
+    unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+    MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(PrivateMemoryPtrReg);
+  }
+
   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
   if (Info->hasPrivateSegmentBuffer()) {
     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = VA.getLocVT();
-      const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+      const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
                               VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
     HasStackObjects = true;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
   // TODO: Should this propagate fast-math-flags?
 
   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_implicit_buffer_ptr: {
+    unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+  }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
-    if (!Subtarget->isAmdCodeObjectV2()) {
+    if (!Subtarget->isAmdCodeObjectV2(MF)) {
       DiagnosticInfoUnsupported BadIntrin(
           *MF.getFunction(), "unsupported hsa intrinsic without hsa target",
           DL.getDebugLoc());
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
index b86c0419118..38e31e75ee6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -997,6 +997,11 @@ def : Pat <
 >;
 
 def : Pat <
+  (i1 (trunc i16:$a)),
+  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : Pat <
   (i1 (trunc i64:$a)),
   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
                     (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e911817c451..ecd46b95ca6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     PrivateSegmentWaveByteOffset(false),
     WorkItemIDX(false),
     WorkItemIDY(false),
-    WorkItemIDZ(false) {
+    WorkItemIDZ(false),
+    PrivateMemoryInputPtr(false) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (HasStackObjects || MaySpill)
     PrivateSegmentWaveByteOffset = true;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
 
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
 
     if (F->hasFnAttribute("amdgpu-dispatch-id"))
       DispatchID = true;
+  } else if (ST.isMesaGfxShader(MF)) {
+    if (HasStackObjects || MaySpill)
+      PrivateMemoryInputPtr = true;
   }
 
   // We don't need to worry about accessing spills with flat instructions.
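(Aside: the non-HSA private-memory plumbing in the hunks above and below splits into the two cases named in the new SIMachineFunctionInfo comment — compute gets the descriptor directly in sgpr[0:1], other shaders get a pointer there. The following is only a toy C++ model of that distinction, not LLVM's API; materializeRsrc01 and its integer "registers" are invented for illustration:)

    #include <cstdint>

    // Toy model of the two cases the SIFrameLowering hunk emits code for.
    // A compute shader is preloaded with the scratch descriptor itself in
    // sgpr[0:1] (lowered as S_MOV_B64); a graphics shader is preloaded with
    // a *pointer* in sgpr[0:1] and loads 64 bits through it (S_LOAD_DWORDX2).
    uint64_t materializeRsrc01(bool IsCompute, uint64_t SGPR01) {
      if (IsCompute)
        return SGPR01;                                     // copy descriptor
      return *reinterpret_cast<const uint64_t *>(SGPR01);  // indirect 64 bits
    }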
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   return FlatScratchInitUserSGPR;
 }
 
+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+  NumUserSGPRs += 2;
+  return PrivateMemoryPtrUserSGPR;
+}
+
 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
   MachineFunction *MF,
   unsigned FrameIndex,
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233cd78..6fc8d18bceb 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   unsigned ScratchRSrcReg;
   unsigned ScratchWaveOffsetReg;
 
+  // Input registers for non-HSA ABI
+  unsigned PrivateMemoryPtrUserSGPR;
+
   // Input registers setup for the HSA ABI.
   // User SGPRs in allocation order.
   unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ private:
   bool WorkItemIDY : 1;
   bool WorkItemIDZ : 1;
 
+  // Private memory buffer
+  // Compute directly in sgpr[0:1]
+  // Other shaders indirect 64-bits at sgpr[0:1]
+  bool PrivateMemoryInputPtr : 1;
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ public:
   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
   unsigned addDispatchID(const SIRegisterInfo &TRI);
   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+  unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
 
   // Add system SGPRs.
   unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ public:
     return WorkItemIDZ;
   }
 
+  bool hasPrivateMemoryInputPtr() const {
+    return PrivateMemoryInputPtr;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -338,6 +351,10 @@ public:
     return QueuePtrUserSGPR;
   }
 
+  unsigned getPrivateMemoryPtrUserSGPR() const {
+    return PrivateMemoryPtrUserSGPR;
+  }
+
   bool hasSpilledSGPRs() const {
     return HasSpilledSGPRs;
   }
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8c4b24a4504..a1ed5e8441d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
     return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
   case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
-    assert(ST.isAmdCodeObjectV2() &&
-           "Non-CodeObjectV2 ABI currently uses relocations");
-    assert(MFI->hasPrivateSegmentBuffer());
-    return MFI->PrivateSegmentBufferUserSGPR;
+    if (ST.isAmdCodeObjectV2(MF)) {
+      assert(MFI->hasPrivateSegmentBuffer());
+      return MFI->PrivateSegmentBufferUserSGPR;
+    }
+    assert(MFI->hasPrivateMemoryInputPtr());
+    return MFI->PrivateMemoryPtrUserSGPR;
   case SIRegisterInfo::KERNARG_SEGMENT_PTR:
     assert(MFI->hasKernargSegmentPtr());
     return MFI->KernargSegmentPtrUserSGPR;
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a15b9ceff2f..8cae83cd9d1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -607,12 +607,6 @@ def : Pat<
   (COPY $src)
 >;
 
-def : Pat<
-  (i1 (trunc i16:$src)),
-  (COPY $src)
->;
-
-
 def : Pat <
   (i16 (trunc i64:$src)),
   (EXTRACT_SUBREG $src, sub0)
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5efa64d25ce..c2a4d4ba99b 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
 }
 
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+  // v_div_scale_{f32|f64} do not support input modifiers.
+  let HasModifiers = 0;
   let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+  let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
 }
 
 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
 def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
   let SchedRW = [WriteFloatFMA, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }
 
 // Double precision division pre-scale.
 def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
   let SchedRW = [WriteDouble, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 8ec9cb02813..95db35ce8ff 100644
--- a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit the rest of the function body.
   EmitFunctionBody();
 
+  // Emit the XRay table for this function.
+  emitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
diff --git a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
index 9c278a52a7f..7a7b7fede7c 100644
--- a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // HFAs are passed in a contiguous block of registers, or on the stack
   CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
 // Also save R7-R4 first to match the stack frame fixed spill areas.
 def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
 
-// R6 is used to pass swifterror, remove it from CSR.
-def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
 
 def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
                                           (sub CSR_AAPCS_ThisReturn, R9))>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 95fcc8dcb45..baa4e0330cf 100644
--- a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1225,16 +1225,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     }
     case ARM::tTPsoft:
     case ARM::TPsoft: {
+      const bool Thumb = Opcode == ARM::tTPsoft;
+
       MachineInstrBuilder MIB;
-      if (Opcode == ARM::tTPsoft)
+      if (STI->genLongCalls()) {
+        MachineFunction *MF = MBB.getParent();
+        MachineConstantPool *MCP = MF->getConstantPool();
+        unsigned PCLabelID = AFI->createPICLabelUId();
+        MachineConstantPoolValue *CPV =
+            ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(),
+                                          "__aeabi_read_tp", PCLabelID, 0);
+        unsigned Reg = MI.getOperand(0).getReg();
         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                      TII->get( ARM::tBL))
-          .addImm((unsigned)ARMCC::AL).addReg(0)
-          .addExternalSymbol("__aeabi_read_tp", 0);
-      else
+                      TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+                  .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+        if (!Thumb)
+          MIB.addImm(0);
+        MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+
         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                      TII->get( ARM::BL))
-          .addExternalSymbol("__aeabi_read_tp", 0);
+                      TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
+        if (Thumb)
+          MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+        MIB.addReg(Reg, RegState::Kill);
+      } else {
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get(Thumb ? ARM::tBL : ARM::BL));
+        if (Thumb)
+          MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+        MIB.addExternalSymbol("__aeabi_read_tp", 0);
+      }
 
       MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       TransferImpOps(MI, MIB, MIB);
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32b7c87e61b..0f84a235916 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -97,171 +97,6 @@ namespace {
   };
 }
 
-void ARMTargetLowering::InitLibcallCallingConvs() {
-  // The builtins on ARM always use AAPCS, irrespective of wheter C is AAPCS or
-  // AAPCS_VFP.
-  for (const auto LC : {
-           RTLIB::SHL_I16,
-           RTLIB::SHL_I32,
-           RTLIB::SHL_I64,
-           RTLIB::SHL_I128,
-           RTLIB::SRL_I16,
-           RTLIB::SRL_I32,
-           RTLIB::SRL_I64,
-           RTLIB::SRL_I128,
-           RTLIB::SRA_I16,
-           RTLIB::SRA_I32,
-           RTLIB::SRA_I64,
-           RTLIB::SRA_I128,
-           RTLIB::MUL_I8,
-           RTLIB::MUL_I16,
-           RTLIB::MUL_I32,
-           RTLIB::MUL_I64,
-           RTLIB::MUL_I128,
-           RTLIB::MULO_I32,
-           RTLIB::MULO_I64,
-           RTLIB::MULO_I128,
-           RTLIB::SDIV_I8,
-           RTLIB::SDIV_I16,
-           RTLIB::SDIV_I32,
-           RTLIB::SDIV_I64,
-           RTLIB::SDIV_I128,
-           RTLIB::UDIV_I8,
-           RTLIB::UDIV_I16,
-           RTLIB::UDIV_I32,
-           RTLIB::UDIV_I64,
-           RTLIB::UDIV_I128,
-           RTLIB::SREM_I8,
-           RTLIB::SREM_I16,
-           RTLIB::SREM_I32,
-           RTLIB::SREM_I64,
-           RTLIB::SREM_I128,
-           RTLIB::UREM_I8,
-           RTLIB::UREM_I16,
-           RTLIB::UREM_I32,
-           RTLIB::UREM_I64,
-           RTLIB::UREM_I128,
-           RTLIB::SDIVREM_I8,
-           RTLIB::SDIVREM_I16,
-           RTLIB::SDIVREM_I32,
-           RTLIB::SDIVREM_I64,
-           RTLIB::SDIVREM_I128,
-           RTLIB::UDIVREM_I8,
-           RTLIB::UDIVREM_I16,
-           RTLIB::UDIVREM_I32,
-           RTLIB::UDIVREM_I64,
-           RTLIB::UDIVREM_I128,
-           RTLIB::NEG_I32,
-           RTLIB::NEG_I64,
-           RTLIB::ADD_F32,
-           RTLIB::ADD_F64,
-           RTLIB::ADD_F80,
-           RTLIB::ADD_F128,
-           RTLIB::SUB_F32,
-           RTLIB::SUB_F64,
-           RTLIB::SUB_F80,
-           RTLIB::SUB_F128,
-           RTLIB::MUL_F32,
-           RTLIB::MUL_F64,
-           RTLIB::MUL_F80,
-           RTLIB::MUL_F128,
-           RTLIB::DIV_F32,
-           RTLIB::DIV_F64,
-           RTLIB::DIV_F80,
-           RTLIB::DIV_F128,
-           RTLIB::POWI_F32,
-           RTLIB::POWI_F64,
-           RTLIB::POWI_F80,
-           RTLIB::POWI_F128,
-           RTLIB::FPEXT_F64_F128,
-           RTLIB::FPEXT_F32_F128,
-           RTLIB::FPEXT_F32_F64,
-           RTLIB::FPEXT_F16_F32,
-           RTLIB::FPROUND_F32_F16,
-           RTLIB::FPROUND_F64_F16,
-           RTLIB::FPROUND_F80_F16,
-           RTLIB::FPROUND_F128_F16,
-           RTLIB::FPROUND_F64_F32,
-           RTLIB::FPROUND_F80_F32,
-           RTLIB::FPROUND_F128_F32,
-           RTLIB::FPROUND_F80_F64,
-           RTLIB::FPROUND_F128_F64,
-           RTLIB::FPTOSINT_F32_I32,
-           RTLIB::FPTOSINT_F32_I64,
-           RTLIB::FPTOSINT_F32_I128,
-           RTLIB::FPTOSINT_F64_I32,
-           RTLIB::FPTOSINT_F64_I64,
-           RTLIB::FPTOSINT_F64_I128,
-           RTLIB::FPTOSINT_F80_I32,
-           RTLIB::FPTOSINT_F80_I64,
-           RTLIB::FPTOSINT_F80_I128,
-           RTLIB::FPTOSINT_F128_I32,
-           RTLIB::FPTOSINT_F128_I64,
-           RTLIB::FPTOSINT_F128_I128,
-           RTLIB::FPTOUINT_F32_I32,
-           RTLIB::FPTOUINT_F32_I64,
-           RTLIB::FPTOUINT_F32_I128,
-           RTLIB::FPTOUINT_F64_I32,
-           RTLIB::FPTOUINT_F64_I64,
-           RTLIB::FPTOUINT_F64_I128,
-           RTLIB::FPTOUINT_F80_I32,
-           RTLIB::FPTOUINT_F80_I64,
-           RTLIB::FPTOUINT_F80_I128,
-           RTLIB::FPTOUINT_F128_I32,
-           RTLIB::FPTOUINT_F128_I64,
-           RTLIB::FPTOUINT_F128_I128,
-           RTLIB::SINTTOFP_I32_F32,
-           RTLIB::SINTTOFP_I32_F64,
-           RTLIB::SINTTOFP_I32_F80,
-           RTLIB::SINTTOFP_I32_F128,
-           RTLIB::SINTTOFP_I64_F32,
-           RTLIB::SINTTOFP_I64_F64,
-           RTLIB::SINTTOFP_I64_F80,
-           RTLIB::SINTTOFP_I64_F128,
-           RTLIB::SINTTOFP_I128_F32,
-           RTLIB::SINTTOFP_I128_F64,
-           RTLIB::SINTTOFP_I128_F80,
-           RTLIB::SINTTOFP_I128_F128,
-           RTLIB::UINTTOFP_I32_F32,
-           RTLIB::UINTTOFP_I32_F64,
-           RTLIB::UINTTOFP_I32_F80,
-           RTLIB::UINTTOFP_I32_F128,
-           RTLIB::UINTTOFP_I64_F32,
-           RTLIB::UINTTOFP_I64_F64,
-           RTLIB::UINTTOFP_I64_F80,
-           RTLIB::UINTTOFP_I64_F128,
-           RTLIB::UINTTOFP_I128_F32,
-           RTLIB::UINTTOFP_I128_F64,
-           RTLIB::UINTTOFP_I128_F80,
-           RTLIB::UINTTOFP_I128_F128,
-           RTLIB::OEQ_F32,
-           RTLIB::OEQ_F64,
-           RTLIB::OEQ_F128,
-           RTLIB::UNE_F32,
-           RTLIB::UNE_F64,
-           RTLIB::UNE_F128,
-           RTLIB::OGE_F32,
-           RTLIB::OGE_F64,
-           RTLIB::OGE_F128,
-           RTLIB::OLT_F32,
-           RTLIB::OLT_F64,
-           RTLIB::OLT_F128,
-           RTLIB::OLE_F32,
-           RTLIB::OLE_F64,
-           RTLIB::OLE_F128,
-           RTLIB::OGT_F32,
-           RTLIB::OGT_F64,
-           RTLIB::OGT_F128,
-           RTLIB::UO_F32,
-           RTLIB::UO_F64,
-           RTLIB::UO_F128,
-           RTLIB::O_F32,
-           RTLIB::O_F64,
-           RTLIB::O_F128,
-       })
-    setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
-}
-
 // The APCS parameter registers.
 static const MCPhysReg GPRArgRegs[] = {
   ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -349,7 +184,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
-  InitLibcallCallingConvs();
+  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
+      !Subtarget->isTargetWatchOS()) {
+    const auto &E = Subtarget->getTargetTriple().getEnvironment();
+
+    bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
+                      E == Triple::MuslEABIHF;
+    // Windows is a special case. Technically, we will replace all of the "GNU"
+    // calls with calls to MSVCRT if appropriate and adjust the calling
+    // convention then.
+    IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
+
+    for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
+      setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
+                            IsHFTarget ? CallingConv::ARM_AAPCS_VFP
+                                       : CallingConv::ARM_AAPCS);
+  }
 
   if (Subtarget->isTargetMachO()) {
     // Uses VFP for Thumb libfuncs if available.
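(Aside: one way to see why the rewritten loop above must cover every RTLIB entry is that ordinary code reaches these helpers implicitly. A minimal C++ illustration — div64 is an invented name, and __aeabi_uldivmod is the standard AEABI helper this is expected to lower to on 32-bit ARM EABI targets:)

    // 32-bit ARM has no 64-bit divide instruction, so this division becomes
    // a compiler-generated call to the __aeabi_uldivmod runtime helper; the
    // loop in the hunk above decides whether such calls use ARM_AAPCS or
    // ARM_AAPCS_VFP.
    unsigned long long div64(unsigned long long a, unsigned long long b) {
      return a / b;
    }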
@@ -1937,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                          StackPtr, MemOpChains, Flags);
       }
     } else if (VA.isRegLoc()) {
-      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+          Outs[0].VT == MVT::i32) {
         assert(VA.getLocVT() == MVT::i32 &&
                "unexpected calling convention register assignment");
         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
@@ -3176,17 +3027,20 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
 }
 
+static bool isReadOnly(const GlobalValue *GV) {
+  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+    GV = GA->getBaseObject();
+  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+         isa<Function>(GV);
+}
+
 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   const TargetMachine &TM = getTargetMachine();
-  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-    GV = GA->getBaseObject();
-  bool IsRO =
-      (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
-      isa<Function>(GV);
+  bool IsRO = isReadOnly(GV);
 
   // promoteToConstantPool only if not generating XO text section
   if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
@@ -7721,11 +7575,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:          return LowerMUL(Op, DAG);
   case ISD::SDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ true);
     return LowerSDIV(Op, DAG);
   case ISD::UDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ false);
     return LowerUDIV(Op, DAG);
   case ISD::ADDC:
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
index 7a7f91f4d3c..84c6eb845bb 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -538,8 +538,6 @@ class InstrItineraryData;
 
     bool HasStandaloneRem = true;
 
-    void InitLibcallCallingConvs();
-
     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
     void addDRTypeForNEON(MVT VT);
     void addQRTypeForNEON(MVT VT);
diff --git a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97ca11ca501..d054578deb6 100644
--- a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -413,6 +413,7 @@ public:
     Match_RequiresDifferentOperands,
     Match_RequiresNoZeroRegister,
     Match_RequiresSameSrcAndDst,
+    Match_NoFCCRegisterForCurrentISA,
     Match_NonZeroOperandForSync,
 #define GET_OPERAND_DIAGNOSTIC_TYPES
 #include "MipsGenAsmMatcher.inc"
@@ -1461,8 +1462,6 @@ public:
   bool isFCCAsmReg() const {
     if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
       return false;
-    if (!AsmParser.hasEightFccRegisters())
-      return RegIdx.Index == 0;
     return RegIdx.Index <= 7;
   }
   bool isACCAsmReg() const {
@@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
       return Match_RequiresSameSrcAndDst;
     }
 }
+
 unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
   switch (Inst.getOpcode()) {
   // As described by the MIPSR6 spec, daui must not use the zero operand for
@@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
       return Match_RequiresDifferentOperands;
     return Match_Success;
-  default:
-    return Match_Success;
   }
+
+  uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & MipsII::HasFCCRegOperand) &&
+      (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+    return Match_NoFCCRegisterForCurrentISA;
+
+  return Match_Success;
+
 }
 
 static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return Error(IDLoc, "invalid operand ($zero) for instruction");
   case Match_RequiresSameSrcAndDst:
     return Error(IDLoc, "source and destination must match");
+  case Match_NoFCCRegisterForCurrentISA:
+    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+                 "non-zero fcc register doesn't exist in current ISA level");
   case Match_Immz:
     return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
   case Match_UImm1_0:
diff --git a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 35de7b27bf1..a90db2384c4 100644
--- a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -123,7 +123,9 @@ namespace MipsII {
     HasForbiddenSlot = 1 << 5,
     /// IsPCRelativeLoad - A Load instruction with implicit source register
     ///                    ($pc) with explicit offset and destination register
-    IsPCRelativeLoad = 1 << 6
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+    HasFCCRegOperand = 1 << 7
   };
 }
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index fc83761e409..5600f71ff68 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
                SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
 
 def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
-                  CEQS_FM_MM<0>;
+                  CEQS_FM_MM<0> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+}
+
 def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
-                  CEQS_FM_MM<1>;
+                  CEQS_FM_MM<1> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+ bits<3> fcc = 0; +} def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; @@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in { def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, LW_FM_MM<0x26>; } + + multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt, + InstrItinClass itin> { + def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } + } + + defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + let DecoderNamespace = "Mips64" in + defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">, + ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; } //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td index 8b595f9e6c4..774976828a0 100644 --- 
a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch { class CEQS_FM_MM<bits<2> fmt> : MMArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch { let Inst{31-26} = 0x15; let Inst{25-21} = ft; let Inst{20-16} = fs; - let Inst{15-13} = 0x0; // cc + let Inst{15-13} = fcc; let Inst{12} = 0; let Inst{11-10} = fmt; let Inst{9-6} = cond; let Inst{5-0} = 0x3c; } +class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> { + let cond = c; +} + class BC1F_FM_MM<bits<5> tf> : MMArch { bits<16> offset; diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 179695bc698..04d6529a073 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // TODO: implement } +// Emit .dtprelword or .dtpreldword directive +// and value for debug thread local expression. +void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + switch (Size) { + case 4: + OutStreamer->EmitDTPRel32Value(Value); + break; + case 8: + OutStreamer->EmitDTPRel64Value(Value); + break; + default: + llvm_unreachable("Unexpected size of expression value."); + } +} + // Align all targets of indirect branches on bundle size. Used only if target // is NaCl. void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) { diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h index 259e557e128..c5cf5241c23 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h @@ -140,6 +140,7 @@ public: void EmitStartOfAsmFile(Module &M) override; void EmitEndOfAsmFile(Module &M) override; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + void EmitDebugValue(const MCExpr *Value, unsigned Size) const override; }; } diff --git a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp index 29f3e2c07e0..a44192f57aa 100644 --- a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0); emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1); - emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg( - Mips::FCC0, RegState::ImplicitDefine); + emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg) + .addReg(RightReg); emitInst(CondMovOpc, ResultReg) .addReg(RegWithOne) .addReg(Mips::FCC0) diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td index ab7aa9dcdca..df42d56d041 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin, let isTerminator = 1; let hasDelaySlot = DelaySlot; let Defs = [AT]; + let hasFCCRegOperand = 1; } class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, @@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, !strconcat("c.$cond.", typestr)>, HARDFLOAT { let Defs = [FCC0]; let isCodeGenOnly = 1; + let 
hasFCCRegOperand = 1; } + +// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather +// than duplicating the instruction definition for MIPS1 - MIPS3, we expand +// c.cond.ft if necessary, and reject it after constructing the +// instruction if the ISA doesn't support it. class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC, InstrItinClass itin> : - InstSE<(outs), (ins RC:$fs, RC:$ft), - !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin, - FrmFR>, HARDFLOAT; + InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft), + !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin, + FrmFR>, HARDFLOAT { + let isCompare = 1; + let hasFCCRegOperand = 1; +} + multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt, InstrItinClass itin> { - def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>; - def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>; - def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>; - def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>; - def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>; - def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>; - def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>; - def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>; - def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>; - def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>; - def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>; - def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>; - def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>; - def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>; - def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>; - def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>; + def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", 
TypeStr, RC, itin>, + C_COND_FM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } } +let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "Mips64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_64; - +} //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// @@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, /// Floating Point Compare let AdditionalPredicates = [NotInMicroMips] in { def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, - ISA_MIPS1_NOT_32R6_64R6; + ISA_MIPS1_NOT_32R6_64R6 { + + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_32; + ISA_MIPS1_NOT_32R6_64R6, FGR_32 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } } let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_64; + ISA_MIPS1_NOT_32R6_64R6, FGR_64 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; +} //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions @@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd), //===----------------------------------------------------------------------===// // InstAliases. 
//===----------------------------------------------------------------------===// -def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; - def : MipsInstAlias <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>, ISA_MIPS2, HARDFLOAT; @@ -630,6 +703,80 @@ def : MipsInstAlias def : MipsInstAlias <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_64, ISA_MIPS2, HARDFLOAT; + +multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> { + def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_F_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UN_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_EQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SF_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGL_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; +} + +multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString, + Instruction BCFalse, string BCFalseString> { + def : MipsInstAlias<!strconcat(BCTrueString, " $offset"), + (BCTrue FCC0, brtarget:$offset), 1>; + + def : MipsInstAlias<!strconcat(BCFalseString, " $offset"), + (BCFalse FCC0, brtarget:$offset), 1>; +} + +let AdditionalPredicates = [NotInMicroMips] in { + defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + 
ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6, + HARDFLOAT; + defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6, + HARDFLOAT; +} //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td index 1437fb75434..817d9b44b9c 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, bit IsPCRelativeLoad = 0; // Load instruction with implicit source register // ($pc) and with explicit offset and destination // register + bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is + // present in MIPS-I to MIPS-III. - // TSFlags layout should be kept in sync with MipsInstrInfo.h. + // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h. let TSFlags{3-0} = FormBits; let TSFlags{4} = isCTI; let TSFlags{5} = hasForbiddenSlot; let TSFlags{6} = IsPCRelativeLoad; + let TSFlags{7} = hasFCCRegOperand; let DecoderNamespace = "Mips"; @@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch { class CEQS_FM<bits<5> fmt> : StdArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch { let Inst{25-21} = fmt; let Inst{20-16} = ft; let Inst{15-11} = fs; - let Inst{10-8} = 0; // cc + let Inst{10-8} = fcc; let Inst{7-4} = 0x3; let Inst{3-0} = cond; } diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index fadab780612..c5d6a05d661 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL, // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align); } + +const MCExpr * +MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + const MCExpr *Expr = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + return MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(0x8000, getContext()), getContext()); +} diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h index e5423f9578a..a37ec154ff7 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h @@ -42,6 +42,8 @@ class MipsTargetMachine; MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const override; + /// Describe a TLS variable address within debug info. 
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 90111bbea07..f615cc7cc97 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst), PPC970_DGroup_Single; } // hasSideEffects = 0 +def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src), + "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>; +def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src), + "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; +def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src), + "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def : Pat<(int_ppc_dcbt xoaddr:$dst), (DCBT 0, xoaddr:$dst)>; @@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), "mftb $RT, $SPR", IIC_SprMFTB>; +def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR), + "mfpmr $RT, $SPR", IIC_SprMFPMR>; + +def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtpmr $SPR, $RT", IIC_SprMTPMR>; + + // A pseudo-instruction used to implement the read of the 64-bit cycle counter // on a 32-bit target. let hasSideEffects = 1, usesCustomInserter = 1 in diff --git a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td index edabe774867..d240529bc73 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -118,6 +118,8 @@ def IIC_SprTLBIE : InstrItinClass; def IIC_SprABORT : InstrItinClass; def IIC_SprMSGSYNC : InstrItinClass; def IIC_SprSTOP : InstrItinClass; +def IIC_SprMFPMR : InstrItinClass; +def IIC_SprMTPMR : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index f687d326b52..15d5991b938 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<5, [E500_SFX0]>], [8, 1], [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 @@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<1, [E500_SFX0, E500_SFX1]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 5db886cf8f9..32f8e652dd5 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries< InstrStage<5, [E5500_CFX_0]>], [9, 2], // Latency = 5, Repeat rate = 5 [E5500_GPR_Bypass, E5500_CR_Bypass]>, - InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<4, [E5500_SFX0]>], + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [E5500_GPR_Bypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [NoBypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, diff --git a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 192b942490e..985acf92a2d 100644 --- a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -77,11 +77,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, default: return false; case X86::TCRETURNdi: - case X86::TCRETURNdicc: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: - case X86::TCRETURNdi64cc: case X86::TCRETURNri64: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; @@ -99,10 +97,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, Offset = StackAdj - MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); - if (Opcode == 
X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) { - assert(Offset == 0 && "Conditional tail call cannot adjust the stack."); - } - if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); @@ -111,21 +105,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); - if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || - Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) { + if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) { unsigned Op; switch (Opcode) { case X86::TCRETURNdi: Op = X86::TAILJMPd; break; - case X86::TCRETURNdicc: - Op = X86::TAILJMPd_CC; - break; - case X86::TCRETURNdi64cc: - assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder."); - // TODO: We could do it for Win64 "leaf" functions though; PR30337. - Op = X86::TAILJMPd64_CC; - break; default: // Note: Win64 uses REX prefixes for indirect jumps out of functions, but // not direct ones. @@ -141,10 +126,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } - if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) { - MIB.addImm(MBBI->getOperand(2).getImm()); - } - } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm diff --git a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f13b722eb3..08fe2bad281 100644 --- a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28788,10 +28788,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// If a vector select has an operand that is -1 or 0, simplify the select to a -/// bitwise logic operation. -static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +/// If a vector select has an operand that is -1 or 0, try to simplify the +/// select to a bitwise logic operation. +static SDValue +combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -28853,18 +28855,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, } } - if (!TValIsAllOnes && !FValIsAllZeros) + // vselect Cond, 111..., 000... -> Cond + if (TValIsAllOnes && FValIsAllZeros) + return DAG.getBitcast(VT, Cond); + + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT)) return SDValue(); - SDValue Ret; - if (TValIsAllOnes && FValIsAllZeros) - Ret = Cond; - else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); - else if (FValIsAllZeros) - Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS)); + // vselect Cond, 111..., X -> or Cond, X + if (TValIsAllOnes) { + SDValue CastRHS = DAG.getBitcast(CondVT, RHS); + SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS); + return DAG.getBitcast(VT, Or); + } + + // vselect Cond, X, 000... 
-> and Cond, X + if (FValIsAllZeros) { + SDValue CastLHS = DAG.getBitcast(CondVT, LHS); + SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS); + return DAG.getBitcast(VT, And); + } - return DAG.getBitcast(VT, Ret); + return SDValue(); } static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { @@ -29353,7 +29365,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } } - if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget)) + if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) return V; // If this is a *dynamic* select (non-constant condition) and we can match @@ -31260,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. - if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) - return false; - if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - -/// Return true if VPACK* instruction can be used for the given types -/// and it is avalable on \p Subtarget. -static bool -isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { - if (Subtarget.hasSSE2()) - // v16i16 -> v16i8 - if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) - return true; - if (Subtarget.hasSSE41()) - // v8i32 -> v8i16 - if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) - return true; - return false; -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectUSatPattern(SDValue In, EVT VT) { - if (In.getOpcode() != ISD::UMIN) - return SDValue(); - - //Saturation with truncation. We truncate from InVT to VT. - assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() && - "Unexpected types for truncate operation"); - - APInt C; - if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { - // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according - // the element size of the destination type. - return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) : - SDValue(); - } - return SDValue(); -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// The types should allow to use VPMOVUS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. 
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT, - const X86Subtarget &Subtarget) { - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectUSatPattern(In, VT); -} - -static SDValue -combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue USatVal = detectUSatPattern(In, VT); - if (USatVal) { - if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); - return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); - } - } - return SDValue(); -} - /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -31913,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) - return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -32539,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; - // Try to combine truncation with unsigned saturation. - if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget)) - return Val; - // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { @@ -33778,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } } - // Try to synthesize horizontal adds from adds of shuffles. + // Try to synthesize horizontal subs from subs of shuffles. EVT VT = N->getValueType(0); if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && - isHorizontalBinOp(Op0, Op1, true)) + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); diff --git a/gnu/llvm/lib/Target/X86/X86InstrControl.td b/gnu/llvm/lib/Target/X86/X86InstrControl.td index 4ea223e82be..2f260c48df4 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrControl.td +++ b/gnu/llvm/lib/Target/X86/X86InstrControl.td @@ -264,21 +264,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>; } -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, EFLAGS] in { - def TCRETURNdicc : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. 
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} - //===----------------------------------------------------------------------===// // Call Instructions... @@ -340,19 +325,3 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; } } - -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [RSP, EFLAGS] in { - def TCRETURNdi64cc : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, - i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. - def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp index e3484d062bc..627b6120b04 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5108,85 +5108,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { return !isPredicated(MI); } -bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - return true; - default: - return false; - } -} - -bool X86InstrInfo::canMakeTailCallConditional( - SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - if (TailCall.getOpcode() != X86::TCRETURNdi && - TailCall.getOpcode() != X86::TCRETURNdi64) { - // Only direct calls can be done with a conditional branch. - return false; - } - - if (Subtarget.isTargetWin64()) { - // Conditional tail calls confuse the Win64 unwinder. - // TODO: Allow them for "leaf" functions; PR30337. - return false; - } - - assert(BranchCond.size() == 1); - if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { - // Can't make a conditional tail call with this condition. - return false; - } - - const X86MachineFunctionInfo *X86FI = - TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>(); - if (X86FI->getTCReturnAddrDelta() != 0 || - TailCall.getOperand(1).getImm() != 0) { - // A conditional tail call cannot do any stack adjustment. - return false; - } - - return true; -} - -void X86InstrInfo::replaceBranchWithTailCall( - MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - assert(canMakeTailCallConditional(BranchCond, TailCall)); - - MachineBasicBlock::iterator I = MBB.end(); - while (I != MBB.begin()) { - --I; - if (I->isDebugValue()) - continue; - if (!I->isBranch()) - assert(0 && "Can't find the branch to replace!"); - - X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); - assert(BranchCond.size() == 1); - if (CC != BranchCond[0].getImm()) - continue; - - break; - } - - unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc - : X86::TCRETURNdi64cc; - - auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); - MIB->addOperand(TailCall.getOperand(0)); // Destination. - MIB.addImm(0); // Stack offset (not used). - MIB->addOperand(BranchCond[0]); // Condition. - MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. 
- - I->eraseFromParent(); -} - // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may // not be a fallthrough MBB now due to layout changes). Return nullptr if the // fallthrough MBB cannot be identified. diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.h b/gnu/llvm/lib/Target/X86/X86InstrInfo.h index 8d746172dcb..acfdef4da7a 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.h +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.h @@ -316,13 +316,6 @@ public: // Branch analysis. bool isUnpredicatedTerminator(const MachineInstr &MI) const override; - bool isUnconditionalTailCall(const MachineInstr &MI) const override; - bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - void replaceBranchWithTailCall(MachineBasicBlock &MBB, - SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp index a38a4b30b77..feeb2fd5993 100644 --- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -498,16 +498,11 @@ ReSimplify: break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction. + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction. { unsigned Opcode; case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; - case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: - Opcode = X86::GetCondBranchFromCond( - static_cast<X86::CondCode>(MI->getOperand(1).getImm())); - goto SetTailJmpOpcode; SetTailJmpOpcode: MCOperand Saved = OutMI.getOperand(0); @@ -1281,11 +1276,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: - case X86::TAILJMPd_CC: case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64: - case X86::TAILJMPd64_CC: case X86::TAILJMPr64_REX: case X86::TAILJMPm64_REX: // Lower these as normal, but add some comments. diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp index 727ff70c3ff..586bb7bd7b1 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp @@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; - - assert((!isPMULLDSlow() || hasSSE41()) && - "Feature Slow PMULLD can only be set on a subtarget with SSE4.1"); } void X86Subtarget::initializeEnvironment() {
