author     patrick <patrick@openbsd.org>  2017-03-14 08:07:52 +0000
committer  patrick <patrick@openbsd.org>  2017-03-14 08:07:52 +0000
commit     1cb66ada17adf0954eaadba4d02ec2470365a3ac (patch)
tree       521159d8f39562a43fffd680147eb5a71709b9b1 /gnu/llvm/lib/Target
parent     Mark the sshd_config UsePrivilegeSeparation option as deprecated, (diff)
Import LLVM 4.0.0 release including clang and lld.
Diffstat (limited to 'gnu/llvm/lib/Target')
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64.td                     |   9
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td    |   8
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp           |   2
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp  |  31
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPU.td                       |   6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp            |   6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp          |  10
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp          |   7
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h            |  11
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td              |   3
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp             |  12
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h               |  23
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp            |  30
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/R600Instructions.td             |  14
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp             |  58
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp              |  18
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIInstructions.td               |   5
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp       |  15
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h         |  17
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp              |  10
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td             |   6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td             |   6
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp                  |   3
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMCallingConv.td                  |  28
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp           |  34
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp                | 202
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMISelLowering.h                  |   2
-rw-r--r--  gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp       |  17
-rw-r--r--  gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h       |   4
-rw-r--r--  gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td              | 107
-rw-r--r--  gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td          |   7
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp                |  16
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h                  |   1
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsFastISel.cpp                  |   4
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsInstrFPU.td                   | 211
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsInstrFormats.td               |   8
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp          |   8
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h            |   2
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td                |  13
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCSchedule.td                 |   2
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td           |   8
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td            |  10
-rw-r--r--  gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp                |  21
-rw-r--r--  gnu/llvm/lib/Target/X86/X86ISelLowering.cpp                | 141
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrControl.td                 |  31
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrInfo.cpp                   |  79
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrInfo.h                     |   7
-rw-r--r--  gnu/llvm/lib/Target/X86/X86MCInstLower.cpp                 |   9
-rw-r--r--  gnu/llvm/lib/Target/X86/X86Subtarget.cpp                   |   3
49 files changed, 713 insertions, 572 deletions
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64.td b/gnu/llvm/lib/Target/AArch64/AArch64.td
index 740766b151b..91c335fac32 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64.td
@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
"Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
- "AvoidQuadLdStPairs", "true",
- "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+ "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2/M3 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9058617768d..938779d2369 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
@@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
- // A SwiftError is passed in X19.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>;
+ : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5c8acba26aa..4c789926e3e 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return false;
// On some CPUs quad load/store pairs are slower than two single load/stores.
- if (Subtarget.avoidQuadLdStPairs()) {
+ if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
default:
break;
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8a76c42b589..8e312dcf276 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -687,9 +687,30 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
+ MachineOperand RegOp0 = getLdStRegOp(*RtMI);
+ MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+ // Kill flags may become invalid when moving stores for pairing.
+ if (RegOp0.isUse()) {
+ if (!MergeForward) {
+ // Clear kill flags on store if moving upwards. Example:
+ // STRWui %w0, ...
+ // USE %w1
+ // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
+ RegOp0.setIsKill(false);
+ RegOp1.setIsKill(false);
+ } else {
+ // Clear kill flags of the first store's register. Example:
+ // STRWui %w1, ...
+ // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
+ // STRW %w0
+ unsigned Reg = getLdStRegOp(*I).getReg();
+ for (MachineInstr &MI : make_range(std::next(I), Paired))
+ MI.clearRegisterKills(Reg, TRI);
+ }
+ }
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .addOperand(getLdStRegOp(*RtMI))
- .addOperand(getLdStRegOp(*Rt2MI))
+ .addOperand(RegOp0)
+ .addOperand(RegOp1)
.addOperand(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -832,9 +853,11 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.addImm(Imms);
}
}
- StoreI->clearRegisterKills(StRt, TRI);
- (void)BitExtMI;
+ // Clear kill flags between store and load.
+ for (MachineInstr &MI : make_range(StoreI->getIterator(),
+ BitExtMI->getIterator()))
+ MI.clearRegisterKills(StRt, TRI);
DEBUG(dbgs() << "Promoting load by replacing :\n ");
DEBUG(StoreI->print(dbgs()));
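Both hunks above enforce the same invariant: a kill flag must mark the last use of a register, so any reordering that moves an instruction across other uses of that register has to conservatively drop the flags in between. A minimal standalone sketch of that repair, assuming LLVM's MachineInstr API (clearRegisterKills and make_range are real LLVM calls; the helper name and iterator parameters here are illustrative):

#include <iterator>
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

// When the instruction at I is sunk down to InsertPos, a kill of Reg anywhere
// in (I, InsertPos) is no longer the last use of Reg and must be cleared,
// just as mergePairedInsns does for the first store's register above.
static void clearStaleKills(MachineBasicBlock::iterator I,
                            MachineBasicBlock::iterator InsertPos,
                            unsigned Reg, const TargetRegisterInfo *TRI) {
  for (MachineInstr &MI : make_range(std::next(I), InsertPos))
    MI.clearRegisterKills(Reg, TRI);
}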
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0b2badff7cc..13022009af1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"Enable SI Machine Scheduler"
>;
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 4acd55eb612..974e79fff3d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdCodeObjectV2()) {
+ if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
EmitAmdKernelCodeT(*MF, KernelInfo);
}
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+ if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
// FIXME: Should use getKernArgSize
header.kernarg_segment_byte_size =
- STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+ STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2b4fc5397b1..5bf347e4865 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
- // omod
- SDValue Ops[8];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e48c1943cb0..54caa2c5dfa 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDLoc SL(N);
switch (Opc) {
case ISD::FADD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMA:
case ISD::FMAD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
SDValue LHS = N0.getOperand(0);
SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(KILL)
+ NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 69567aa5f71..f6adceac6f1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,16 @@ protected:
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
+ bool mayIgnoreSignedZero(SDValue Op) const {
+ if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ return true;
+
+ if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+ return BO->Flags.hasNoSignedZeros();
+
+ return false;
+ }
+
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
INTERP_P2,
PC_ADD_REL_OFFSET,
KILL,
+ DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
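The mayIgnoreSignedZero guard added above (and consulted by performFNegCombine earlier in this diff) exists because pushing fneg through fadd or fma can flip the sign of a zero result. A self-contained illustration in plain C++ (not LLVM code) of the case the combine must avoid:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.0, y = -1.0;
  double folded    = -(x + y);    // -(+0.0) == -0.0
  double rewritten = (-x) + (-y); // -1.0 + 1.0 == +0.0
  // Prints "1 0": rewriting -(x+y) as (-x)+(-y) changed the sign of the
  // zero result, which is only acceptable under unsafe-fp-math or the
  // no-signed-zeros (nsz) flag.
  std::printf("%d %d\n", std::signbit(folded), std::signbit(rewritten));
  return 0;
}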
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index f079c8d0c70..d7fa28bdc00 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 74851aedbb2..c35a67de1d7 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
ParseSubtargetFeatures(GPU, FullFS);
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+ // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+ // variants of MUBUF instructions.
+ if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ FlatForGlobal = true;
+ }
+
// FIXME: I don't think think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
- unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplicitArgBytes) const {
+ unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
if (ImplicitBytes == 0)
return ExplicitArgBytes;
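The hunk above only shows the early exit of getKernArgSegmentSize; the remainder of the function falls outside the diff context. Given getAlignmentForImplicitArgPtr() in the header hunk below, the rest of the computation presumably aligns the explicit bytes before appending the implicit ones. A hedged, dependency-free sketch of that arithmetic (the alignTo step is an assumption, not quoted from the diff):

#include <cstdint>

// Round Value up to the next multiple of Align (a power of two: 4 or 8 per
// getAlignmentForImplicitArgPtr() below).
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// Assumed shape of the full computation: implicit kernel arguments start at
// the next aligned offset past the explicit ones.
static uint64_t kernArgSegmentSize(uint64_t ExplicitArgBytes,
                                   uint64_t ImplicitBytes,
                                   uint64_t ImplicitArgAlign) {
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;
  return alignTo(ExplicitArgBytes, ImplicitArgAlign) + ImplicitBytes;
}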
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 51ba501bddd..0e3cb7dc1f8 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -311,22 +311,31 @@ public:
return EnableXNACK;
}
- bool isAmdCodeObjectV2() const {
- return isAmdHsaOS() || isMesa3DOS();
+ bool isMesaKernel(const MachineFunction &MF) const {
+ return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ // Covers VS/PS/CS graphics shaders
+ bool isMesaGfxShader(const MachineFunction &MF) const {
+ return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+ return isAmdHsaOS() || isMesaKernel(MF);
}
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdCodeObjectV2() ? 0 : 36;
+ unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+ return isAmdCodeObjectV2(MF) ? 0 : 36;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
- unsigned getImplicitArgNumBytes() const {
- if (isMesa3DOS())
+ unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+ if (isMesaKernel(MF))
return 16;
if (isAmdHsaOS() && isOpenCLEnv())
return 32;
@@ -585,7 +594,7 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
- unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index de7ce5cb9e4..77fee4356b6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
llvm_unreachable("Unsupported private trunc store");
}
- SDValue Chain = Store->getChain();
+ SDValue OldChain = Store->getChain();
+ bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+ // Skip dummy
+ SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
SDValue BasePtr = Store->getBasePtr();
SDValue Offset = Store->getOffset();
EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Store dword
// TODO: Can we be smarter about MachinePointerInfo?
- return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+ SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+ // If we are part of expanded vector, make our neighbors depend on this store
+ if (VectorTrunc) {
+ // Make all other vector elements depend on this store
+ Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+ }
+ return NewStore;
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Neither LOCAL nor PRIVATE can do vectors at the moment
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ // Add an extra level of chain to isolate this vector
+ SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+ // TODO: can the chain be replaced without creating a new store?
+ SDValue NewStore = DAG.getTruncStore(
+ NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+ MemVT, StoreNode->getAlignment(),
+ StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+ StoreNode = cast<StoreSDNode>(NewStore);
+ }
+
return scalarizeVectorStore(StoreNode, DAG);
}
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Put the mask in correct place
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
- // Put the mask in correct place
+ // Put the value bits in correct place
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
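For context on why lowerPrivateTruncStore needs the DUMMY_CHAIN trick above at all: a sub-dword private store is lowered to a read-modify-write of the containing dword (load, mask out the destination bits, merge, store). When a vector truncating store is scalarized, several such RMW sequences can target the same dword, and without an explicit chain between them one lane's load may execute before another lane's store. A standalone model of the per-lane merge, mirroring the Mask/ShiftedValue logic in the hunk above (names are illustrative, not from the diff):

#include <cstdint>

// Merge an 8- or 16-bit Value into the 32-bit word Dword at bit position
// BitShift. Mask is (1 << bits) - 1, e.g. 0xff for a byte store.
static uint32_t rmwTruncStore(uint32_t Dword, uint32_t Value,
                              uint32_t Mask, unsigned BitShift) {
  uint32_t Cleared = Dword & ~(Mask << BitShift); // clear destination bits
  uint32_t Src = (Value & Mask) << BitShift;      // put value bits in place
  return Cleared | Src;
}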
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
index 19795bdde64..9210e66b0fe 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
def MOV : R600_1OP <0x19, "MOV", []>;
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak some through to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+ (outs),
+ (ins),
+ "DUMMY_CHAIN",
+ [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d0a69eafc58..0b571551588 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2());
+ assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+ ST.isAmdCodeObjectV2(MF) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
- assert(!ST.isAmdCodeObjectV2());
+ if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+ assert(!ST.isAmdCodeObjectV2(MF));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- BuildMI(MBB, I, DL, SMovB32, Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL, SMovB32, Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ if (MFI->hasPrivateMemoryInputPtr()) {
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+ const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+ BuildMI(MBB, I, DL, Mov64, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ } else {
+ const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+ PointerType *PtrTy =
+ PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ auto MMO = MF.getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 0, 0);
+ BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addMemOperand(MMO)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ }
+ } else {
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ }
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
.addImm(Rsrc23 & 0xffffffff)
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9140fe6cd14..b98f9f400ee 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (!AMDGPU::isShader(CallConv)) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
- assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+ assert(!Info->hasDispatchPtr() &&
!Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
!Info->hasWorkItemIDZ());
}
+ if (Info->hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(PrivateMemoryPtrReg);
+ }
+
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info->hasPrivateSegmentBuffer()) {
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+ const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_implicit_buffer_ptr: {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ }
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2()) {
+ if (!Subtarget->isAmdCodeObjectV2(MF)) {
DiagnosticInfoUnsupported BadIntrin(
*MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
index b86c0419118..38e31e75ee6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -997,6 +997,11 @@ def : Pat <
>;
def : Pat <
+ (i1 (trunc i16:$a)),
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : Pat <
(i1 (trunc i64:$a)),
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
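The new pattern implements the usual definition of truncation to i1: only bit 0 of the source matters, which is why it selects to an AND with 1 followed by a compare-against-1 rather than the plain copy deleted from VOP1Instructions.td further down. In scalar terms:

#include <cstdint>

// trunc i16 -> i1 keeps only the low bit: (x & 1) == 1.
static bool truncI16ToI1(uint16_t X) { return (X & 1u) == 1u; }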
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e911817c451..ecd46b95ca6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateSegmentWaveByteOffset(false),
WorkItemIDX(false),
WorkItemIDY(false),
- WorkItemIDZ(false) {
+ WorkItemIDZ(false),
+ PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || MaySpill)
PrivateSegmentWaveByteOffset = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
+ } else if (ST.isMesaGfxShader(MF)) {
+ if (HasStackObjects || MaySpill)
+ PrivateMemoryInputPtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return FlatScratchInitUserSGPR;
}
+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+ PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return PrivateMemoryPtrUserSGPR;
+}
+
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
MachineFunction *MF,
unsigned FrameIndex,
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233cd78..6fc8d18bceb 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned ScratchRSrcReg;
unsigned ScratchWaveOffsetReg;
+ // Input registers for non-HSA ABI
+ unsigned PrivateMemoryPtrUserSGPR;
+
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ private:
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
+ // Private memory buffer
+ // Compute directly in sgpr[0:1]
+ // Other shaders indirect 64-bits at sgpr[0:1]
+ bool PrivateMemoryInputPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ public:
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+ unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ public:
return WorkItemIDZ;
}
+ bool hasPrivateMemoryInputPtr() const {
+ return PrivateMemoryInputPtr;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -338,6 +351,10 @@ public:
return QueuePtrUserSGPR;
}
+ unsigned getPrivateMemoryPtrUserSGPR() const {
+ return PrivateMemoryPtrUserSGPR;
+ }
+
bool hasSpilledSGPRs() const {
return HasSpilledSGPRs;
}
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8c4b24a4504..a1ed5e8441d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- assert(ST.isAmdCodeObjectV2() &&
- "Non-CodeObjectV2 ABI currently uses relocations");
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
+ if (ST.isAmdCodeObjectV2(MF)) {
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ }
+ assert(MFI->hasPrivateMemoryInputPtr());
+ return MFI->PrivateMemoryPtrUserSGPR;
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
assert(MFI->hasKernargSegmentPtr());
return MFI->KernargSegmentPtrUserSGPR;
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a15b9ceff2f..8cae83cd9d1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -607,12 +607,6 @@ def : Pat<
(COPY $src)
>;
-def : Pat<
- (i1 (trunc i16:$src)),
- (COPY $src)
->;
-
-
def : Pat <
(i16 (trunc i64:$src)),
(EXTRACT_SUBREG $src, sub0)
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5efa64d25ce..c2a4d4ba99b 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+ // v_div_scale_{f32|f64} do not support input modifiers.
+ let HasModifiers = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
- let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+ let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
let hasExtraSrcRegAllocReq = 1;
+ let AsmMatchConverter = "";
}
// Double precision division pre-scale.
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
let hasExtraSrcRegAllocReq = 1;
+ let AsmMatchConverter = "";
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 8ec9cb02813..95db35ce8ff 100644
--- a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // Emit the XRay table for this function.
+ emitXRayTable();
+
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
diff --git a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
index 9c278a52a7f..7a7b7fede7c 100644
--- a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// HFAs are passed in a contiguous block of registers, or on the stack
CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
-// R6 is used to pass swifterror, remove it from CSR.
-def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 95fcc8dcb45..baa4e0330cf 100644
--- a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1225,16 +1225,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::tTPsoft:
case ARM::TPsoft: {
+ const bool Thumb = Opcode == ARM::tTPsoft;
+
MachineInstrBuilder MIB;
- if (Opcode == ARM::tTPsoft)
+ if (STI->genLongCalls()) {
+ MachineFunction *MF = MBB.getParent();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ unsigned PCLabelID = AFI->createPICLabelUId();
+ MachineConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(),
+ "__aeabi_read_tp", PCLabelID, 0);
+ unsigned Reg = MI.getOperand(0).getReg();
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get( ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__aeabi_read_tp", 0);
- else
+ TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ if (!Thumb)
+ MIB.addImm(0);
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get( ARM::BL))
- .addExternalSymbol("__aeabi_read_tp", 0);
+ TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
+ if (Thumb)
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.addReg(Reg, RegState::Kill);
+ } else {
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? ARM::tBL : ARM::BL));
+ if (Thumb)
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.addExternalSymbol("__aeabi_read_tp", 0);
+ }
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32b7c87e61b..0f84a235916 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -97,171 +97,6 @@ namespace {
};
}
-void ARMTargetLowering::InitLibcallCallingConvs() {
- // The builtins on ARM always use AAPCS, irrespective of wheter C is AAPCS or
- // AAPCS_VFP.
- for (const auto LC : {
- RTLIB::SHL_I16,
- RTLIB::SHL_I32,
- RTLIB::SHL_I64,
- RTLIB::SHL_I128,
- RTLIB::SRL_I16,
- RTLIB::SRL_I32,
- RTLIB::SRL_I64,
- RTLIB::SRL_I128,
- RTLIB::SRA_I16,
- RTLIB::SRA_I32,
- RTLIB::SRA_I64,
- RTLIB::SRA_I128,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16,
- RTLIB::MUL_I32,
- RTLIB::MUL_I64,
- RTLIB::MUL_I128,
- RTLIB::MULO_I32,
- RTLIB::MULO_I64,
- RTLIB::MULO_I128,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16,
- RTLIB::SDIV_I32,
- RTLIB::SDIV_I64,
- RTLIB::SDIV_I128,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16,
- RTLIB::UDIV_I32,
- RTLIB::UDIV_I64,
- RTLIB::UDIV_I128,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16,
- RTLIB::SREM_I32,
- RTLIB::SREM_I64,
- RTLIB::SREM_I128,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16,
- RTLIB::UREM_I32,
- RTLIB::UREM_I64,
- RTLIB::UREM_I128,
- RTLIB::SDIVREM_I8,
- RTLIB::SDIVREM_I16,
- RTLIB::SDIVREM_I32,
- RTLIB::SDIVREM_I64,
- RTLIB::SDIVREM_I128,
- RTLIB::UDIVREM_I8,
- RTLIB::UDIVREM_I16,
- RTLIB::UDIVREM_I32,
- RTLIB::UDIVREM_I64,
- RTLIB::UDIVREM_I128,
- RTLIB::NEG_I32,
- RTLIB::NEG_I64,
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_F128,
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_F128,
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_F128,
- RTLIB::FPEXT_F64_F128,
- RTLIB::FPEXT_F32_F128,
- RTLIB::FPEXT_F32_F64,
- RTLIB::FPEXT_F16_F32,
- RTLIB::FPROUND_F32_F16,
- RTLIB::FPROUND_F64_F16,
- RTLIB::FPROUND_F80_F16,
- RTLIB::FPROUND_F128_F16,
- RTLIB::FPROUND_F64_F32,
- RTLIB::FPROUND_F80_F32,
- RTLIB::FPROUND_F128_F32,
- RTLIB::FPROUND_F80_F64,
- RTLIB::FPROUND_F128_F64,
- RTLIB::FPTOSINT_F32_I32,
- RTLIB::FPTOSINT_F32_I64,
- RTLIB::FPTOSINT_F32_I128,
- RTLIB::FPTOSINT_F64_I32,
- RTLIB::FPTOSINT_F64_I64,
- RTLIB::FPTOSINT_F64_I128,
- RTLIB::FPTOSINT_F80_I32,
- RTLIB::FPTOSINT_F80_I64,
- RTLIB::FPTOSINT_F80_I128,
- RTLIB::FPTOSINT_F128_I32,
- RTLIB::FPTOSINT_F128_I64,
- RTLIB::FPTOSINT_F128_I128,
- RTLIB::FPTOUINT_F32_I32,
- RTLIB::FPTOUINT_F32_I64,
- RTLIB::FPTOUINT_F32_I128,
- RTLIB::FPTOUINT_F64_I32,
- RTLIB::FPTOUINT_F64_I64,
- RTLIB::FPTOUINT_F64_I128,
- RTLIB::FPTOUINT_F80_I32,
- RTLIB::FPTOUINT_F80_I64,
- RTLIB::FPTOUINT_F80_I128,
- RTLIB::FPTOUINT_F128_I32,
- RTLIB::FPTOUINT_F128_I64,
- RTLIB::FPTOUINT_F128_I128,
- RTLIB::SINTTOFP_I32_F32,
- RTLIB::SINTTOFP_I32_F64,
- RTLIB::SINTTOFP_I32_F80,
- RTLIB::SINTTOFP_I32_F128,
- RTLIB::SINTTOFP_I64_F32,
- RTLIB::SINTTOFP_I64_F64,
- RTLIB::SINTTOFP_I64_F80,
- RTLIB::SINTTOFP_I64_F128,
- RTLIB::SINTTOFP_I128_F32,
- RTLIB::SINTTOFP_I128_F64,
- RTLIB::SINTTOFP_I128_F80,
- RTLIB::SINTTOFP_I128_F128,
- RTLIB::UINTTOFP_I32_F32,
- RTLIB::UINTTOFP_I32_F64,
- RTLIB::UINTTOFP_I32_F80,
- RTLIB::UINTTOFP_I32_F128,
- RTLIB::UINTTOFP_I64_F32,
- RTLIB::UINTTOFP_I64_F64,
- RTLIB::UINTTOFP_I64_F80,
- RTLIB::UINTTOFP_I64_F128,
- RTLIB::UINTTOFP_I128_F32,
- RTLIB::UINTTOFP_I128_F64,
- RTLIB::UINTTOFP_I128_F80,
- RTLIB::UINTTOFP_I128_F128,
- RTLIB::OEQ_F32,
- RTLIB::OEQ_F64,
- RTLIB::OEQ_F128,
- RTLIB::UNE_F32,
- RTLIB::UNE_F64,
- RTLIB::UNE_F128,
- RTLIB::OGE_F32,
- RTLIB::OGE_F64,
- RTLIB::OGE_F128,
- RTLIB::OLT_F32,
- RTLIB::OLT_F64,
- RTLIB::OLT_F128,
- RTLIB::OLE_F32,
- RTLIB::OLE_F64,
- RTLIB::OLE_F128,
- RTLIB::OGT_F32,
- RTLIB::OGT_F64,
- RTLIB::OGT_F128,
- RTLIB::UO_F32,
- RTLIB::UO_F64,
- RTLIB::UO_F128,
- RTLIB::O_F32,
- RTLIB::O_F64,
- RTLIB::O_F128,
- })
- setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
-}
-
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -349,7 +184,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- InitLibcallCallingConvs();
+ if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
+ !Subtarget->isTargetWatchOS()) {
+ const auto &E = Subtarget->getTargetTriple().getEnvironment();
+
+ bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
+ E == Triple::MuslEABIHF;
+ // Windows is a special case. Technically, we will replace all of the "GNU"
+ // calls with calls to MSVCRT if appropriate and adjust the calling
+ // convention then.
+ IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
+
+ for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
+ setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
+ IsHFTarget ? CallingConv::ARM_AAPCS_VFP
+ : CallingConv::ARM_AAPCS);
+ }
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
@@ -1937,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+ Outs[0].VT == MVT::i32) {
assert(VA.getLocVT() == MVT::i32 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
@@ -3176,17 +3027,20 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
+static bool isReadOnly(const GlobalValue *GV) {
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->getBaseObject();
+ return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+ isa<Function>(GV);
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getBaseObject();
- bool IsRO =
- (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
- isa<Function>(GV);
+ bool IsRO = isReadOnly(GV);
// promoteToConstantPool only if not generating XO text section
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
@@ -7721,11 +7575,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
- if (Subtarget->isTargetWindows())
+ if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
return LowerSDIV(Op, DAG);
case ISD::UDIV:
- if (Subtarget->isTargetWindows())
+ if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
return LowerUDIV(Op, DAG);
case ISD::ADDC:
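The libcall calling-convention loop added near the top of this file keys off the triple's environment. A standalone restatement of that predicate (Triple and its enum values are real LLVM API; the helper name is illustrative):

#include "llvm/ADT/Triple.h"

using namespace llvm;

// Hard-float targets pass libcall arguments in VFP registers
// (CallingConv::ARM_AAPCS_VFP); everything else uses ARM_AAPCS.
static bool useHardFloatLibcalls(const Triple &TT) {
  switch (TT.getEnvironment()) {
  case Triple::EABIHF:
  case Triple::GNUEABIHF:
  case Triple::MuslEABIHF:
    return true;
  default:
    // Windows is special-cased as hard-float; per the comment in the hunk,
    // the "GNU" calls are later replaced with MSVCRT equivalents and the
    // convention adjusted.
    return TT.isOSWindows();
  }
}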
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
index 7a7f91f4d3c..84c6eb845bb 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -538,8 +538,6 @@ class InstrItineraryData;
bool HasStandaloneRem = true;
- void InitLibcallCallingConvs();
-
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
diff --git a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97ca11ca501..d054578deb6 100644
--- a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -413,6 +413,7 @@ public:
Match_RequiresDifferentOperands,
Match_RequiresNoZeroRegister,
Match_RequiresSameSrcAndDst,
+ Match_NoFCCRegisterForCurrentISA,
Match_NonZeroOperandForSync,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "MipsGenAsmMatcher.inc"
@@ -1461,8 +1462,6 @@ public:
bool isFCCAsmReg() const {
if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
return false;
- if (!AsmParser.hasEightFccRegisters())
- return RegIdx.Index == 0;
return RegIdx.Index <= 7;
}
bool isACCAsmReg() const {
@@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
return Match_RequiresSameSrcAndDst;
}
}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
switch (Inst.getOpcode()) {
// As described by the MIPSR6 spec, daui must not use the zero operand for
@@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
return Match_RequiresDifferentOperands;
return Match_Success;
- default:
- return Match_Success;
}
+
+ uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & MipsII::HasFCCRegOperand) &&
+ (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+ return Match_NoFCCRegisterForCurrentISA;
+
+ return Match_Success;
+
}
static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "invalid operand ($zero) for instruction");
case Match_RequiresSameSrcAndDst:
return Error(IDLoc, "source and destination must match");
+ case Match_NoFCCRegisterForCurrentISA:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "non-zero fcc register doesn't exist in current ISA level");
case Match_Immz:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
case Match_UImm1_0:
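The new Match_NoFCCRegisterForCurrentISA diagnostic encodes a hardware fact spelled out in the MipsInstrFPU.td comment further down: only MIPS-IV and later have $fcc1-$fcc7, so on earlier ISAs any instruction carrying an FCC operand must use $fcc0. A standalone restatement of the predicate (the flag value comes from the MipsBaseInfo.h hunk that follows; the function name is illustrative):

#include <cstdint>

constexpr uint64_t HasFCCRegOperand = 1ull << 7; // MipsII::HasFCCRegOperand

// Returns true if the instruction's FCC operand is legal at this ISA level.
static bool isFCCOperandLegal(uint64_t TSFlags, unsigned FccRegIndex,
                              bool HasEightFccRegisters) {
  if (!(TSFlags & HasFCCRegOperand))
    return true;                   // no FCC operand, nothing to check
  return FccRegIndex == 0 || HasEightFccRegisters;
}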
diff --git a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 35de7b27bf1..a90db2384c4 100644
--- a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -123,7 +123,9 @@ namespace MipsII {
HasForbiddenSlot = 1 << 5,
/// IsPCRelativeLoad - A Load instruction with implicit source register
/// ($pc) with explicit offset and destination register
- IsPCRelativeLoad = 1 << 6
+ IsPCRelativeLoad = 1 << 6,
+ /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+ HasFCCRegOperand = 1 << 7
};
}
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index fc83761e409..5600f71ff68 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
- CEQS_FM_MM<0>;
+ CEQS_FM_MM<0> {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set is used directly.
+ bits<3> fcc = 0;
+}
+
def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
- CEQS_FM_MM<1>;
+ CEQS_FM_MM<1> {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set is used directly.
+ bits<3> fcc = 0;
+}
def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
@@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in {
def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>,
LW_FM_MM<0x26>;
}
+
+ multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt,
+ InstrItinClass itin> {
+ def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 0> {
+ let BaseOpcode = "c.f."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 1> {
+ let BaseOpcode = "c.un."#NAME;
+ let isCommutable = 1;
+ }
+ def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 2> {
+ let BaseOpcode = "c.eq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 3> {
+ let BaseOpcode = "c.ueq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 4> {
+ let BaseOpcode = "c.olt."#NAME;
+ }
+ def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 5> {
+ let BaseOpcode = "c.ult."#NAME;
+ }
+ def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 6> {
+ let BaseOpcode = "c.ole."#NAME;
+ }
+ def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 7> {
+ let BaseOpcode = "c.ule."#NAME;
+ }
+ def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 8> {
+ let BaseOpcode = "c.sf."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 9> {
+ let BaseOpcode = "c.ngle."#NAME;
+ }
+ def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 10> {
+ let BaseOpcode = "c.seq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 11> {
+ let BaseOpcode = "c.ngl."#NAME;
+ }
+ def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 12> {
+ let BaseOpcode = "c.lt."#NAME;
+ }
+ def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 13> {
+ let BaseOpcode = "c.nge."#NAME;
+ }
+ def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 14> {
+ let BaseOpcode = "c.le."#NAME;
+ }
+ def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 15> {
+ let BaseOpcode = "c.ngt."#NAME;
+ }
+ }
+
+ defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ let DecoderNamespace = "Mips64" in
+ defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">,
+ ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
}
//===----------------------------------------------------------------------===//
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 8b595f9e6c4..774976828a0 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch {
class CEQS_FM_MM<bits<2> fmt> : MMArch {
bits<5> fs;
bits<5> ft;
+ bits<3> fcc;
bits<4> cond;
bits<32> Inst;
@@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch {
let Inst{31-26} = 0x15;
let Inst{25-21} = ft;
let Inst{20-16} = fs;
- let Inst{15-13} = 0x0; // cc
+ let Inst{15-13} = fcc;
let Inst{12} = 0;
let Inst{11-10} = fmt;
let Inst{9-6} = cond;
let Inst{5-0} = 0x3c;
}
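+// Specialization of CEQS_FM_MM that fixes the 4-bit condition encoding for
+// each c.cond.fmt variant.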
+class C_COND_FM_MM<bits<2> fmt, bits<4> c> : CEQS_FM_MM<fmt> {
+ let cond = c;
+}
+
class BC1F_FM_MM<bits<5> tf> : MMArch {
bits<16> offset;
diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 179695bc698..04d6529a073 100644
--- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// TODO: implement
}
+// Emit a .dtprelword or .dtpreldword directive and the value for a debug
+// thread-local expression.
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+ unsigned Size) const {
+ switch (Size) {
+ case 4:
+ OutStreamer->EmitDTPRel32Value(Value);
+ break;
+ case 8:
+ OutStreamer->EmitDTPRel64Value(Value);
+ break;
+ default:
+ llvm_unreachable("Unexpected size of expression value.");
+ }
+}
+
// Align all targets of indirect branches on bundle size. Used only if target
// is NaCl.
void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 259e557e128..c5cf5241c23 100644
--- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -140,6 +140,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+ void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
}
diff --git a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
index 29f3e2c07e0..a44192f57aa 100644
--- a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
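+ // Emit the compare with FCC0 as an explicit def (the first operand).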
- emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
- Mips::FCC0, RegState::ImplicitDefine);
+ emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg)
+ .addReg(RightReg);
emitInst(CondMovOpc, ResultReg)
.addReg(RegWithOne)
.addReg(Mips::FCC0)
diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
index ab7aa9dcdca..df42d56d041 100644
--- a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
let isTerminator = 1;
let hasDelaySlot = DelaySlot;
let Defs = [AT];
+ let hasFCCRegOperand = 1;
}
class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
@@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
!strconcat("c.$cond.", typestr)>, HARDFLOAT {
let Defs = [FCC0];
let isCodeGenOnly = 1;
+ let hasFCCRegOperand = 1;
}
+
+// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather
+// than duplicating the instruction definition for MIPS1 - MIPS3, we expand
+// c.cond.fmt if necessary, and reject it after constructing the
+// instruction if the ISA doesn't support it.
class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC,
InstrItinClass itin> :
- InstSE<(outs), (ins RC:$fs, RC:$ft),
- !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin,
- FrmFR>, HARDFLOAT;
+ InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft),
+ !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin,
+ FrmFR>, HARDFLOAT {
+ let isCompare = 1;
+ let hasFCCRegOperand = 1;
+}
+
multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
InstrItinClass itin> {
- def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>;
- def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>;
- def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>;
- def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>;
- def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>;
- def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>;
- def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>;
- def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>;
- def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>;
- def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>;
- def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>;
- def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>;
- def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>;
- def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>;
- def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>;
- def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
+ def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 0> {
+ let BaseOpcode = "c.f."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 1> {
+ let BaseOpcode = "c.un."#NAME;
+ let isCommutable = 1;
+ }
+ def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 2> {
+ let BaseOpcode = "c.eq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 3> {
+ let BaseOpcode = "c.ueq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 4> {
+ let BaseOpcode = "c.olt."#NAME;
+ }
+ def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 5> {
+ let BaseOpcode = "c.ult."#NAME;
+ }
+ def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 6> {
+ let BaseOpcode = "c.ole."#NAME;
+ }
+ def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 7> {
+ let BaseOpcode = "c.ule."#NAME;
+ }
+ def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 8> {
+ let BaseOpcode = "c.sf."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 9> {
+ let BaseOpcode = "c.ngle."#NAME;
+ }
+ def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 10> {
+ let BaseOpcode = "c.seq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 11> {
+ let BaseOpcode = "c.ngl."#NAME;
+ }
+ def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 12> {
+ let BaseOpcode = "c.lt."#NAME;
+ }
+ def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 13> {
+ let BaseOpcode = "c.nge."#NAME;
+ }
+ def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 14> {
+ let BaseOpcode = "c.le."#NAME;
+ }
+ def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 15> {
+ let BaseOpcode = "c.ngt."#NAME;
+ }
}
+let AdditionalPredicates = [NotInMicroMips] in {
defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
FGR_32;
let DecoderNamespace = "Mips64" in
defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
FGR_64;
-
+}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
@@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
/// Floating Point Compare
let AdditionalPredicates = [NotInMicroMips] in {
def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
- ISA_MIPS1_NOT_32R6_64R6;
+ ISA_MIPS1_NOT_32R6_64R6 {
+
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+ }
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32 {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+ }
}
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64 {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+}
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-
def : MipsInstAlias
<"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
ISA_MIPS2, HARDFLOAT;
@@ -630,6 +703,80 @@ def : MipsInstAlias
def : MipsInstAlias
<"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
FGR_64, ISA_MIPS2, HARDFLOAT;
+
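+// Pre-MIPS-IV assembly omits the $fcc operand; expand the two-operand form
+// of each compare to its $fcc0 variant.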
+multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> {
+ def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_F_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_UN_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_EQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_UEQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_OLT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_ULT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_OLE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_ULE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_SF_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGLE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_SEQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGL_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_LT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_LE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+}
+
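+// bc1t/bc1f without an explicit condition-code operand branch on $fcc0.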
+multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString,
+ Instruction BCFalse, string BCFalseString> {
+ def : MipsInstAlias<!strconcat(BCTrueString, " $offset"),
+ (BCTrue FCC0, brtarget:$offset), 1>;
+
+ def : MipsInstAlias<!strconcat(BCFalseString, " $offset"),
+ (BCFalse FCC0, brtarget:$offset), 1>;
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+ defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6,
+ HARDFLOAT;
+ defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6,
+ HARDFLOAT;
+}
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
index 1437fb75434..817d9b44b9c 100644
--- a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
// ($pc) and with explicit offset and destination
// register
+ bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is
+ // present in MIPS-I to MIPS-III.
- // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+ // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h.
let TSFlags{3-0} = FormBits;
let TSFlags{4} = isCTI;
let TSFlags{5} = hasForbiddenSlot;
let TSFlags{6} = IsPCRelativeLoad;
+ let TSFlags{7} = hasFCCRegOperand;
let DecoderNamespace = "Mips";
@@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch {
class CEQS_FM<bits<5> fmt> : StdArch {
bits<5> fs;
bits<5> ft;
+ bits<3> fcc;
bits<4> cond;
bits<32> Inst;
@@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch {
let Inst{25-21} = fmt;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-8} = 0; // cc
+ let Inst{10-8} = fcc;
let Inst{7-4} = 0x3;
let Inst{3-0} = cond;
}
diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index fadab780612..c5d6a05d661 100644
--- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL,
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
}
+
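+// The MIPS ABI biases DTP-relative offsets by 0x8000; add that bias back
+// when describing a TLS variable's address in debug info.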
+const MCExpr *
+MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ return MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+}
diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index e5423f9578a..a37ec154ff7 100644
--- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -42,6 +42,8 @@ class MipsTargetMachine;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
unsigned &Align) const override;
+ /// Describe a TLS variable address within debug info.
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
} // end namespace llvm
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 90111bbea07..f615cc7cc97 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
PPC970_DGroup_Single;
} // hasSideEffects = 0
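+// Book-E instruction cache locking: block lock-clear, lock-query, and
+// touch-and-lock-set.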
+def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src),
+ "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>;
+def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src),
+ "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
"icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
def : Pat<(int_ppc_dcbt xoaddr:$dst),
(DCBT 0, xoaddr:$dst)>;
@@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
"mftb $RT, $SPR", IIC_SprMFTB>;
+def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfpmr $RT, $SPR", IIC_SprMFPMR>;
+
+def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
+ "mtpmr $SPR, $RT", IIC_SprMTPMR>;
+
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
let hasSideEffects = 1, usesCustomInserter = 1 in
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
index edabe774867..d240529bc73 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -118,6 +118,8 @@ def IIC_SprTLBIE : InstrItinClass;
def IIC_SprABORT : InstrItinClass;
def IIC_SprMSGSYNC : InstrItinClass;
def IIC_SprSTOP : InstrItinClass;
+def IIC_SprMFPMR : InstrItinClass;
+def IIC_SprMTPMR : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index f687d326b52..15d5991b938 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<5, [E500_SFX0]>],
[8, 1],
[E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<4, [E500_SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
@@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500_SFX0, E500_SFX1]>],
[4, 1], // Latency = 1, Repeat rate = 1
[E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<4, [E500_SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 5db886cf8f9..32f8e652dd5 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<5, [E5500_CFX_0]>],
[9, 2], // Latency = 5, Repeat rate = 5
[E5500_GPR_Bypass, E5500_CR_Bypass]>,
- InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<4, [E5500_SFX0]>],
+ InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
[8, 2], // Latency = 4, Repeat rate = 4
[E5500_GPR_Bypass, E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_CFX_0]>],
[5], // Latency = 1, Repeat rate = 1
[E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<4, [E5500_CFX_0]>],
[8, 2], // Latency = 4, Repeat rate = 4
[NoBypass, E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ InstrStage<1, [E5500_CFX_0]>],
[5], // Latency = 1, Repeat rate = 1
[E5500_GPR_Bypass]>,
InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
diff --git a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 192b942490e..985acf92a2d 100644
--- a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -77,11 +77,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
case X86::TCRETURNdi:
- case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
- case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -99,10 +97,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
- if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
- assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
- }
-
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -111,21 +105,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
- Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
unsigned Op;
switch (Opcode) {
case X86::TCRETURNdi:
Op = X86::TAILJMPd;
break;
- case X86::TCRETURNdicc:
- Op = X86::TAILJMPd_CC;
- break;
- case X86::TCRETURNdi64cc:
- assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder.");
- // TODO: We could do it for Win64 "leaf" functions though; PR30337.
- Op = X86::TAILJMPd64_CC;
- break;
default:
// Note: Win64 uses REX prefixes indirect jumps out of functions, but
// not direct ones.
@@ -141,10 +126,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
- if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
- MIB.addImm(MBBI->getOperand(2).getImm());
- }
-
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
diff --git a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f13b722eb3..08fe2bad281 100644
--- a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28788,10 +28788,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// If a vector select has an operand that is -1 or 0, simplify the select to a
-/// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+/// If a vector select has an operand that is -1 or 0, try to simplify the
+/// select to a bitwise logic operation.
+static SDValue
+combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -28853,18 +28855,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
}
}
- if (!TValIsAllOnes && !FValIsAllZeros)
+ // vselect Cond, 111..., 000... -> Cond
+ if (TValIsAllOnes && FValIsAllZeros)
+ return DAG.getBitcast(VT, Cond);
+
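+ // Once legalization has started, only legal condition types can be used
+ // for the bitwise ops created below.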
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
return SDValue();
- SDValue Ret;
- if (TValIsAllOnes && FValIsAllZeros)
- Ret = Cond;
- else if (TValIsAllOnes)
- Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
- else if (FValIsAllZeros)
- Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS));
+ // vselect Cond, 111..., X -> or Cond, X
+ if (TValIsAllOnes) {
+ SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+ SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
+ return DAG.getBitcast(VT, Or);
+ }
+
+ // vselect Cond, X, 000... -> and Cond, X
+ if (FValIsAllZeros) {
+ SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
+ SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
+ return DAG.getBitcast(VT, And);
+ }
- return DAG.getBitcast(VT, Ret);
+ return SDValue();
}
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
@@ -29353,7 +29365,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
+ if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
// If this is a *dynamic* select (non-constant condition) and we can match
@@ -31260,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasAVX512())
- return false;
-
- // FIXME: Scalar type may be supported if we move it to vector register.
- if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
- return false;
-
- EVT SrcElVT = SrcVT.getScalarType();
- EVT DstElVT = DstVT.getScalarType();
- if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
- return false;
- if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
- return false;
- if (SrcVT.is512BitVector() || Subtarget.hasVLX())
- return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
- return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
- if (Subtarget.hasSSE2())
- // v16i16 -> v16i8
- if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
- return true;
- if (Subtarget.hasSSE41())
- // v8i32 -> v8i16
- if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
- return true;
- return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
- if (In.getOpcode() != ISD::UMIN)
- return SDValue();
-
- //Saturation with truncation. We truncate from InVT to VT.
- assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
- "Unexpected types for truncate operation");
-
- APInt C;
- if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
- // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
- // the element size of the destination type.
- return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
- SDValue();
- }
- return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
- const X86Subtarget &Subtarget) {
- if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return SDValue();
- return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- SDValue USatVal = detectUSatPattern(In, VT);
- if (USatVal) {
- if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
- return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
- }
- }
- return SDValue();
-}
-
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31913,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
- if (SDValue Val =
- detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
- return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -32539,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
- // Try to combine truncation with unsigned saturation.
- if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
- return Val;
-
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -33778,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
}
}
- // Try to synthesize horizontal adds from adds of shuffles.
+ // Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
- isHorizontalBinOp(Op0, Op1, true))
+ isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
diff --git a/gnu/llvm/lib/Target/X86/X86InstrControl.td b/gnu/llvm/lib/Target/X86/X86InstrControl.td
index 4ea223e82be..2f260c48df4 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrControl.td
+++ b/gnu/llvm/lib/Target/X86/X86InstrControl.td
@@ -264,21 +264,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
-// Conditional tail calls are similar to the above, but they are branches
-// rather than barriers, and they use EFLAGS.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
- let Uses = [ESP, EFLAGS] in {
- def TCRETURNdicc : PseudoI<(outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
-
- // This gets substituted to a conditional jump instruction in MC lowering.
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$cond),
- "",
- [], IIC_JMP_REL>;
-}
-
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -340,19 +325,3 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
-
-// Conditional tail calls are similar to the above, but they are branches
-// rather than barriers, and they use EFLAGS.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
- let Uses = [RSP, EFLAGS] in {
- def TCRETURNdi64cc : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset,
- i32imm:$cond), []>;
-
- // This gets substituted to a conditional jump instruction in MC lowering.
- def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$cond),
- "",
- [], IIC_JMP_REL>;
-}
diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
index e3484d062bc..627b6120b04 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5108,85 +5108,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- return true;
- default:
- return false;
- }
-}
-
-bool X86InstrInfo::canMakeTailCallConditional(
- SmallVectorImpl<MachineOperand> &BranchCond,
- const MachineInstr &TailCall) const {
- if (TailCall.getOpcode() != X86::TCRETURNdi &&
- TailCall.getOpcode() != X86::TCRETURNdi64) {
- // Only direct calls can be done with a conditional branch.
- return false;
- }
-
- if (Subtarget.isTargetWin64()) {
- // Conditional tail calls confuse the Win64 unwinder.
- // TODO: Allow them for "leaf" functions; PR30337.
- return false;
- }
-
- assert(BranchCond.size() == 1);
- if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
- // Can't make a conditional tail call with this condition.
- return false;
- }
-
- const X86MachineFunctionInfo *X86FI =
- TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>();
- if (X86FI->getTCReturnAddrDelta() != 0 ||
- TailCall.getOperand(1).getImm() != 0) {
- // A conditional tail call cannot do any stack adjustment.
- return false;
- }
-
- return true;
-}
-
-void X86InstrInfo::replaceBranchWithTailCall(
- MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
- const MachineInstr &TailCall) const {
- assert(canMakeTailCallConditional(BranchCond, TailCall));
-
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- if (!I->isBranch())
- assert(0 && "Can't find the branch to replace!");
-
- X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
- assert(BranchCond.size() == 1);
- if (CC != BranchCond[0].getImm())
- continue;
-
- break;
- }
-
- unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
- : X86::TCRETURNdi64cc;
-
- auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
- MIB->addOperand(TailCall.getOperand(0)); // Destination.
- MIB.addImm(0); // Stack offset (not used).
- MIB->addOperand(BranchCond[0]); // Condition.
- MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
-
- I->eraseFromParent();
-}
-
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.h b/gnu/llvm/lib/Target/X86/X86InstrInfo.h
index 8d746172dcb..acfdef4da7a 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.h
@@ -316,13 +316,6 @@ public:
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
- bool isUnconditionalTailCall(const MachineInstr &MI) const override;
- bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
- const MachineInstr &TailCall) const override;
- void replaceBranchWithTailCall(MachineBasicBlock &MBB,
- SmallVectorImpl<MachineOperand> &Cond,
- const MachineInstr &TailCall) const override;
-
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
index a38a4b30b77..feeb2fd5993 100644
--- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -498,16 +498,11 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction.
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
{ unsigned Opcode;
case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
- case X86::TAILJMPd_CC:
- case X86::TAILJMPd64_CC:
- Opcode = X86::GetCondBranchFromCond(
- static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
- goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
@@ -1281,11 +1276,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
- case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
- case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
// Lower these as normal, but add some comments.
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index 727ff70c3ff..586bb7bd7b1 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
-
- assert((!isPMULLDSlow() || hasSSE41()) &&
- "Feature Slow PMULLD can only be set on a subtarget with SSE4.1");
}
void X86Subtarget::initializeEnvironment() {