| author | 2017-03-14 08:07:52 +0000 |
|---|---|
| committer | 2017-03-14 08:07:52 +0000 |
| commit | 1cb66ada17adf0954eaadba4d02ec2470365a3ac (patch) |
| tree | 521159d8f39562a43fffd680147eb5a71709b9b1 /gnu/llvm/lib/Target |
| parent | Mark the sshd_config UsePrivilegeSeparation option as deprecated, (diff) |
| download | wireguard-openbsd-1cb66ada17adf0954eaadba4d02ec2470365a3ac.tar.xz, wireguard-openbsd-1cb66ada17adf0954eaadba4d02ec2470365a3ac.zip |
Import LLVM 4.0.0 release including clang and lld.
Diffstat (limited to 'gnu/llvm/lib/Target')
49 files changed, 713 insertions, 572 deletions
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64.td b/gnu/llvm/lib/Target/AArch64/AArch64.td
index 740766b151b..91c335fac32 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64.td
@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
 def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
     "Misaligned128StoreIsSlow", "true",
     "Misaligned 128 bit stores are slow">;
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
-    "AvoidQuadLdStPairs", "true",
-    "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+    "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
     "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M1 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2/M3 processors",
-                                    [FeatureAvoidQuadLdStPairs,
+                                    [FeatureSlowPaired128,
                                      FeatureCRC,
                                      FeatureCrypto,
                                      FeatureCustomCheapAsMoveHandling,
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9058617768d..938779d2369 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
 
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
 
   // Big endian vectors must be passed as if they were 1-element vectors so that
   // their lanes are in a consistent order.
@@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
 
-  // A SwiftError is passed in X19.
-  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+  // A SwiftError is passed in X21.
+  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
 
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
 def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
 
 def CSR_AArch64_AAPCS_SwiftError
-    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>;
+    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
 
 // The function used by Darwin to obtain the address of a thread-local variable
 // guarantees more than a normal AAPCS function. x16 and x17 are used on the
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5c8acba26aa..4c789926e3e 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
     return false;
 
   // On some CPUs quad load/store pairs are slower than two single load/stores.
-  if (Subtarget.avoidQuadLdStPairs()) {
+  if (Subtarget.isPaired128Slow()) {
     switch (MI.getOpcode()) {
     default:
       break;
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8a76c42b589..8e312dcf276 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -687,9 +687,30 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   MachineInstrBuilder MIB;
   DebugLoc DL = I->getDebugLoc();
   MachineBasicBlock *MBB = I->getParent();
+  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
+  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+  // Kill flags may become invalid when moving stores for pairing.
+  if (RegOp0.isUse()) {
+    if (!MergeForward) {
+      // Clear kill flags on store if moving upwards. Example:
+      //   STRWui %w0, ...
+      //   USE %w1
+      //   STRWui kill %w1   ; need to clear kill flag when moving STRWui upwards
+      RegOp0.setIsKill(false);
+      RegOp1.setIsKill(false);
+    } else {
+      // Clear kill flags of the first stores register. Example:
+      //   STRWui %w1, ...
+      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
+      //   STRW %w0
+      unsigned Reg = getLdStRegOp(*I).getReg();
+      for (MachineInstr &MI : make_range(std::next(I), Paired))
+        MI.clearRegisterKills(Reg, TRI);
+    }
+  }
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
-            .addOperand(getLdStRegOp(*RtMI))
-            .addOperand(getLdStRegOp(*Rt2MI))
+            .addOperand(RegOp0)
+            .addOperand(RegOp1)
             .addOperand(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -832,9 +853,11 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
           .addImm(Imms);
     }
   }
-  StoreI->clearRegisterKills(StRt, TRI);
-  (void)BitExtMI;
+  // Clear kill flags between store and load.
+  for (MachineInstr &MI : make_range(StoreI->getIterator(),
+                                     BitExtMI->getIterator()))
+    MI.clearRegisterKills(StRt, TRI);
 
   DEBUG(dbgs() << "Promoting load by replacing :\n    ");
   DEBUG(StoreI->print(dbgs()));
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0b2badff7cc..13022009af1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
   "Enable SI Machine Scheduler"
 >;
 
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 4acd55eb612..974e79fff3d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
-  if (STM.isAmdCodeObjectV2()) {
+  if (STM.isAmdCodeObjectV2(*MF)) {
     getSIProgramInfo(KernelInfo, *MF);
     EmitAmdKernelCodeT(*MF, KernelInfo);
   }
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
-  if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+  if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
     AMDGPUTargetStreamer *TS =
         static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
     SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
   // FIXME: Should use getKernArgSize
   header.kernarg_segment_byte_size =
-      STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+      STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2b4fc5397b1..5bf347e4865 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
   unsigned Opc = (VT == MVT::f64) ?
     AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 
-  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
-  // omod
-  SDValue Ops[8];
-
-  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
-  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
-  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e48c1943cb0..54caa2c5dfa 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   SDLoc SL(N);
   switch (Opc) {
   case ISD::FADD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
     SDValue LHS = N0.getOperand(0);
     SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   }
   case ISD::FMA:
   case ISD::FMAD: {
+    if (!mayIgnoreSignedZero(N0))
+      return SDValue();
+
     // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
     SDValue LHS = N0.getOperand(0);
     SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
   NODE_NAME_CASE(KILL)
+  NODE_NAME_CASE(DUMMY_CHAIN)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(SENDMSGHALT)
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 69567aa5f71..f6adceac6f1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,16 @@ protected:
 public:
   AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
 
+  bool mayIgnoreSignedZero(SDValue Op) const {
+    if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+      return true;
+
+    if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+      return BO->Flags.hasNoSignedZeros();
+
+    return false;
+  }
+
   bool isFAbsFree(EVT VT) const override;
   bool isFNegFree(EVT VT) const override;
   bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
   INTERP_P2,
   PC_ADD_REL_OFFSET,
   KILL,
+  DUMMY_CHAIN,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index f079c8d0c70..d7fa28bdc00 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
 
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
 def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
 def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 74851aedbb2..c35a67de1d7 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
-  unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+                                            unsigned ExplicitArgBytes) const {
+  unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
 
   if (ImplicitBytes == 0)
     return ExplicitArgBytes;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 51ba501bddd..0e3cb7dc1f8 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -311,22 +311,31 @@ public:
     return EnableXNACK;
   }
 
-  bool isAmdCodeObjectV2() const {
-    return isAmdHsaOS() || isMesa3DOS();
+  bool isMesaKernel(const MachineFunction &MF) const {
+    return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  // Covers VS/PS/CS graphics shaders
+  bool isMesaGfxShader(const MachineFunction &MF) const {
+    return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+  }
+
+  bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+    return isAmdHsaOS() || isMesaKernel(MF);
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
-  unsigned getExplicitKernelArgOffset() const {
-    return isAmdCodeObjectV2() ? 0 : 36;
+  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+    return isAmdCodeObjectV2(MF) ? 0 : 36;
   }
 
   unsigned getAlignmentForImplicitArgPtr() const {
     return isAmdHsaOS() ? 8 : 4;
   }
 
-  unsigned getImplicitArgNumBytes() const {
-    if (isMesa3DOS())
+  unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+    if (isMesaKernel(MF))
       return 16;
     if (isAmdHsaOS() && isOpenCLEnv())
       return 32;
@@ -585,7 +594,7 @@ public:
     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
   }
 
-  unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index de7ce5cb9e4..77fee4356b6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
     llvm_unreachable("Unsupported private trunc store");
   }
 
-  SDValue Chain = Store->getChain();
+  SDValue OldChain = Store->getChain();
+  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+  // Skip dummy
+  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
   SDValue BasePtr = Store->getBasePtr();
   SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
 
   // Store dword
   // TODO: Can we be smarter about MachinePointerInfo?
-  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+  // If we are part of expanded vector, make our neighbors depend on this store
+  if (VectorTrunc) {
+    // Make all other vector elements depend on this store
+    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+  }
+  return NewStore;
 }
 
 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Neither LOCAL nor PRIVATE can do vectors at the moment
   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
       VT.isVector()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+      // Add an extra level of chain to isolate this vector
+      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+      // TODO: can the chain be replaced without creating a new store?
+      SDValue NewStore = DAG.getTruncStore(
+          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+          MemVT, StoreNode->getAlignment(),
+          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+      StoreNode = cast<StoreSDNode>(NewStore);
+    }
+
     return scalarizeVectorStore(StoreNode, DAG);
   }
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // Put the mask in correct place
   SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
 
-  // Put the mask in correct place
+  // Put the value bits in correct place
   SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
 
-    unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
index 19795bdde64..9210e66b0fe 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
 
 def MOV : R600_1OP <0x19, "MOV", []>;
 
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak in some to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+  (outs),
+  (ins),
+  "DUMMY_CHAIN",
+  [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
 let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
 
 class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d0a69eafc58..0b571551588 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
     PreloadedPrivateBufferReg = TRI->getPreloadedValue(
       MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
   }
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
-    assert(ST.isAmdCodeObjectV2());
+    assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
     MRI.addLiveIn(PreloadedPrivateBufferReg);
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   bool CopyBuffer = ResourceRegUsed &&
     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+    ST.isAmdCodeObjectV2(MF) &&
     ScratchRsrcReg != PreloadedPrivateBufferReg;
 
   // This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
-    assert(!ST.isAmdCodeObjectV2());
+  if (ResourceRegUsed && (ST.isMesaGfxShader(MF) ||
+      (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+    assert(!ST.isAmdCodeObjectV2(MF));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 
-    unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
-    unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
 
     // Use relocations to get the pointer, and setup the other bits manually.
     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-    BuildMI(MBB, I, DL, SMovB32, Rsrc0)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD0")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
 
-    BuildMI(MBB, I, DL, SMovB32, Rsrc1)
-      .addExternalSymbol("SCRATCH_RSRC_DWORD1")
-      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+    if (MFI->hasPrivateMemoryInputPtr()) {
+      unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+      if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+        BuildMI(MBB, I, DL, Mov64, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      } else {
+        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+        PointerType *PtrTy =
+          PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+                           AMDGPUAS::CONSTANT_ADDRESS);
+        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+        auto MMO = MF.getMachineMemOperand(PtrInfo,
+                                           MachineMemOperand::MOLoad |
+                                           MachineMemOperand::MOInvariant |
+                                           MachineMemOperand::MODereferenceable,
+                                           0, 0);
+        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+          .addReg(PreloadedPrivateBufferReg)
+          .addImm(0) // offset
+          .addImm(0) // glc
+          .addMemOperand(MMO)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      }
+    } else {
+      unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+      unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+    }
 
     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
       .addImm(Rsrc23 & 0xffffffff)
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9140fe6cd14..b98f9f400ee 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (!AMDGPU::isShader(CallConv)) {
     assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
   } else {
-    assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+    assert(!Info->hasDispatchPtr() &&
            !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
            !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
            !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
            !Info->hasWorkItemIDZ());
   }
 
+  if (Info->hasPrivateMemoryInputPtr()) {
+    unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+    MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(PrivateMemoryPtrReg);
+  }
+
   // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
   if (Info->hasPrivateSegmentBuffer()) {
     unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = VA.getLocVT();
-      const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+      const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
                               VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
   if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
     HasStackObjects = true;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
   // TODO: Should this propagate fast-math-flags?
 
   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_implicit_buffer_ptr: {
+    unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+  }
   case Intrinsic::amdgcn_dispatch_ptr:
   case Intrinsic::amdgcn_queue_ptr: {
-    if (!Subtarget->isAmdCodeObjectV2()) {
+    if (!Subtarget->isAmdCodeObjectV2(MF)) {
       DiagnosticInfoUnsupported BadIntrin(
           *MF.getFunction(), "unsupported hsa intrinsic without hsa target",
           DL.getDebugLoc());
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
index b86c0419118..38e31e75ee6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -997,6 +997,11 @@ def : Pat <
 >;
 
 def : Pat <
+  (i1 (trunc i16:$a)),
+  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : Pat <
   (i1 (trunc i64:$a)),
   (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
                     (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e911817c451..ecd46b95ca6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     PrivateSegmentWaveByteOffset(false),
     WorkItemIDX(false),
     WorkItemIDY(false),
-    WorkItemIDZ(false) {
+    WorkItemIDZ(false),
+    PrivateMemoryInputPtr(false) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (HasStackObjects || MaySpill)
     PrivateSegmentWaveByteOffset = true;
 
-  if (ST.isAmdCodeObjectV2()) {
+  if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
 
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
 
     if (F->hasFnAttribute("amdgpu-dispatch-id"))
       DispatchID = true;
+  } else if (ST.isMesaGfxShader(MF)) {
+    if (HasStackObjects || MaySpill)
+      PrivateMemoryInputPtr = true;
   }
 
   // We don't need to worry about accessing spills with flat instructions.
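(Aside: the non-HSA private-memory plumbing in the hunks above and below splits into the two cases named in the new SIMachineFunctionInfo comment — compute gets the descriptor directly in sgpr[0:1], other shaders get a pointer there. The following is only a toy C++ model of that distinction, not LLVM's API; materializeRsrc01 and its integer "registers" are invented for illustration:)

    #include <cstdint>

    // Toy model of the two cases the SIFrameLowering hunk emits code for.
    // A compute shader is preloaded with the scratch descriptor itself in
    // sgpr[0:1] (lowered as S_MOV_B64); a graphics shader is preloaded with
    // a *pointer* in sgpr[0:1] and loads 64 bits through it (S_LOAD_DWORDX2).
    uint64_t materializeRsrc01(bool IsCompute, uint64_t SGPR01) {
      if (IsCompute)
        return SGPR01;                                     // copy descriptor
      return *reinterpret_cast<const uint64_t *>(SGPR01);  // indirect 64 bits
    }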
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   return FlatScratchInitUserSGPR;
 }
 
+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+  NumUserSGPRs += 2;
+  return PrivateMemoryPtrUserSGPR;
+}
+
 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
   MachineFunction *MF,
   unsigned FrameIndex,
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233cd78..6fc8d18bceb 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   unsigned ScratchRSrcReg;
   unsigned ScratchWaveOffsetReg;
 
+  // Input registers for non-HSA ABI
+  unsigned PrivateMemoryPtrUserSGPR;
+
   // Input registers setup for the HSA ABI.
   // User SGPRs in allocation order.
   unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ private:
   bool WorkItemIDY : 1;
   bool WorkItemIDZ : 1;
 
+  // Private memory buffer
+  // Compute directly in sgpr[0:1]
+  // Other shaders indirect 64-bits at sgpr[0:1]
+  bool PrivateMemoryInputPtr : 1;
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ public:
   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
   unsigned addDispatchID(const SIRegisterInfo &TRI);
   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+  unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
 
   // Add system SGPRs.
   unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ public:
     return WorkItemIDZ;
   }
 
+  bool hasPrivateMemoryInputPtr() const {
+    return PrivateMemoryInputPtr;
+  }
+
   unsigned getNumUserSGPRs() const {
     return NumUserSGPRs;
   }
@@ -338,6 +351,10 @@ public:
     return QueuePtrUserSGPR;
   }
 
+  unsigned getPrivateMemoryPtrUserSGPR() const {
+    return PrivateMemoryPtrUserSGPR;
+  }
+
   bool hasSpilledSGPRs() const {
     return HasSpilledSGPRs;
   }
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8c4b24a4504..a1ed5e8441d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
     return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
   case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
-    assert(ST.isAmdCodeObjectV2() &&
-           "Non-CodeObjectV2 ABI currently uses relocations");
-    assert(MFI->hasPrivateSegmentBuffer());
-    return MFI->PrivateSegmentBufferUserSGPR;
+    if (ST.isAmdCodeObjectV2(MF)) {
+      assert(MFI->hasPrivateSegmentBuffer());
+      return MFI->PrivateSegmentBufferUserSGPR;
+    }
+    assert(MFI->hasPrivateMemoryInputPtr());
+    return MFI->PrivateMemoryPtrUserSGPR;
   case SIRegisterInfo::KERNARG_SEGMENT_PTR:
     assert(MFI->hasKernargSegmentPtr());
     return MFI->KernargSegmentPtrUserSGPR;
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a15b9ceff2f..8cae83cd9d1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -607,12 +607,6 @@ def : Pat<
   (COPY $src)
 >;
 
-def : Pat<
-  (i1 (trunc i16:$src)),
-  (COPY $src)
->;
-
-
 def : Pat <
   (i16 (trunc i64:$src)),
   (EXTRACT_SUBREG $src, sub0)
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5efa64d25ce..c2a4d4ba99b 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
 }
 
 class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+  // v_div_scale_{f32|f64} do not support input modifiers.
+  let HasModifiers = 0;
   let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+  let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
 }
 
 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
 def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
   let SchedRW = [WriteFloatFMA, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }
 
 // Double precision division pre-scale.
 def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
   let SchedRW = [WriteDouble, WriteSALU];
   let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 8ec9cb02813..95db35ce8ff 100644
--- a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit the rest of the function body.
   EmitFunctionBody();
 
+  // Emit the XRay table for this function.
+  emitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
diff --git a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
index 9c278a52a7f..7a7b7fede7c 100644
--- a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is passed in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is passed in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // HFAs are passed in a contiguous block of registers, or on the stack
   CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
 
-  // A SwiftError is returned in R6.
-  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+  // A SwiftError is returned in R8.
+  CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
 // Also save R7-R4 first to match the stack frame fixed spill areas.
 def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
 
-// R6 is used to pass swifterror, remove it from CSR.
-def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
 
 def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
                                           (sub CSR_AAPCS_ThisReturn, R9))>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 95fcc8dcb45..baa4e0330cf 100644
--- a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1225,16 +1225,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     }
     case ARM::tTPsoft:
     case ARM::TPsoft: {
+      const bool Thumb = Opcode == ARM::tTPsoft;
+
       MachineInstrBuilder MIB;
-      if (Opcode == ARM::tTPsoft)
+      if (STI->genLongCalls()) {
+        MachineFunction *MF = MBB.getParent();
+        MachineConstantPool *MCP = MF->getConstantPool();
+        unsigned PCLabelID = AFI->createPICLabelUId();
+        MachineConstantPoolValue *CPV =
+            ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(),
+                                          "__aeabi_read_tp", PCLabelID, 0);
+        unsigned Reg = MI.getOperand(0).getReg();
         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                      TII->get( ARM::tBL))
-          .addImm((unsigned)ARMCC::AL).addReg(0)
-          .addExternalSymbol("__aeabi_read_tp", 0);
-      else
+                      TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+                  .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+        if (!Thumb)
+          MIB.addImm(0);
+        MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+
         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                      TII->get( ARM::BL))
-          .addExternalSymbol("__aeabi_read_tp", 0);
+                      TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
+        if (Thumb)
+          MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+        MIB.addReg(Reg, RegState::Kill);
+      } else {
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                      TII->get(Thumb ? ARM::tBL : ARM::BL));
+        if (Thumb)
+          MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+        MIB.addExternalSymbol("__aeabi_read_tp", 0);
+      }
 
       MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       TransferImpOps(MI, MIB, MIB);
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32b7c87e61b..0f84a235916 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -97,171 +97,6 @@ namespace {
   };
 }
 
-void ARMTargetLowering::InitLibcallCallingConvs() {
-  // The builtins on ARM always use AAPCS, irrespective of wheter C is AAPCS or
-  // AAPCS_VFP.
-  for (const auto LC : {
-           RTLIB::SHL_I16,
-           RTLIB::SHL_I32,
-           RTLIB::SHL_I64,
-           RTLIB::SHL_I128,
-           RTLIB::SRL_I16,
-           RTLIB::SRL_I32,
-           RTLIB::SRL_I64,
-           RTLIB::SRL_I128,
-           RTLIB::SRA_I16,
-           RTLIB::SRA_I32,
-           RTLIB::SRA_I64,
-           RTLIB::SRA_I128,
-           RTLIB::MUL_I8,
-           RTLIB::MUL_I16,
-           RTLIB::MUL_I32,
-           RTLIB::MUL_I64,
-           RTLIB::MUL_I128,
-           RTLIB::MULO_I32,
-           RTLIB::MULO_I64,
-           RTLIB::MULO_I128,
-           RTLIB::SDIV_I8,
-           RTLIB::SDIV_I16,
-           RTLIB::SDIV_I32,
-           RTLIB::SDIV_I64,
-           RTLIB::SDIV_I128,
-           RTLIB::UDIV_I8,
-           RTLIB::UDIV_I16,
-           RTLIB::UDIV_I32,
-           RTLIB::UDIV_I64,
-           RTLIB::UDIV_I128,
-           RTLIB::SREM_I8,
-           RTLIB::SREM_I16,
-           RTLIB::SREM_I32,
-           RTLIB::SREM_I64,
-           RTLIB::SREM_I128,
-           RTLIB::UREM_I8,
-           RTLIB::UREM_I16,
-           RTLIB::UREM_I32,
-           RTLIB::UREM_I64,
-           RTLIB::UREM_I128,
-           RTLIB::SDIVREM_I8,
-           RTLIB::SDIVREM_I16,
-           RTLIB::SDIVREM_I32,
-           RTLIB::SDIVREM_I64,
-           RTLIB::SDIVREM_I128,
-           RTLIB::UDIVREM_I8,
-           RTLIB::UDIVREM_I16,
-           RTLIB::UDIVREM_I32,
-           RTLIB::UDIVREM_I64,
-           RTLIB::UDIVREM_I128,
-           RTLIB::NEG_I32,
-           RTLIB::NEG_I64,
-           RTLIB::ADD_F32,
-           RTLIB::ADD_F64,
-           RTLIB::ADD_F80,
-           RTLIB::ADD_F128,
-           RTLIB::SUB_F32,
-           RTLIB::SUB_F64,
-           RTLIB::SUB_F80,
-           RTLIB::SUB_F128,
-           RTLIB::MUL_F32,
-           RTLIB::MUL_F64,
-           RTLIB::MUL_F80,
-           RTLIB::MUL_F128,
-           RTLIB::DIV_F32,
-           RTLIB::DIV_F64,
-           RTLIB::DIV_F80,
-           RTLIB::DIV_F128,
-           RTLIB::POWI_F32,
-           RTLIB::POWI_F64,
-           RTLIB::POWI_F80,
-           RTLIB::POWI_F128,
-           RTLIB::FPEXT_F64_F128,
-           RTLIB::FPEXT_F32_F128,
-           RTLIB::FPEXT_F32_F64,
-           RTLIB::FPEXT_F16_F32,
-           RTLIB::FPROUND_F32_F16,
-           RTLIB::FPROUND_F64_F16,
-           RTLIB::FPROUND_F80_F16,
-           RTLIB::FPROUND_F128_F16,
-           RTLIB::FPROUND_F64_F32,
-           RTLIB::FPROUND_F80_F32,
-           RTLIB::FPROUND_F128_F32,
-           RTLIB::FPROUND_F80_F64,
-           RTLIB::FPROUND_F128_F64,
-           RTLIB::FPTOSINT_F32_I32,
-           RTLIB::FPTOSINT_F32_I64,
-           RTLIB::FPTOSINT_F32_I128,
-           RTLIB::FPTOSINT_F64_I32,
-           RTLIB::FPTOSINT_F64_I64,
-           RTLIB::FPTOSINT_F64_I128,
-           RTLIB::FPTOSINT_F80_I32,
-           RTLIB::FPTOSINT_F80_I64,
-           RTLIB::FPTOSINT_F80_I128,
-           RTLIB::FPTOSINT_F128_I32,
-           RTLIB::FPTOSINT_F128_I64,
-           RTLIB::FPTOSINT_F128_I128,
-           RTLIB::FPTOUINT_F32_I32,
-           RTLIB::FPTOUINT_F32_I64,
-           RTLIB::FPTOUINT_F32_I128,
-           RTLIB::FPTOUINT_F64_I32,
-           RTLIB::FPTOUINT_F64_I64,
-           RTLIB::FPTOUINT_F64_I128,
-           RTLIB::FPTOUINT_F80_I32,
-           RTLIB::FPTOUINT_F80_I64,
-           RTLIB::FPTOUINT_F80_I128,
-           RTLIB::FPTOUINT_F128_I32,
-           RTLIB::FPTOUINT_F128_I64,
-           RTLIB::FPTOUINT_F128_I128,
-           RTLIB::SINTTOFP_I32_F32,
-           RTLIB::SINTTOFP_I32_F64,
-           RTLIB::SINTTOFP_I32_F80,
-           RTLIB::SINTTOFP_I32_F128,
-           RTLIB::SINTTOFP_I64_F32,
-           RTLIB::SINTTOFP_I64_F64,
-           RTLIB::SINTTOFP_I64_F80,
-           RTLIB::SINTTOFP_I64_F128,
-           RTLIB::SINTTOFP_I128_F32,
-           RTLIB::SINTTOFP_I128_F64,
-           RTLIB::SINTTOFP_I128_F80,
-           RTLIB::SINTTOFP_I128_F128,
-           RTLIB::UINTTOFP_I32_F32,
-           RTLIB::UINTTOFP_I32_F64,
-           RTLIB::UINTTOFP_I32_F80,
-           RTLIB::UINTTOFP_I32_F128,
-           RTLIB::UINTTOFP_I64_F32,
-           RTLIB::UINTTOFP_I64_F64,
-           RTLIB::UINTTOFP_I64_F80,
-           RTLIB::UINTTOFP_I64_F128,
-           RTLIB::UINTTOFP_I128_F32,
-           RTLIB::UINTTOFP_I128_F64,
-           RTLIB::UINTTOFP_I128_F80,
-           RTLIB::UINTTOFP_I128_F128,
-           RTLIB::OEQ_F32,
-           RTLIB::OEQ_F64,
-           RTLIB::OEQ_F128,
-           RTLIB::UNE_F32,
-           RTLIB::UNE_F64,
-           RTLIB::UNE_F128,
-           RTLIB::OGE_F32,
-           RTLIB::OGE_F64,
-           RTLIB::OGE_F128,
-           RTLIB::OLT_F32,
-           RTLIB::OLT_F64,
-           RTLIB::OLT_F128,
-           RTLIB::OLE_F32,
-           RTLIB::OLE_F64,
-           RTLIB::OLE_F128,
-           RTLIB::OGT_F32,
-           RTLIB::OGT_F64,
-           RTLIB::OGT_F128,
-           RTLIB::UO_F32,
-           RTLIB::UO_F64,
-           RTLIB::UO_F128,
-           RTLIB::O_F32,
-           RTLIB::O_F64,
-           RTLIB::O_F128,
-       })
-    setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
-}
-
 // The APCS parameter registers.
 static const MCPhysReg GPRArgRegs[] = {
   ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -349,7 +184,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
-  InitLibcallCallingConvs();
+  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
+      !Subtarget->isTargetWatchOS()) {
+    const auto &E = Subtarget->getTargetTriple().getEnvironment();
+
+    bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
+                      E == Triple::MuslEABIHF;
+    // Windows is a special case. Technically, we will replace all of the "GNU"
+    // calls with calls to MSVCRT if appropriate and adjust the calling
+    // convention then.
+    IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
+
+    for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
+      setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
+                            IsHFTarget ? CallingConv::ARM_AAPCS_VFP
+                                       : CallingConv::ARM_AAPCS);
+  }
 
   if (Subtarget->isTargetMachO()) {
     // Uses VFP for Thumb libfuncs if available.
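(Aside: one way to see why the rewritten loop above must cover every RTLIB entry is that ordinary code reaches these helpers implicitly. A minimal C++ illustration — div64 is an invented name, and __aeabi_uldivmod is the standard AEABI helper this is expected to lower to on 32-bit ARM EABI targets:)

    // 32-bit ARM has no 64-bit divide instruction, so this division becomes
    // a compiler-generated call to the __aeabi_uldivmod runtime helper; the
    // loop in the hunk above decides whether such calls use ARM_AAPCS or
    // ARM_AAPCS_VFP.
    unsigned long long div64(unsigned long long a, unsigned long long b) {
      return a / b;
    }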
@@ -1937,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                          StackPtr, MemOpChains, Flags);
       }
     } else if (VA.isRegLoc()) {
-      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+          Outs[0].VT == MVT::i32) {
         assert(VA.getLocVT() == MVT::i32 &&
                "unexpected calling convention register assignment");
         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
@@ -3176,17 +3027,20 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
 }
 
+static bool isReadOnly(const GlobalValue *GV) {
+  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+    GV = GA->getBaseObject();
+  return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+         isa<Function>(GV);
+}
+
 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   const TargetMachine &TM = getTargetMachine();
-  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-    GV = GA->getBaseObject();
-  bool IsRO =
-      (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
-      isa<Function>(GV);
+  bool IsRO = isReadOnly(GV);
 
   // promoteToConstantPool only if not generating XO text section
   if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
@@ -7721,11 +7575,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::MUL:          return LowerMUL(Op, DAG);
   case ISD::SDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ true);
     return LowerSDIV(Op, DAG);
   case ISD::UDIV:
-    if (Subtarget->isTargetWindows())
+    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
       return LowerDIV_Windows(Op, DAG, /* Signed */ false);
     return LowerUDIV(Op, DAG);
   case ISD::ADDC:
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
index 7a7f91f4d3c..84c6eb845bb 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -538,8 +538,6 @@ class InstrItineraryData;
 
     bool HasStandaloneRem = true;
 
-    void InitLibcallCallingConvs();
-
     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
     void addDRTypeForNEON(MVT VT);
     void addQRTypeForNEON(MVT VT);
diff --git a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97ca11ca501..d054578deb6 100644
--- a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -413,6 +413,7 @@ public:
     Match_RequiresDifferentOperands,
     Match_RequiresNoZeroRegister,
     Match_RequiresSameSrcAndDst,
+    Match_NoFCCRegisterForCurrentISA,
     Match_NonZeroOperandForSync,
 #define GET_OPERAND_DIAGNOSTIC_TYPES
 #include "MipsGenAsmMatcher.inc"
@@ -1461,8 +1462,6 @@ public:
   bool isFCCAsmReg() const {
     if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
       return false;
-    if (!AsmParser.hasEightFccRegisters())
-      return RegIdx.Index == 0;
     return RegIdx.Index <= 7;
   }
   bool isACCAsmReg() const {
@@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
       return Match_RequiresSameSrcAndDst;
     }
 }
+
 unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
   switch (Inst.getOpcode()) {
   // As described by the MIPSR6 spec, daui must not use the zero operand for
@@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
     if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
       return Match_RequiresDifferentOperands;
     return Match_Success;
-  default:
-    return Match_Success;
   }
+
+  uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & MipsII::HasFCCRegOperand) &&
+      (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+    return Match_NoFCCRegisterForCurrentISA;
+
+  return Match_Success;
+
 }
 
 static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return Error(IDLoc, "invalid operand ($zero) for instruction");
   case Match_RequiresSameSrcAndDst:
     return Error(IDLoc, "source and destination must match");
+  case Match_NoFCCRegisterForCurrentISA:
+    return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+                 "non-zero fcc register doesn't exist in current ISA level");
   case Match_Immz:
     return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
   case Match_UImm1_0:
diff --git a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 35de7b27bf1..a90db2384c4 100644
--- a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -123,7 +123,9 @@ namespace MipsII {
     HasForbiddenSlot = 1 << 5,
     /// IsPCRelativeLoad - A Load instruction with implicit source register
     ///                    ($pc) with explicit offset and destination register
-    IsPCRelativeLoad = 1 << 6
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+    HasFCCRegOperand = 1 << 7
   };
 }
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index fc83761e409..5600f71ff68 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
                SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
 
 def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
-                  CEQS_FM_MM<0>;
+                  CEQS_FM_MM<0> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+  bits<3> fcc = 0;
+}
+
 def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
-                  CEQS_FM_MM<1>;
+                  CEQS_FM_MM<1> {
+  // FIXME: This is a required to work around the fact that these instructions
+  //        only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+  //        fcc register set is used directly.
+ bits<3> fcc = 0; +} def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; @@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in { def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, LW_FM_MM<0x26>; } + + multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt, + InstrItinClass itin> { + def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } + } + + defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + let DecoderNamespace = "Mips64" in + defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">, + ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; } //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td index 8b595f9e6c4..774976828a0 100644 --- 
a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch { class CEQS_FM_MM<bits<2> fmt> : MMArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch { let Inst{31-26} = 0x15; let Inst{25-21} = ft; let Inst{20-16} = fs; - let Inst{15-13} = 0x0; // cc + let Inst{15-13} = fcc; let Inst{12} = 0; let Inst{11-10} = fmt; let Inst{9-6} = cond; let Inst{5-0} = 0x3c; } +class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> { + let cond = c; +} + class BC1F_FM_MM<bits<5> tf> : MMArch { bits<16> offset; diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 179695bc698..04d6529a073 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // TODO: implement } +// Emit .dtprelword or .dtpreldword directive +// and value for debug thread local expression. +void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + switch (Size) { + case 4: + OutStreamer->EmitDTPRel32Value(Value); + break; + case 8: + OutStreamer->EmitDTPRel64Value(Value); + break; + default: + llvm_unreachable("Unexpected size of expression value."); + } +} + // Align all targets of indirect branches on bundle size. Used only if target // is NaCl. void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) { diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h index 259e557e128..c5cf5241c23 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h @@ -140,6 +140,7 @@ public: void EmitStartOfAsmFile(Module &M) override; void EmitEndOfAsmFile(Module &M) override; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + void EmitDebugValue(const MCExpr *Value, unsigned Size) const override; }; } diff --git a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp index 29f3e2c07e0..a44192f57aa 100644 --- a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0); emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1); - emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg( - Mips::FCC0, RegState::ImplicitDefine); + emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg) + .addReg(RightReg); emitInst(CondMovOpc, ResultReg) .addReg(RegWithOne) .addReg(Mips::FCC0) diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td index ab7aa9dcdca..df42d56d041 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin, let isTerminator = 1; let hasDelaySlot = DelaySlot; let Defs = [AT]; + let hasFCCRegOperand = 1; } class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, @@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, !strconcat("c.$cond.", typestr)>, HARDFLOAT { let Defs = [FCC0]; let isCodeGenOnly = 1; + let 
hasFCCRegOperand = 1; } + +// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather +// than duplicating the instruction definition for MIPS1 - MIPS3, we expand +// c.cond.ft if necessary, and reject it after constructing the +// instruction if the ISA doesn't support it. class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC, InstrItinClass itin> : - InstSE<(outs), (ins RC:$fs, RC:$ft), - !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin, - FrmFR>, HARDFLOAT; + InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft), + !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin, + FrmFR>, HARDFLOAT { + let isCompare = 1; + let hasFCCRegOperand = 1; +} + multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt, InstrItinClass itin> { - def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>; - def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>; - def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>; - def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>; - def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>; - def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>; - def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>; - def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>; - def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>; - def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>; - def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>; - def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>; - def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>; - def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>; - def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>; - def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>; + def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", 
TypeStr, RC, itin>, + C_COND_FM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } } +let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "Mips64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_64; - +} //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// @@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, /// Floating Point Compare let AdditionalPredicates = [NotInMicroMips] in { def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, - ISA_MIPS1_NOT_32R6_64R6; + ISA_MIPS1_NOT_32R6_64R6 { + + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_32; + ISA_MIPS1_NOT_32R6_64R6, FGR_32 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } } let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_64; + ISA_MIPS1_NOT_32R6_64R6, FGR_64 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; +} //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions @@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd), //===----------------------------------------------------------------------===// // InstAliases. 
//===----------------------------------------------------------------------===// -def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; - def : MipsInstAlias <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>, ISA_MIPS2, HARDFLOAT; @@ -630,6 +703,80 @@ def : MipsInstAlias def : MipsInstAlias <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_64, ISA_MIPS2, HARDFLOAT; + +multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> { + def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_F_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UN_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_EQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SF_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGL_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; +} + +multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString, + Instruction BCFalse, string BCFalseString> { + def : MipsInstAlias<!strconcat(BCTrueString, " $offset"), + (BCTrue FCC0, brtarget:$offset), 1>; + + def : MipsInstAlias<!strconcat(BCFalseString, " $offset"), + (BCFalse FCC0, brtarget:$offset), 1>; +} + +let AdditionalPredicates = [NotInMicroMips] in { + defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + 
ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6, + HARDFLOAT; + defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6, + HARDFLOAT; +} //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td index 1437fb75434..817d9b44b9c 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, bit IsPCRelativeLoad = 0; // Load instruction with implicit source register // ($pc) and with explicit offset and destination // register + bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is + // present in MIPS-I to MIPS-III. - // TSFlags layout should be kept in sync with MipsInstrInfo.h. + // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h. let TSFlags{3-0} = FormBits; let TSFlags{4} = isCTI; let TSFlags{5} = hasForbiddenSlot; let TSFlags{6} = IsPCRelativeLoad; + let TSFlags{7} = hasFCCRegOperand; let DecoderNamespace = "Mips"; @@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch { class CEQS_FM<bits<5> fmt> : StdArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch { let Inst{25-21} = fmt; let Inst{20-16} = ft; let Inst{15-11} = fs; - let Inst{10-8} = 0; // cc + let Inst{10-8} = fcc; let Inst{7-4} = 0x3; let Inst{3-0} = cond; } diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index fadab780612..c5d6a05d661 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL, // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align); } + +const MCExpr * +MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + const MCExpr *Expr = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + return MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(0x8000, getContext()), getContext()); +} diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h index e5423f9578a..a37ec154ff7 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h @@ -42,6 +42,8 @@ class MipsTargetMachine; MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const override; + /// Describe a TLS variable address within debug info. 
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 90111bbea07..f615cc7cc97 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst), PPC970_DGroup_Single; } // hasSideEffects = 0 +def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src), + "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>; +def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src), + "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; +def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src), + "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def : Pat<(int_ppc_dcbt xoaddr:$dst), (DCBT 0, xoaddr:$dst)>; @@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), "mftb $RT, $SPR", IIC_SprMFTB>; +def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR), + "mfpmr $RT, $SPR", IIC_SprMFPMR>; + +def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtpmr $SPR, $RT", IIC_SprMTPMR>; + + // A pseudo-instruction used to implement the read of the 64-bit cycle counter // on a 32-bit target. let hasSideEffects = 1, usesCustomInserter = 1 in diff --git a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td index edabe774867..d240529bc73 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -118,6 +118,8 @@ def IIC_SprTLBIE : InstrItinClass; def IIC_SprABORT : InstrItinClass; def IIC_SprMSGSYNC : InstrItinClass; def IIC_SprSTOP : InstrItinClass; +def IIC_SprMFPMR : InstrItinClass; +def IIC_SprMTPMR : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index f687d326b52..15d5991b938 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<5, [E500_SFX0]>], [8, 1], [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 @@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<1, [E500_SFX0, E500_SFX1]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 5db886cf8f9..32f8e652dd5 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries< InstrStage<5, [E5500_CFX_0]>], [9, 2], // Latency = 5, Repeat rate = 5 [E5500_GPR_Bypass, E5500_CR_Bypass]>, - InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<4, [E5500_SFX0]>], + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [E5500_GPR_Bypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [NoBypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, diff --git a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 192b942490e..985acf92a2d 100644 --- a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -77,11 +77,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, default: return false; case X86::TCRETURNdi: - case X86::TCRETURNdicc: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: - case X86::TCRETURNdi64cc: case X86::TCRETURNri64: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; @@ -99,10 +97,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, Offset = StackAdj - MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); - if (Opcode == 
X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) { - assert(Offset == 0 && "Conditional tail call cannot adjust the stack."); - } - if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); @@ -111,21 +105,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); - if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || - Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) { + if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) { unsigned Op; switch (Opcode) { case X86::TCRETURNdi: Op = X86::TAILJMPd; break; - case X86::TCRETURNdicc: - Op = X86::TAILJMPd_CC; - break; - case X86::TCRETURNdi64cc: - assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder."); - // TODO: We could do it for Win64 "leaf" functions though; PR30337. - Op = X86::TAILJMPd64_CC; - break; default: // Note: Win64 uses REX prefixes for indirect jumps out of functions, but // not direct ones. @@ -141,10 +126,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } - if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) { - MIB.addImm(MBBI->getOperand(2).getImm()); - } - } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm diff --git a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f13b722eb3..08fe2bad281 100644 --- a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28788,10 +28788,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// If a vector select has an operand that is -1 or 0, simplify the select to a -/// bitwise logic operation. -static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +/// If a vector select has an operand that is -1 or 0, try to simplify the +/// select to a bitwise logic operation. +static SDValue +combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -28853,18 +28855,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, } } - if (!TValIsAllOnes && !FValIsAllZeros) + // vselect Cond, 111..., 000... -> Cond + if (TValIsAllOnes && FValIsAllZeros) + return DAG.getBitcast(VT, Cond); + + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT)) return SDValue(); - SDValue Ret; - if (TValIsAllOnes && FValIsAllZeros) - Ret = Cond; - else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); - else if (FValIsAllZeros) - Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS)); + // vselect Cond, 111..., X -> or Cond, X + if (TValIsAllOnes) { + SDValue CastRHS = DAG.getBitcast(CondVT, RHS); + SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS); + return DAG.getBitcast(VT, Or); + } + + // vselect Cond, X, 000... 
-> and Cond, X + if (FValIsAllZeros) { + SDValue CastLHS = DAG.getBitcast(CondVT, LHS); + SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS); + return DAG.getBitcast(VT, And); + } - return DAG.getBitcast(VT, Ret); + return SDValue(); } static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { @@ -29353,7 +29365,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } } - if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget)) + if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) return V; // If this is a *dynamic* select (non-constant condition) and we can match @@ -31260,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. - if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) - return false; - if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - -/// Return true if VPACK* instruction can be used for the given types -/// and it is avalable on \p Subtarget. -static bool -isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { - if (Subtarget.hasSSE2()) - // v16i16 -> v16i8 - if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) - return true; - if (Subtarget.hasSSE41()) - // v8i32 -> v8i16 - if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) - return true; - return false; -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectUSatPattern(SDValue In, EVT VT) { - if (In.getOpcode() != ISD::UMIN) - return SDValue(); - - //Saturation with truncation. We truncate from InVT to VT. - assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() && - "Unexpected types for truncate operation"); - - APInt C; - if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { - // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according - // the element size of the destination type. - return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) : - SDValue(); - } - return SDValue(); -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// The types should allow to use VPMOVUS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. 
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT, - const X86Subtarget &Subtarget) { - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectUSatPattern(In, VT); -} - -static SDValue -combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue USatVal = detectUSatPattern(In, VT); - if (USatVal) { - if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); - return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); - } - } - return SDValue(); -} - /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -31913,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) - return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -32539,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; - // Try to combine truncation with unsigned saturation. - if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget)) - return Val; - // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { @@ -33778,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } } - // Try to synthesize horizontal adds from adds of shuffles. + // Try to synthesize horizontal subs from subs of shuffles. EVT VT = N->getValueType(0); if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && - isHorizontalBinOp(Op0, Op1, true)) + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); diff --git a/gnu/llvm/lib/Target/X86/X86InstrControl.td b/gnu/llvm/lib/Target/X86/X86InstrControl.td index 4ea223e82be..2f260c48df4 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrControl.td +++ b/gnu/llvm/lib/Target/X86/X86InstrControl.td @@ -264,21 +264,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>; } -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, EFLAGS] in { - def TCRETURNdicc : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. 
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} - //===----------------------------------------------------------------------===// // Call Instructions... @@ -340,19 +325,3 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; } } - -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [RSP, EFLAGS] in { - def TCRETURNdi64cc : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, - i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. - def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp index e3484d062bc..627b6120b04 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5108,85 +5108,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { return !isPredicated(MI); } -bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - return true; - default: - return false; - } -} - -bool X86InstrInfo::canMakeTailCallConditional( - SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - if (TailCall.getOpcode() != X86::TCRETURNdi && - TailCall.getOpcode() != X86::TCRETURNdi64) { - // Only direct calls can be done with a conditional branch. - return false; - } - - if (Subtarget.isTargetWin64()) { - // Conditional tail calls confuse the Win64 unwinder. - // TODO: Allow them for "leaf" functions; PR30337. - return false; - } - - assert(BranchCond.size() == 1); - if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { - // Can't make a conditional tail call with this condition. - return false; - } - - const X86MachineFunctionInfo *X86FI = - TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>(); - if (X86FI->getTCReturnAddrDelta() != 0 || - TailCall.getOperand(1).getImm() != 0) { - // A conditional tail call cannot do any stack adjustment. - return false; - } - - return true; -} - -void X86InstrInfo::replaceBranchWithTailCall( - MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - assert(canMakeTailCallConditional(BranchCond, TailCall)); - - MachineBasicBlock::iterator I = MBB.end(); - while (I != MBB.begin()) { - --I; - if (I->isDebugValue()) - continue; - if (!I->isBranch()) - assert(0 && "Can't find the branch to replace!"); - - X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); - assert(BranchCond.size() == 1); - if (CC != BranchCond[0].getImm()) - continue; - - break; - } - - unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc - : X86::TCRETURNdi64cc; - - auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); - MIB->addOperand(TailCall.getOperand(0)); // Destination. - MIB.addImm(0); // Stack offset (not used). - MIB->addOperand(BranchCond[0]); // Condition. - MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. 
- - I->eraseFromParent(); -} - // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may // not be a fallthrough MBB now due to layout changes). Return nullptr if the // fallthrough MBB cannot be identified. diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.h b/gnu/llvm/lib/Target/X86/X86InstrInfo.h index 8d746172dcb..acfdef4da7a 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.h +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.h @@ -316,13 +316,6 @@ public: // Branch analysis. bool isUnpredicatedTerminator(const MachineInstr &MI) const override; - bool isUnconditionalTailCall(const MachineInstr &MI) const override; - bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - void replaceBranchWithTailCall(MachineBasicBlock &MBB, - SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp index a38a4b30b77..feeb2fd5993 100644 --- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -498,16 +498,11 @@ ReSimplify: break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction. + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction. { unsigned Opcode; case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; - case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: - Opcode = X86::GetCondBranchFromCond( - static_cast<X86::CondCode>(MI->getOperand(1).getImm())); - goto SetTailJmpOpcode; SetTailJmpOpcode: MCOperand Saved = OutMI.getOperand(0); @@ -1281,11 +1276,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: - case X86::TAILJMPd_CC: case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64: - case X86::TAILJMPd64_CC: case X86::TAILJMPr64_REX: case X86::TAILJMPm64_REX: // Lower these as normal, but add some comments. diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp index 727ff70c3ff..586bb7bd7b1 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp @@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; - - assert((!isPMULLDSlow() || hasSSE41()) && - "Feature Slow PMULLD can only be set on a subtarget with SSE4.1"); } void X86Subtarget::initializeEnvironment() {
