Diffstat (limited to 'gnu/llvm/lib')
-rw-r--r--  gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 10
-rw-r--r--  gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 1
-rw-r--r--  gnu/llvm/lib/Analysis/ScalarEvolution.cpp | 17
-rw-r--r--  gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2
-rw-r--r--  gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 193
-rw-r--r--  gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h | 2
-rw-r--r--  gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9
-rw-r--r--  gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 2
-rw-r--r--  gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 13
-rw-r--r--  gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9
-rw-r--r--  gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 58
-rw-r--r--  gnu/llvm/lib/CodeGen/BranchFolding.cpp | 37
-rw-r--r--  gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 24
-rw-r--r--  gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 8
-rw-r--r--  gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 48
-rw-r--r--  gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp | 38
-rw-r--r--  gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 32
-rw-r--r--  gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 31
-rw-r--r--  gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 5
-rw-r--r--  gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 9
-rw-r--r--  gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 32
-rw-r--r--  gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 65
-rw-r--r--  gnu/llvm/lib/MC/MCCodeView.cpp | 12
-rw-r--r--  gnu/llvm/lib/MC/MCMachOStreamer.cpp | 3
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64.td | 9
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td | 8
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2
-rw-r--r--  gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 31
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPU.td | 6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 10
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 11
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 3
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 12
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 23
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 30
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/R600Instructions.td | 14
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 58
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIInstructions.td | 5
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 15
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 17
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td | 6
-rw-r--r--  gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td | 6
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 3
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMCallingConv.td | 28
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 34
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp | 202
-rw-r--r--  gnu/llvm/lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 17
-rw-r--r--  gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 4
-rw-r--r--  gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td | 107
-rw-r--r--  gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td | 7
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 16
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h | 1
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsFastISel.cpp | 4
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsInstrFPU.td | 211
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsInstrFormats.td | 8
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp | 8
-rw-r--r--  gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h | 2
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 13
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCSchedule.td | 2
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td | 8
-rw-r--r--  gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td | 10
-rw-r--r--  gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp | 21
-rw-r--r--  gnu/llvm/lib/Target/X86/X86ISelLowering.cpp | 141
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrControl.td | 31
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrInfo.cpp | 79
-rw-r--r--  gnu/llvm/lib/Target/X86/X86InstrInfo.h | 7
-rw-r--r--  gnu/llvm/lib/Target/X86/X86MCInstLower.cpp | 9
-rw-r--r--  gnu/llvm/lib/Target/X86/X86Subtarget.cpp | 3
-rw-r--r--  gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 4
-rw-r--r--  gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 35
-rw-r--r--  gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 3
-rw-r--r--  gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 349
-rw-r--r--  gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 7
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 5
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp | 31
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/LICM.cpp | 3
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 11
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp | 25
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp | 8
-rw-r--r--  gnu/llvm/lib/Transforms/Scalar/SCCP.cpp | 5
-rw-r--r--  gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 11
-rw-r--r--  gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 15
-rw-r--r--  gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8
-rw-r--r--  gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7
-rw-r--r--  gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 265
90 files changed, 1538 insertions, 1199 deletions
diff --git a/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index e8d86bdbca5..c8d05794949 100644
--- a/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, V2Size, V2AAInfo,
- nullptr, UnderlyingV2);
+ AAMDNodes(), V2, MemoryLocation::UnknownSize,
+ V2AAInfo, nullptr, UnderlyingV2);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
- // cannot alias per GEP semantics: "A pointer value formed from a
- // getelementptr instruction is associated with the addresses associated
- // with the first operand of the getelementptr".
+ // cannot alias per GEP semantics: "Any memory access must be done through
+ // a pointer value associated with an address range of the memory access,
+ // otherwise the behavior is undefined.".
return R;
// If the max search depth is reached the result is undefined
diff --git a/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 6387bb36166..f5ba637e58e 100644
--- a/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
diff --git a/gnu/llvm/lib/Analysis/ScalarEvolution.cpp b/gnu/llvm/lib/Analysis/ScalarEvolution.cpp
index b3905cc01e8..ed328f12c46 100644
--- a/gnu/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/gnu/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -127,10 +127,15 @@ static cl::opt<unsigned> MulOpsInlineThreshold(
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
cl::init(1000));
-static cl::opt<unsigned>
- MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
- cl::desc("Maximum depth of recursive compare complexity"),
- cl::init(32));
+static cl::opt<unsigned> MaxSCEVCompareDepth(
+ "scalar-evolution-max-scev-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
+ cl::init(32));
+
+static cl::opt<unsigned> MaxValueCompareDepth(
+ "scalar-evolution-max-value-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive value complexity comparisons"),
+ cl::init(2));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -481,7 +486,7 @@ static int
CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
const LoopInfo *const LI, Value *LV, Value *RV,
unsigned Depth) {
- if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+ if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
return 0;
// Order pointer values after integer values. This helps SCEVExpander form
@@ -568,7 +573,7 @@ static int CompareSCEVComplexity(
if (LType != RType)
return (int)LType - (int)RType;
- if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
return 0;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
diff --git a/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index d9e249aad21..a46e49ccde8 100644
--- a/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -512,7 +512,7 @@ private:
}
Metadata *getFnMetadataByID(unsigned ID) {
- return MDLoader->getMetadataFwdRef(ID);
+ return MDLoader->getMetadataFwdRefOrLoad(ID);
}
BasicBlock *getBasicBlock(unsigned ID) const {
diff --git a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 4a5d18e2db7..b89f5be4a36 100644
--- a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -448,6 +448,7 @@ class MetadataLoader::MetadataLoaderImpl {
bool StripTBAA = false;
bool HasSeenOldLoopTags = false;
+ bool NeedUpgradeToDIGlobalVariableExpression = false;
/// True if metadata is being parsed for a module being ThinLTO imported.
bool IsImporting = false;
@@ -473,6 +474,45 @@ class MetadataLoader::MetadataLoaderImpl {
CUSubprograms.clear();
}
+ /// Upgrade old-style bare DIGlobalVariables to DIGlobalVariableExpressions.
+ void upgradeCUVariables() {
+ if (!NeedUpgradeToDIGlobalVariableExpression)
+ return;
+
+ // Upgrade list of variables attached to the CUs.
+ if (NamedMDNode *CUNodes = TheModule.getNamedMetadata("llvm.dbg.cu"))
+ for (unsigned I = 0, E = CUNodes->getNumOperands(); I != E; ++I) {
+ auto *CU = cast<DICompileUnit>(CUNodes->getOperand(I));
+ if (auto *GVs = dyn_cast_or_null<MDTuple>(CU->getRawGlobalVariables()))
+ for (unsigned I = 0; I < GVs->getNumOperands(); I++)
+ if (auto *GV =
+ dyn_cast_or_null<DIGlobalVariable>(GVs->getOperand(I))) {
+ auto *DGVE =
+ DIGlobalVariableExpression::getDistinct(Context, GV, nullptr);
+ GVs->replaceOperandWith(I, DGVE);
+ }
+ }
+
+ // Upgrade variables attached to globals.
+ for (auto &GV : TheModule.globals()) {
+ SmallVector<MDNode *, 1> MDs, NewMDs;
+ GV.getMetadata(LLVMContext::MD_dbg, MDs);
+ GV.eraseMetadata(LLVMContext::MD_dbg);
+ for (auto *MD : MDs)
+ if (auto *DGV = dyn_cast_or_null<DIGlobalVariable>(MD)) {
+ auto *DGVE =
+ DIGlobalVariableExpression::getDistinct(Context, DGV, nullptr);
+ GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
+ } else
+ GV.addMetadata(LLVMContext::MD_dbg, *MD);
+ }
+ }
+
+ void upgradeDebugInfo() {
+ upgradeCUSubprograms();
+ upgradeCUVariables();
+ }
+
public:
MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
@@ -485,8 +525,21 @@ public:
Error parseMetadata(bool ModuleLevel);
bool hasFwdRefs() const { return MetadataList.hasFwdRefs(); }
- Metadata *getMetadataFwdRef(unsigned Idx) {
- return MetadataList.getMetadataFwdRef(Idx);
+
+ Metadata *getMetadataFwdRefOrLoad(unsigned ID) {
+ if (ID < MDStringRef.size())
+ return lazyLoadOneMDString(ID);
+ if (auto *MD = MetadataList.lookup(ID))
+ return MD;
+ // If lazy-loading is enabled, we try recursively to load the operand
+ // instead of creating a temporary.
+ if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) {
+ PlaceholderQueue Placeholders;
+ lazyLoadOneMetadata(ID, Placeholders);
+ resolveForwardRefsAndPlaceholders(Placeholders);
+ return MetadataList.lookup(ID);
+ }
+ return MetadataList.getMetadataFwdRef(ID);
}
MDNode *getMDNodeFwdRefOrNull(unsigned Idx) {
@@ -713,7 +766,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Reading the named metadata created forward references and/or
// placeholders, that we flush here.
resolveForwardRefsAndPlaceholders(Placeholders);
- upgradeCUSubprograms();
+ upgradeDebugInfo();
// Return at the beginning of the block, since it is easy to skip it
// entirely from there.
Stream.ReadBlockEnd(); // Pop the abbrev block context.
@@ -737,7 +790,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
return error("Malformed block");
case BitstreamEntry::EndBlock:
resolveForwardRefsAndPlaceholders(Placeholders);
- upgradeCUSubprograms();
+ upgradeDebugInfo();
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -768,13 +821,12 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
unsigned ID, PlaceholderQueue &Placeholders) {
assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size());
assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
-#ifndef NDEBUG
// Lookup first if the metadata hasn't already been loaded.
if (auto *MD = MetadataList.lookup(ID)) {
auto *N = dyn_cast_or_null<MDNode>(MD);
- assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
+ if (!N->isTemporary())
+ return;
}
-#endif
SmallVector<uint64_t, 64> Record;
StringRef Blob;
IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
@@ -827,8 +879,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMD = [&](unsigned ID) -> Metadata * {
if (ID < MDStringRef.size())
return lazyLoadOneMDString(ID);
- if (!IsDistinct)
+ if (!IsDistinct) {
+ if (auto *MD = MetadataList.lookup(ID))
+ return MD;
+ // If lazy-loading is enabled, we try recursively to load the operand
+ // instead of creating a temporary.
+ if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) {
+ // Create a temporary for the node that is referencing the operand we
+ // will lazy-load. It is needed before recursing in case there are
+ // uniquing cycles.
+ MetadataList.getMetadataFwdRef(NextMetadataNo);
+ lazyLoadOneMetadata(ID, Placeholders);
+ return MetadataList.lookup(ID);
+ }
+ // Return a temporary.
return MetadataList.getMetadataFwdRef(ID);
+ }
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
return MD;
return &Placeholders.getPlaceholderOp(ID);
@@ -893,7 +959,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// If this isn't a LocalAsMetadata record, we're dropping it. This used
// to be legal, but there's no upgrade path.
auto dropRecord = [&] {
- MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
+ MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo);
+ NextMetadataNo++;
};
if (Record.size() != 2) {
dropRecord();
@@ -908,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_OLD_NODE: {
@@ -933,7 +1001,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
} else
Elts.push_back(nullptr);
}
- MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
+ MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_VALUE: {
@@ -946,7 +1015,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_DISTINCT_NODE:
@@ -959,7 +1029,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Elts.push_back(getMDOrNull(ID));
MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
: MDNode::get(Context, Elts),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_LOCATION: {
@@ -973,7 +1044,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Metadata *InlinedAt = getMDOrNull(Record[4]);
MetadataList.assignValue(
GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GENERIC_DEBUG: {
@@ -993,7 +1065,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
DwarfOps.push_back(getMDOrNull(Record[I]));
MetadataList.assignValue(
GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_SUBRANGE: {
@@ -1004,7 +1077,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
GET_OR_DISTINCT(DISubrange,
(Context, Record[1], unrotateSign(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_ENUMERATOR: {
@@ -1015,7 +1089,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
getMDString(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_BASIC_TYPE: {
@@ -1027,7 +1102,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
GET_OR_DISTINCT(DIBasicType,
(Context, Record[1], getMDString(Record[2]), Record[3],
Record[4], Record[5])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_DERIVED_TYPE: {
@@ -1043,7 +1119,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
Record[9], Flags, getDITypeRefOrNull(Record[11]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
@@ -1108,7 +1185,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
- MetadataList.assignValue(CT, NextMetadataNo++);
+ MetadataList.assignValue(CT, NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_SUBROUTINE_TYPE: {
@@ -1125,7 +1203,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
@@ -1139,7 +1218,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
getMDString(Record[4]), getMDString(Record[5]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
@@ -1155,7 +1235,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.size() == 3 ? DIFile::CSK_None
: static_cast<DIFile::ChecksumKind>(Record[3]),
Record.size() == 3 ? nullptr : getMDString(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_COMPILE_UNIT: {
@@ -1174,7 +1255,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16]);
- MetadataList.assignValue(CU, NextMetadataNo++);
+ MetadataList.assignValue(CU, NextMetadataNo);
+ NextMetadataNo++;
// Move the Upgrade the list of subprograms.
if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
@@ -1221,7 +1303,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[16 + Offset]), // declaration
getMDOrNull(Record[17 + Offset]) // variables
));
- MetadataList.assignValue(SP, NextMetadataNo++);
+ MetadataList.assignValue(SP, NextMetadataNo);
+ NextMetadataNo++;
// Upgrade sp->function mapping to function->sp mapping.
if (HasFn) {
@@ -1246,7 +1329,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
GET_OR_DISTINCT(DILexicalBlock,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3], Record[4])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_LEXICAL_BLOCK_FILE: {
@@ -1258,7 +1342,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
GET_OR_DISTINCT(DILexicalBlockFile,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_NAMESPACE: {
@@ -1272,7 +1357,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]),
Record[4], ExportSymbols)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_MACRO: {
@@ -1284,7 +1370,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
GET_OR_DISTINCT(DIMacro,
(Context, Record[1], Record[2], getMDString(Record[3]),
getMDString(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_MACRO_FILE: {
@@ -1296,7 +1383,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
GET_OR_DISTINCT(DIMacroFile,
(Context, Record[1], Record[2], getMDOrNull(Record[3]),
getMDOrNull(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_TEMPLATE_TYPE: {
@@ -1307,7 +1395,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
(Context, getMDString(Record[1]),
getDITypeRefOrNull(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_TEMPLATE_VALUE: {
@@ -1320,7 +1409,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, Record[1], getMDString(Record[2]),
getDITypeRefOrNull(Record[3]),
getMDOrNull(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GLOBAL_VAR: {
@@ -1338,7 +1428,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
getMDOrNull(Record[10]), Record[11])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
} else if (Version == 0) {
// Upgrade old metadata, which stored a global variable reference or a
// ConstantInt here.
@@ -1369,10 +1460,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
getMDOrNull(Record[10]), AlignInBits));
- auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
- MetadataList.assignValue(DGVE, NextMetadataNo++);
+ DIGlobalVariableExpression *DGVE = nullptr;
+ if (Attach || Expr)
+ DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
+ else
+ NeedUpgradeToDIGlobalVariableExpression = true;
if (Attach)
Attach->addDebugInfo(DGVE);
+
+ auto *MDNode = Expr ? cast<Metadata>(DGVE) : cast<Metadata>(DGV);
+ MetadataList.assignValue(MDNode, NextMetadataNo);
+ NextMetadataNo++;
} else
return error("Invalid record");
@@ -1403,7 +1501,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
getDITypeRefOrNull(Record[5 + HasTag]),
Record[6 + HasTag], Flags, AlignInBits)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_EXPRESSION: {
@@ -1420,7 +1519,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(
GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GLOBAL_VAR_EXPR: {
@@ -1431,7 +1531,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_OBJC_PROPERTY: {
@@ -1445,7 +1546,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[2]), Record[3],
getMDString(Record[4]), getMDString(Record[5]),
Record[6], getDITypeRefOrNull(Record[7]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
@@ -1458,7 +1560,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, Record[1], getMDOrNull(Record[2]),
getDITypeRefOrNull(Record[3]), Record[4],
getMDString(Record[5]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_STRING_OLD: {
@@ -1468,13 +1571,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
++NumMDStringLoaded;
Metadata *MD = MDString::get(Context, String);
- MetadataList.assignValue(MD, NextMetadataNo++);
+ MetadataList.assignValue(MD, NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_STRINGS: {
auto CreateNextMDString = [&](StringRef Str) {
++NumMDStringLoaded;
- MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
+ MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo);
+ NextMetadataNo++;
};
if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
return Err;
@@ -1714,8 +1819,8 @@ bool MetadataLoader::hasFwdRefs() const { return Pimpl->hasFwdRefs(); }
/// Return the given metadata, creating a replaceable forward reference if
/// necessary.
-Metadata *MetadataLoader::getMetadataFwdRef(unsigned Idx) {
- return Pimpl->getMetadataFwdRef(Idx);
+Metadata *MetadataLoader::getMetadataFwdRefOrLoad(unsigned Idx) {
+ return Pimpl->getMetadataFwdRefOrLoad(Idx);
}
MDNode *MetadataLoader::getMDNodeFwdRefOrNull(unsigned Idx) {
diff --git a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h
index 753846c6ead..442dfc94e4e 100644
--- a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -63,7 +63,7 @@ public:
/// Return the given metadata, creating a replaceable forward reference if
/// necessary.
- Metadata *getMetadataFwdRef(unsigned Idx);
+ Metadata *getMetadataFwdRefOrLoad(unsigned Idx);
MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9f6caa95a9e..24fdbfc901f 100644
--- a/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -567,6 +567,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->AddBlankLine();
}
+/// Emit the directive and value for debug thread local expression
+///
+/// \p Value - The value to emit.
+/// \p Size - The size of the integer (in bytes) to emit.
+void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+ unsigned Size) const {
+ OutStreamer->EmitValue(Value, Size);
+}
+
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index a8a3b30d5b6..87991899547 100644
--- a/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -484,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
+ AP->EmitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 0db623bbc29..d904372af58 100644
--- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -522,22 +522,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
}
// .. else use frame index.
- if (DV.getFrameIndex().empty())
+ if (!DV.hasFrameIndexExprs())
return VariableDie;
- auto Expr = DV.getExpression().begin();
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- for (auto FI : DV.getFrameIndex()) {
+ for (auto &Fragment : DV.getFrameIndexExprs()) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
- DwarfExpr.addFragmentOffset(*Expr);
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+ DwarfExpr.addFragmentOffset(Fragment.Expr);
DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
FrameReg, Offset);
- DwarfExpr.AddExpression(*Expr);
- ++Expr;
+ DwarfExpr.AddExpression(Fragment.Expr);
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index b465b98f368..91a3d0989cc 100644
--- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -199,6 +199,15 @@ const DIType *DbgVariable::getType() const {
return Ty;
}
+ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
+ return A.Expr->getFragmentInfo()->OffsetInBits <
+ B.Expr->getFragmentInfo()->OffsetInBits;
+ });
+ return FrameIndexExprs;
+}
+
static const DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
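Aside: the new getFrameIndexExprs() above orders fragment records by their start offset before returning them. A minimal standalone sketch of that sort, assuming an illustrative FragmentRec type in place of the real FrameIndexExpr/DIExpression pair:

    #include <algorithm>
    #include <vector>

    struct FragmentRec {
      unsigned OffsetInBits; // where this fragment starts within the variable
    };

    // Sort fragments by ascending start offset, as the comparison lambda in
    // getFrameIndexExprs() does.
    void sortByOffset(std::vector<FragmentRec> &Frags) {
      std::sort(Frags.begin(), Frags.end(),
                [](const FragmentRec &A, const FragmentRec &B) {
                  return A.OffsetInBits < B.OffsetInBits;
                });
    }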
diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index e5bf33db81f..253e3f06200 100644
--- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -54,7 +54,7 @@ class MachineModuleInfo;
///
/// Variables can be created from allocas, in which case they're generated from
/// the MMI table. Such variables can have multiple expressions and frame
-/// indices. The \a Expr and \a FrameIndices array must match.
+/// indices.
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
@@ -64,11 +64,16 @@ class MachineModuleInfo;
class DbgVariable {
const DILocalVariable *Var; /// Variable Descriptor.
const DILocation *IA; /// Inlined at location.
- SmallVector<const DIExpression *, 1> Expr; /// Complex address.
DIE *TheDIE = nullptr; /// Variable DIE.
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
- SmallVector<int, 1> FrameIndex; /// Frame index.
+
+ struct FrameIndexExpr {
+ int FI;
+ const DIExpression *Expr;
+ };
+ mutable SmallVector<FrameIndexExpr, 1>
+ FrameIndexExprs; /// Frame index + expression.
public:
/// Construct a DbgVariable.
@@ -80,21 +85,18 @@ public:
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
- assert(Expr.empty() && "Already initialized?");
- assert(FrameIndex.empty() && "Already initialized?");
+ assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(~FI && "Expected valid index");
- Expr.push_back(E);
- FrameIndex.push_back(FI);
+ FrameIndexExprs.push_back({FI, E});
}
/// Initialize from a DBG_VALUE instruction.
void initializeDbgValue(const MachineInstr *DbgValue) {
- assert(Expr.empty() && "Already initialized?");
- assert(FrameIndex.empty() && "Already initialized?");
+ assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
@@ -103,16 +105,15 @@ public:
MInsn = DbgValue;
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
- Expr.push_back(E);
+ FrameIndexExprs.push_back({0, E});
}
// Accessors.
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
- ArrayRef<const DIExpression *> getExpression() const { return Expr; }
const DIExpression *getSingleExpression() const {
- assert(MInsn && Expr.size() <= 1);
- return Expr.size() ? Expr[0] : nullptr;
+ assert(MInsn && FrameIndexExprs.size() <= 1);
+ return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
@@ -120,7 +121,9 @@ public:
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+ /// Get the FI entries, sorted by fragment offset.
+ ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
+ bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
void addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
@@ -128,16 +131,15 @@ public:
assert(V.Var == Var && "conflicting variable");
assert(V.IA == IA && "conflicting inlined-at location");
- assert(!FrameIndex.empty() && "Expected an MMI entry");
- assert(!V.FrameIndex.empty() && "Expected an MMI entry");
- assert(Expr.size() == FrameIndex.size() && "Mismatched expressions");
- assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions");
+ assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
+ assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
- Expr.append(V.Expr.begin(), V.Expr.end());
- FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end());
- assert(all_of(Expr, [](const DIExpression *E) {
- return E && E->isFragment();
- }) && "conflicting locations for variable");
+ FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
+ assert(all_of(FrameIndexExprs,
+ [](FrameIndexExpr &FIE) {
+ return FIE.Expr && FIE.Expr->isFragment();
+ }) &&
+ "conflicting locations for variable");
}
// Translate tag to proper Dwarf tag.
@@ -167,11 +169,11 @@ public:
bool hasComplexAddress() const {
assert(MInsn && "Expected DBG_VALUE, not MMI variable");
- assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable");
- assert(
- (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) &&
- "Invalid Expr for DBG_VALUE");
- return !Expr.empty();
+ assert((FrameIndexExprs.empty() ||
+ (FrameIndexExprs.size() == 1 &&
+ FrameIndexExprs[0].Expr->getNumElements())) &&
+ "Invalid Expr for DBG_VALUE");
+ return !FrameIndexExprs.empty();
}
bool isBlockByrefVariable() const;
const DIType *getType() const;
diff --git a/gnu/llvm/lib/CodeGen/BranchFolding.cpp b/gnu/llvm/lib/CodeGen/BranchFolding.cpp
index a898e327ccc..6fba161033b 100644
--- a/gnu/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/gnu/llvm/lib/CodeGen/BranchFolding.cpp
@@ -49,7 +49,6 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
-STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -1387,42 +1386,6 @@ ReoptimizeBlock:
}
}
- if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction()->optForSize()) {
- // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
- // direction, thereby defeating careful block placement and regressing
- // performance. Therefore, only consider this for optsize functions.
- MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
- if (TII->isUnconditionalTailCall(TailCall)) {
- MachineBasicBlock *Pred = *MBB->pred_begin();
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- bool PredAnalyzable =
- !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
-
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
- // The predecessor has a conditional branch to this block which consists
- // of only a tail call. Try to fold the tail call into the conditional
- // branch.
- if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch insturction so replaceBranchWithTailCall() doesn't
- // have to search for it.
- TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
- ++NumTailCalls;
- Pred->removeSuccessor(MBB);
- MadeChange = true;
- return MadeChange;
- }
- }
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
- }
- }
-
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
diff --git a/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index e7c6b03f1a4..32c57e3e370 100644
--- a/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -707,9 +707,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<LiveReg, 4> Regs;
- for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
- int rx = *i;
+ SmallVector<const LiveReg *, 4> Regs;
+ for (int rx : used) {
assert(LiveRegs && "no space allocated for live registers");
const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
@@ -718,16 +717,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
continue;
}
// Sorted insertion.
- bool Inserted = false;
- for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end();
- i != e && !Inserted; ++i) {
- if (LR.Def < i->Def) {
- Inserted = true;
- Regs.insert(i, LR);
- }
- }
- if (!Inserted)
- Regs.push_back(LR);
+ auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
+ [](const LiveReg *LHS, const LiveReg *RHS) {
+ return LHS->Def < RHS->Def;
+ });
+ Regs.insert(I, &LR);
}
// doms are now sorted in order of appearance. Try to merge them all, giving
@@ -735,14 +729,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
DomainValue *dv = nullptr;
while (!Regs.empty()) {
if (!dv) {
- dv = Regs.pop_back_val().Value;
+ dv = Regs.pop_back_val()->Value;
// Force the first dv to match the current instruction.
dv->AvailableDomains = dv->getCommonDomains(available);
assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *Latest = Regs.pop_back_val().Value;
+ DomainValue *Latest = Regs.pop_back_val()->Value;
// Skip already merged values.
if (Latest == dv || Latest->Next)
continue;
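Aside: the ExecutionDepsFix hunk above replaces a hand-rolled insertion loop with std::upper_bound. A self-contained sketch of that sorted-insertion pattern, using an illustrative LiveRegExample type rather than the real LiveReg:

    #include <algorithm>
    #include <vector>

    struct LiveRegExample {
      unsigned Def; // ordering key, analogous to LiveReg::Def
    };

    // Insert LR into Regs while keeping the vector ordered by ascending Def.
    void insertSorted(std::vector<const LiveRegExample *> &Regs,
                      const LiveRegExample &LR) {
      auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
                                [](const LiveRegExample *LHS,
                                   const LiveRegExample *RHS) {
                                  return LHS->Def < RHS->Def;
                                });
      Regs.insert(I, &LR);
    }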
diff --git a/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c8f79d7fb71..ec35b3f6449 100644
--- a/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned MaxFactor) {
+ unsigned MaxFactor, unsigned OpNumElts) {
unsigned NumElts = Mask.size();
if (NumElts < 4)
return false;
@@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
if (StartMask < 0)
break;
+ // We must stay within the vectors; This case can happen with undefs.
+ if (StartMask + LaneLen > OpNumElts*2)
+ break;
}
// Found an interleaved mask of current factor.
@@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+ unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+ if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
return false;
DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 92d043df26b..5de6dec29fb 100644
--- a/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -61,6 +61,7 @@ namespace {
private:
void ClobberRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
@@ -120,6 +121,18 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
}
}
+void MachineCopyPropagation::ReadRegister(unsigned Reg) {
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end()) {
+ DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+}
+
/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
@@ -212,12 +225,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
- }
+ ReadRegister(Src);
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ ReadRegister(Reg);
}
DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
@@ -234,6 +249,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// ...
// %xmm2<def> = copy %xmm9
ClobberRegister(Def);
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ ClobberRegister(Reg);
+ }
// Remember Def is defined by the copy.
for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
@@ -268,17 +291,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (MO.isDef()) {
Defs.push_back(Reg);
- continue;
- }
-
- // If 'Reg' is defined by a copy, the copy is no longer a candidate
- // for elimination.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
- }
+ } else {
+ ReadRegister(Reg);
}
// Treat undef use like defs for copy propagation but not for
// dead copy. We would need to do a liveness check to be sure the copy
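Aside: ReadRegister(), factored out above, encodes the rule that reading a register disqualifies the copy that last defined it from being "maybe dead". A simplified sketch of that bookkeeping, with illustrative containers and the MCRegAliasIterator alias expansion omitted:

    #include <map>
    #include <set>

    using Reg = unsigned;
    struct CopyRecord {
      Reg Src, Def; // the copy instruction's source and destination
    };

    std::map<Reg, CopyRecord *> CopyMap;    // last copy defining each register
    std::set<CopyRecord *> MaybeDeadCopies; // copies with no reader seen yet

    // A read of R means the copy that produced R is observably used, so it
    // can no longer be deleted as dead.
    void readRegister(Reg R) {
      auto CI = CopyMap.find(R);
      if (CI != CopyMap.end())
        MaybeDeadCopies.erase(CI->second);
    }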
diff --git a/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp b/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 0f4bb59c49a..4bb3c229afc 100644
--- a/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1556,9 +1556,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
unsigned DstReg = CP.getDstReg();
+ unsigned SrcReg = CP.getSrcReg();
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(DstReg) && "Not a reserved register");
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &RHS = LIS->getInterval(SrcReg);
DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
assert(RHS.containsOneValue() && "Invalid join with reserved register");
@@ -1592,17 +1593,36 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Delete the identity copy.
MachineInstr *CopyMI;
if (CP.isFlipped()) {
- CopyMI = MRI->getVRegDef(RHS.reg);
+ // Physreg is copied into vreg
+ // %vregY = COPY %X
+ // ... //< no other def of %X here
+ // use %vregY
+ // =>
+ // ...
+ // use %X
+ CopyMI = MRI->getVRegDef(SrcReg);
} else {
- if (!MRI->hasOneNonDBGUse(RHS.reg)) {
+ // VReg is copied into physreg:
+ // %vregX = def
+ // ... //< no other def or use of %Y here
+ // %Y = COPY %vregX
+ // =>
+ // %Y = def
+ // ...
+ if (!MRI->hasOneNonDBGUse(SrcReg)) {
DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
return false;
}
- MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
- CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
- const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
- const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot();
+ if (!LIS->intervalIsInOneMBB(RHS)) {
+ DEBUG(dbgs() << "\t\tComplex control flow!\n");
+ return false;
+ }
+
+ MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
+ SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
if (!MRI->isConstantPhysReg(DstReg)) {
// We checked above that there are no interfering defs of the physical
@@ -1629,8 +1649,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// We're going to remove the copy which defines a physical reserved
// register, so remove its valno, etc.
- DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at "
- << CopyRegIdx << "\n");
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
+ << " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
// Create a new dead def at the new def location.
diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 680f62fa91b..2c7bffe7650 100644
--- a/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8123,9 +8123,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8137,7 +8140,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8367,10 +8373,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8384,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -13060,9 +13072,15 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
return SDValue();
- if (InVT1 != InVT2)
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
ShuffleNumElems = NumElems * 2;
} else {
// Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3b91e58879b..4a9042cfb3f 100644
--- a/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -502,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0));
- unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned Reg;
+ MachineInstr *DefMI;
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+ if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Reg = R->getReg();
+ DefMI = nullptr;
+ } else {
+ Reg = getVR(Node->getOperand(0), VRBaseMap);
+ DefMI = MRI->getVRegDef(Reg);
+ }
+
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
@@ -519,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
MRI->clearKillFlags(SrcReg);
} else {
- // VReg may not support a SubIdx sub-register, and we may need to
+ // Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- VReg = ConstrainForSubReg(VReg, SubIdx,
- Node->getOperand(0).getSimpleValueType(),
- Node->getDebugLoc());
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Reg = ConstrainForSubReg(Reg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
// Create the extract_subreg machine instruction.
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ MachineInstrBuilder CopyMI =
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ CopyMI.addReg(Reg, 0, SubIdx);
+ else
+ CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
}
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 27a9ac337f2..6906f67ebac 100644
--- a/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3439,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
- if (L->getValueType(0).isVector()) {
+ if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
+ // Later code assumes the vector loads produced will be mergeable, so we
+ // must pad the final entry up to the previous width. Scalars are
+ // combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9ca646534e2..996c95bd5f0 100644
--- a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5832,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6d717b44eb7..64e6c221229 100644
--- a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2248,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
/// to use the new results.
void SelectionDAGISel::UpdateChains(
SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+ SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
// Now that all the normal results are replaced, we replace the chain and
@@ -2260,6 +2260,11 @@ void SelectionDAGISel::UpdateChains(
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
+ // If ChainNode is null, it's because we replaced it on a previous
+ // iteration and we cleared it out of the map. Just skip it.
+ if (!ChainNode)
+ continue;
+
assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
"Deleted node left in chain");
@@ -2272,6 +2277,11 @@ void SelectionDAGISel::UpdateChains(
if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ SelectionDAG::DAGNodeDeletedListener NDL(
+ *CurDAG, [&](SDNode *N, SDNode *E) {
+ std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+ static_cast<SDNode *>(nullptr));
+ });
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
@@ -2772,14 +2782,15 @@ struct MatchScope {
/// for this.
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
{
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
- SmallVectorImpl<MatchScope> &MatchScopes;
+ SDNode **NodeToMatch;
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
public:
- MatchStateUpdater(SelectionDAG &DAG,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
- SmallVectorImpl<MatchScope> &MS) :
- SelectionDAG::DAGUpdateListener(DAG),
- RecordedNodes(RN), MatchScopes(MS) { }
+ MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
+ SmallVectorImpl<MatchScope> &MS)
+ : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
+ RecordedNodes(RN), MatchScopes(MS) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
@@ -2789,6 +2800,9 @@ public:
// update listener during matching a complex patterns.
if (!E || E->isMachineOpcode())
return;
+ // Check if NodeToMatch was updated.
+ if (N == *NodeToMatch)
+ *NodeToMatch = E;
// Performing linear search here does not matter because we almost never
// run this code. You'd have to have a CSE during complex pattern
// matching.
@@ -3081,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// consistent.
std::unique_ptr<MatchStateUpdater> MSU;
if (ComplexPatternFuncMutatesDAG())
- MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
MatchScopes));
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
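As an aside for readers unfamiliar with the update-listener pattern the two hunks above rely on, here is a minimal, dependency-free C++ sketch. All names are made up and this is not LLVM code; it only mirrors the shape of the change: a listener registered with the graph is told when a node is replaced, and the matcher uses that callback to re-point its saved node and null out stale entries, much as MatchStateUpdater and the DAGNodeDeletedListener in UpdateChains now do.

#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
#include <vector>

struct Node { int id; };

class Graph {
public:
  using Listener = std::function<void(Node *Old, Node *New)>;
  void addListener(Listener L) { Listeners.push_back(std::move(L)); }
  // Replacing a node notifies every registered listener before the old
  // pointer is considered dead.
  void replace(Node *Old, Node *New) {
    for (auto &L : Listeners)
      L(Old, New);
  }

private:
  std::vector<Listener> Listeners;
};

int main() {
  Node A{1}, B{2};
  Node *NodeToMatch = &A;                 // pointer the matcher holds on to
  std::vector<Node *> ChainNodes{&A, &A}; // previously recorded chain nodes

  Graph G;
  // Keep matcher state consistent when a node is replaced: re-point the
  // saved node (like MatchStateUpdater::NodeDeleted) and clear recorded
  // entries (like the listener nulling ChainNodesMatched above).
  G.addListener([&](Node *Old, Node *New) {
    if (Old == NodeToMatch)
      NodeToMatch = New;
    std::replace(ChainNodes.begin(), ChainNodes.end(), Old,
                 static_cast<Node *>(nullptr));
  });

  G.replace(&A, &B); // a CSE-style replacement happening mid-match
  assert(NodeToMatch == &B);
  assert(ChainNodes[0] == nullptr); // the later loop simply skips null entries
  std::cout << "NodeToMatch now points at node " << NodeToMatch->id << "\n";
  return 0;
}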
diff --git a/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index a14b86179d6..40537e4fa78 100644
--- a/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -150,13 +150,13 @@ static void computePrevailingCopies(
}
static StringMap<MemoryBufferRef>
-generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
+generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
StringMap<MemoryBufferRef> ModuleMap;
for (auto &ModuleBuffer : Modules) {
assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
ModuleMap.end() &&
"Expect unique Buffer Identifier");
- ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
+ ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
}
return ModuleMap;
}
@@ -522,13 +522,13 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
} // end anonymous namespace
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
- MemoryBufferRef Buffer(Data, Identifier);
+ ThinLTOBuffer Buffer(Data, Identifier);
if (Modules.empty()) {
// First module added, so initialize the triple and some options
LLVMContext Context;
StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr =
- expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
if (TripleOrErr)
TripleStr = *TripleOrErr;
Triple TheTriple(TripleStr);
@@ -538,8 +538,8 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
else {
LLVMContext Context;
StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr =
- expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
if (TripleOrErr)
TripleStr = *TripleOrErr;
assert(TMBuilder.TheTriple.str() == TripleStr &&
@@ -588,7 +588,8 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
uint64_t NextModuleId = 0;
for (auto &ModuleBuffer : Modules) {
Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
- object::ModuleSummaryIndexObjectFile::create(ModuleBuffer);
+ object::ModuleSummaryIndexObjectFile::create(
+ ModuleBuffer.getMemBuffer());
if (!ObjOrErr) {
// FIXME diagnose
logAllUnhandledErrors(
@@ -829,11 +830,22 @@ static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
+ // Prepare the resulting object vector
+ assert(ProducedBinaries.empty() && "The generator should not be reused");
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries.resize(Modules.size());
+ else {
+ sys::fs::create_directories(SavedObjectsDirectoryPath);
+ bool IsDir;
+ sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
+ if (!IsDir)
+ report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ ProducedBinaryFiles.resize(Modules.size());
+ }
+
if (CodeGenOnly) {
// Perform only parallel codegen and return.
ThreadPool Pool;
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- ProducedBinaries.resize(Modules.size());
int count = 0;
for (auto &ModuleBuffer : Modules) {
Pool.async([&](int count) {
@@ -841,11 +853,17 @@ void ThinLTOCodeGenerator::run() {
Context.setDiscardValueNames(LTODiscardValueNames);
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
- /*IsImporting*/ false);
+ auto TheModule =
+ loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
+ /*IsImporting*/ false);
// CodeGen
- ProducedBinaries[count] = codegen(*TheModule);
+ auto OutputBuffer = codegen(*TheModule);
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries[count] = std::move(OutputBuffer);
+ else
+ ProducedBinaryFiles[count] = writeGeneratedObject(
+ count, "", SavedObjectsDirectoryPath, *OutputBuffer);
}, count++);
}
@@ -866,18 +884,6 @@ void ThinLTOCodeGenerator::run() {
WriteIndexToFile(*Index, OS);
}
- // Prepare the resulting object vector
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- if (SavedObjectsDirectoryPath.empty())
- ProducedBinaries.resize(Modules.size());
- else {
- sys::fs::create_directories(SavedObjectsDirectoryPath);
- bool IsDir;
- sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
- if (!IsDir)
- report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
- ProducedBinaryFiles.resize(Modules.size());
- }
// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);
@@ -939,8 +945,8 @@ void ThinLTOCodeGenerator::run() {
std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
[&](int LeftIndex, int RightIndex) {
- auto LSize = Modules[LeftIndex].getBufferSize();
- auto RSize = Modules[RightIndex].getBufferSize();
+ auto LSize = Modules[LeftIndex].getBuffer().size();
+ auto RSize = Modules[RightIndex].getBuffer().size();
return LSize > RSize;
});
@@ -992,8 +998,9 @@ void ThinLTOCodeGenerator::run() {
}
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
- /*IsImporting*/ false);
+ auto TheModule =
+ loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
+ /*IsImporting*/ false);
// Save temps: original file.
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
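The recurring edits in this file follow one mechanical pattern: the Modules vector now holds wrapper objects rather than raw MemoryBufferRefs, so every consumer fetches the underlying buffer explicitly. The toy C++ below shows only that call-site shape; BufferWrapper and buildModuleMap are hypothetical names, std::string stands in for LLVM's buffer types, and none of this is the real ThinLTOBuffer API.

#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for a buffer wrapper: it owns the data and an
// identifier, and consumers go through accessors to reach the bytes.
class BufferWrapper {
public:
  BufferWrapper(std::string Data, std::string Identifier)
      : Data(std::move(Data)), Identifier(std::move(Identifier)) {}
  const std::string &getBuffer() const { return Data; }
  const std::string &getBufferIdentifier() const { return Identifier; }

private:
  std::string Data;
  std::string Identifier;
};

// Mirrors the shape of generateModuleMap(): index by identifier, reach the
// bytes through the wrapper.
std::map<std::string, const std::string *>
buildModuleMap(const std::vector<BufferWrapper> &Modules) {
  std::map<std::string, const std::string *> Map;
  for (const auto &M : Modules) {
    assert(!Map.count(M.getBufferIdentifier()) && "expect unique identifier");
    Map[M.getBufferIdentifier()] = &M.getBuffer();
  }
  return Map;
}

int main() {
  std::vector<BufferWrapper> Modules;
  Modules.emplace_back("...bitcode bytes...", "a.o");
  Modules.emplace_back("more bitcode bytes", "b.o");

  auto Map = buildModuleMap(Modules);
  // Every buffer access now goes through the wrapper, e.g. ordering modules
  // largest-first the way run() does with getBuffer().size().
  std::cout << "a.o holds " << Map["a.o"]->size() << " bytes\n";
  return 0;
}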
diff --git a/gnu/llvm/lib/MC/MCCodeView.cpp b/gnu/llvm/lib/MC/MCCodeView.cpp
index 3773542cf8a..99a5c11a498 100644
--- a/gnu/llvm/lib/MC/MCCodeView.cpp
+++ b/gnu/llvm/lib/MC/MCCodeView.cpp
@@ -509,17 +509,17 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
// are artificially constructing.
size_t RecordSize = FixedSizePortion.size() +
sizeof(LocalVariableAddrRange) + 4 * NumGaps;
- // Write out the recrod size.
- support::endian::Writer<support::little>(OS).write<uint16_t>(RecordSize);
+ // Write out the record size.
+ LEWriter.write<uint16_t>(RecordSize);
// Write out the fixed size prefix.
OS << FixedSizePortion;
// Make space for a fixup that will eventually have a section relative
// relocation pointing at the offset where the variable becomes live.
Fixups.push_back(MCFixup::create(Contents.size(), BE, FK_SecRel_4));
- Contents.resize(Contents.size() + 4); // Fixup for code start.
+ LEWriter.write<uint32_t>(0); // Fixup for code start.
// Make space for a fixup that will record the section index for the code.
Fixups.push_back(MCFixup::create(Contents.size(), BE, FK_SecRel_2));
- Contents.resize(Contents.size() + 2); // Fixup for section index.
+ LEWriter.write<uint16_t>(0); // Fixup for section index.
// Write down the range's extent.
LEWriter.write<uint16_t>(Chunk);
@@ -529,7 +529,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
} while (RangeSize > 0);
// Emit the gaps afterwards.
- assert((NumGaps == 0 || Bias < MaxDefRange) &&
+ assert((NumGaps == 0 || Bias <= MaxDefRange) &&
"large ranges should not have gaps");
unsigned GapStartOffset = GapAndRangeSizes[I].second;
for (++I; I != J; ++I) {
@@ -537,7 +537,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
assert(I < GapAndRangeSizes.size());
std::tie(GapSize, RangeSize) = GapAndRangeSizes[I];
LEWriter.write<uint16_t>(GapStartOffset);
- LEWriter.write<uint16_t>(RangeSize);
+ LEWriter.write<uint16_t>(GapSize);
GapStartOffset += GapSize + RangeSize;
}
}
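One remark on the MCCodeView hunk: besides correcting the gap size and the assert bound, it replaces the manual Contents.resize() calls with zero writes through the little-endian writer, keeping the emitted bytes and the recorded fixup offsets on a single path. The stand-alone C++ sketch below illustrates that pattern with a toy LEWriter; it is not the LLVM support class, just an illustration of writing placeholder bytes through the same writer as everything else.

#include <cstdint>
#include <iostream>
#include <vector>

// Toy little-endian writer: every value, including placeholder bytes that a
// later fixup will patch, goes through the same append path.
struct LEWriter {
  std::vector<uint8_t> &Out;
  template <typename T> void write(T V) {
    for (unsigned I = 0; I != sizeof(T); ++I)
      Out.push_back(uint8_t(V >> (8 * I))); // least significant byte first
  }
};

int main() {
  std::vector<uint8_t> Contents;
  LEWriter W{Contents};

  uint16_t RecordSize = 12;
  W.write<uint16_t>(RecordSize);             // record size prefix
  size_t CodeStartFixupOffset = Contents.size();
  W.write<uint32_t>(0);                      // placeholder for code-start fixup
  size_t SectionIndexFixupOffset = Contents.size();
  W.write<uint16_t>(0);                      // placeholder for section index
  W.write<uint16_t>(0x30);                   // range extent

  std::cout << "code-start fixup at " << CodeStartFixupOffset
            << ", section-index fixup at " << SectionIndexFixupOffset
            << ", total " << Contents.size() << " bytes\n";
  return 0;
}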
diff --git a/gnu/llvm/lib/MC/MCMachOStreamer.cpp b/gnu/llvm/lib/MC/MCMachOStreamer.cpp
index 45a497240b4..bd425bb7309 100644
--- a/gnu/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/gnu/llvm/lib/MC/MCMachOStreamer.cpp
@@ -142,7 +142,8 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
if (SegName == "__TEXT" && SecName == "__eh_frame")
return true;
- if (SegName == "__DATA" && SecName == "__nl_symbol_ptr")
+ if (SegName == "__DATA" && (SecName == "__nl_symbol_ptr" ||
+ SecName == "__thread_ptr"))
return true;
return false;
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64.td b/gnu/llvm/lib/Target/AArch64/AArch64.td
index 740766b151b..91c335fac32 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64.td
@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
"Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
- "AvoidQuadLdStPairs", "true",
- "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+ "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2/M3 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 9058617768d..938779d2369 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
@@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
- // A SwiftError is passed in X19.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>,
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>;
+ : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5c8acba26aa..4c789926e3e 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return false;
// On some CPUs quad load/store pairs are slower than two single load/stores.
- if (Subtarget.avoidQuadLdStPairs()) {
+ if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
default:
break;
diff --git a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8a76c42b589..8e312dcf276 100644
--- a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -687,9 +687,30 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
+ MachineOperand RegOp0 = getLdStRegOp(*RtMI);
+ MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+ // Kill flags may become invalid when moving stores for pairing.
+ if (RegOp0.isUse()) {
+ if (!MergeForward) {
+ // Clear kill flags on store if moving upwards. Example:
+ // STRWui %w0, ...
+ // USE %w1
+ // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
+ RegOp0.setIsKill(false);
+ RegOp1.setIsKill(false);
+ } else {
+ // Clear kill flags of the first store's register. Example:
+ // STRWui %w1, ...
+ // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
+ // STRW %w0
+ unsigned Reg = getLdStRegOp(*I).getReg();
+ for (MachineInstr &MI : make_range(std::next(I), Paired))
+ MI.clearRegisterKills(Reg, TRI);
+ }
+ }
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .addOperand(getLdStRegOp(*RtMI))
- .addOperand(getLdStRegOp(*Rt2MI))
+ .addOperand(RegOp0)
+ .addOperand(RegOp1)
.addOperand(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -832,9 +853,11 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.addImm(Imms);
}
}
- StoreI->clearRegisterKills(StRt, TRI);
- (void)BitExtMI;
+ // Clear kill flags between store and load.
+ for (MachineInstr &MI : make_range(StoreI->getIterator(),
+ BitExtMI->getIterator()))
+ MI.clearRegisterKills(StRt, TRI);
DEBUG(dbgs() << "Promoting load by replacing :\n ");
DEBUG(StoreI->print(dbgs()));
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0b2badff7cc..13022009af1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"Enable SI Machine Scheduler"
>;
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 4acd55eb612..974e79fff3d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdCodeObjectV2()) {
+ if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
EmitAmdKernelCodeT(*MF, KernelInfo);
}
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+ if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
// FIXME: Should use getKernArgSize
header.kernarg_segment_byte_size =
- STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+ STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2b4fc5397b1..5bf347e4865 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
- // omod
- SDValue Ops[8];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e48c1943cb0..54caa2c5dfa 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDLoc SL(N);
switch (Opc) {
case ISD::FADD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMA:
case ISD::FMAD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
SDValue LHS = N0.getOperand(0);
SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(KILL)
+ NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 69567aa5f71..f6adceac6f1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,16 @@ protected:
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
+ bool mayIgnoreSignedZero(SDValue Op) const {
+ if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ return true;
+
+ if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+ return BO->Flags.hasNoSignedZeros();
+
+ return false;
+ }
+
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
INTERP_P2,
PC_ADD_REL_OFFSET,
KILL,
+ DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index f079c8d0c70..d7fa28bdc00 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 74851aedbb2..c35a67de1d7 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
ParseSubtargetFeatures(GPU, FullFS);
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+ // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+ // variants of MUBUF instructions.
+ if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ FlatForGlobal = true;
+ }
+
// FIXME: I don't think think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
- unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplicitArgBytes) const {
+ unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
if (ImplicitBytes == 0)
return ExplicitArgBytes;
diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 51ba501bddd..0e3cb7dc1f8 100644
--- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -311,22 +311,31 @@ public:
return EnableXNACK;
}
- bool isAmdCodeObjectV2() const {
- return isAmdHsaOS() || isMesa3DOS();
+ bool isMesaKernel(const MachineFunction &MF) const {
+ return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ // Covers VS/PS/CS graphics shaders
+ bool isMesaGfxShader(const MachineFunction &MF) const {
+ return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+ return isAmdHsaOS() || isMesaKernel(MF);
}
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdCodeObjectV2() ? 0 : 36;
+ unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+ return isAmdCodeObjectV2(MF) ? 0 : 36;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
- unsigned getImplicitArgNumBytes() const {
- if (isMesa3DOS())
+ unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+ if (isMesaKernel(MF))
return 16;
if (isAmdHsaOS() && isOpenCLEnv())
return 32;
@@ -585,7 +594,7 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
- unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index de7ce5cb9e4..77fee4356b6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
llvm_unreachable("Unsupported private trunc store");
}
- SDValue Chain = Store->getChain();
+ SDValue OldChain = Store->getChain();
+ bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+ // Skip dummy
+ SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
SDValue BasePtr = Store->getBasePtr();
SDValue Offset = Store->getOffset();
EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
// Store dword
// TODO: Can we be smarter about MachinePointerInfo?
- return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+ SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+ // If we are part of expanded vector, make our neighbors depend on this store
+ if (VectorTrunc) {
+ // Make all other vector elements depend on this store
+ Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+ }
+ return NewStore;
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Neither LOCAL nor PRIVATE can do vectors at the moment
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ // Add an extra level of chain to isolate this vector
+ SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+ // TODO: can the chain be replaced without creating a new store?
+ SDValue NewStore = DAG.getTruncStore(
+ NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+ MemVT, StoreNode->getAlignment(),
+ StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+ StoreNode = cast<StoreSDNode>(NewStore);
+ }
+
return scalarizeVectorStore(StoreNode, DAG);
}
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Put the mask in correct place
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
- // Put the mask in correct place
+ // Put the value bits in correct place
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
diff --git a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
index 19795bdde64..9210e66b0fe 100644
--- a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
def MOV : R600_1OP <0x19, "MOV", []>;
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak some in to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+ (outs),
+ (ins),
+ "DUMMY_CHAIN",
+ [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d0a69eafc58..0b571551588 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2());
+ assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+ ST.isAmdCodeObjectV2(MF) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
- assert(!ST.isAmdCodeObjectV2());
+ if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+ assert(!ST.isAmdCodeObjectV2(MF));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- BuildMI(MBB, I, DL, SMovB32, Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL, SMovB32, Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ if (MFI->hasPrivateMemoryInputPtr()) {
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+ const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+ BuildMI(MBB, I, DL, Mov64, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ } else {
+ const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+ PointerType *PtrTy =
+ PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ auto MMO = MF.getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 0, 0);
+ BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addMemOperand(MMO)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ }
+ } else {
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ }
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
.addImm(Rsrc23 & 0xffffffff)
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9140fe6cd14..b98f9f400ee 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (!AMDGPU::isShader(CallConv)) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
- assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+ assert(!Info->hasDispatchPtr() &&
!Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
!Info->hasWorkItemIDZ());
}
+ if (Info->hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(PrivateMemoryPtrReg);
+ }
+
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info->hasPrivateSegmentBuffer()) {
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+ const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_implicit_buffer_ptr: {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ }
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2()) {
+ if (!Subtarget->isAmdCodeObjectV2(MF)) {
DiagnosticInfoUnsupported BadIntrin(
*MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
index b86c0419118..38e31e75ee6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -997,6 +997,11 @@ def : Pat <
>;
def : Pat <
+ (i1 (trunc i16:$a)),
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : Pat <
(i1 (trunc i64:$a)),
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e911817c451..ecd46b95ca6 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateSegmentWaveByteOffset(false),
WorkItemIDX(false),
WorkItemIDY(false),
- WorkItemIDZ(false) {
+ WorkItemIDZ(false),
+ PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || MaySpill)
PrivateSegmentWaveByteOffset = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
+ } else if (ST.isMesaGfxShader(MF)) {
+ if (HasStackObjects || MaySpill)
+ PrivateMemoryInputPtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return FlatScratchInitUserSGPR;
}
+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+ PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return PrivateMemoryPtrUserSGPR;
+}
+
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
MachineFunction *MF,
unsigned FrameIndex,
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233cd78..6fc8d18bceb 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned ScratchRSrcReg;
unsigned ScratchWaveOffsetReg;
+ // Input registers for non-HSA ABI
+ unsigned PrivateMemoryPtrUserSGPR;
+
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ private:
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
+ // Private memory buffer
+ // Compute directly in sgpr[0:1]
+ // Other shaders indirect 64-bits at sgpr[0:1]
+ bool PrivateMemoryInputPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ public:
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+ unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ public:
return WorkItemIDZ;
}
+ bool hasPrivateMemoryInputPtr() const {
+ return PrivateMemoryInputPtr;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -338,6 +351,10 @@ public:
return QueuePtrUserSGPR;
}
+ unsigned getPrivateMemoryPtrUserSGPR() const {
+ return PrivateMemoryPtrUserSGPR;
+ }
+
bool hasSpilledSGPRs() const {
return HasSpilledSGPRs;
}
diff --git a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8c4b24a4504..a1ed5e8441d 100644
--- a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- assert(ST.isAmdCodeObjectV2() &&
- "Non-CodeObjectV2 ABI currently uses relocations");
- assert(MFI->hasPrivateSegmentBuffer());
- return MFI->PrivateSegmentBufferUserSGPR;
+ if (ST.isAmdCodeObjectV2(MF)) {
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ }
+ assert(MFI->hasPrivateMemoryInputPtr());
+ return MFI->PrivateMemoryPtrUserSGPR;
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
assert(MFI->hasKernargSegmentPtr());
return MFI->KernargSegmentPtrUserSGPR;
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a15b9ceff2f..8cae83cd9d1 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -607,12 +607,6 @@ def : Pat<
(COPY $src)
>;
-def : Pat<
- (i1 (trunc i16:$src)),
- (COPY $src)
->;
-
-
def : Pat <
(i16 (trunc i64:$src)),
(EXTRACT_SUBREG $src, sub0)
diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 5efa64d25ce..c2a4d4ba99b 100644
--- a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+ // v_div_scale_{f32|f64} do not support input modifiers.
+ let HasModifiers = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
- let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+ let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
let hasExtraSrcRegAllocReq = 1;
+ let AsmMatchConverter = "";
}
// Double precision division pre-scale.
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
let hasExtraSrcRegAllocReq = 1;
+ let AsmMatchConverter = "";
}
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 8ec9cb02813..95db35ce8ff 100644
--- a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // Emit the XRay table for this function.
+ emitXRayTable();
+
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
diff --git a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
index 9c278a52a7f..7a7b7fede7c 100644
--- a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is passed in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is passed in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// HFAs are passed in a contiguous block of registers, or on the stack
CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
@@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
- // A SwiftError is returned in R6.
- CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>,
+ // A SwiftError is returned in R8.
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
-// R6 is used to pass swifterror, remove it from CSR.
-def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>;
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
diff --git a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 95fcc8dcb45..baa4e0330cf 100644
--- a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1225,16 +1225,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::tTPsoft:
case ARM::TPsoft: {
+ const bool Thumb = Opcode == ARM::tTPsoft;
+
MachineInstrBuilder MIB;
- if (Opcode == ARM::tTPsoft)
+ if (STI->genLongCalls()) {
+ MachineFunction *MF = MBB.getParent();
+ MachineConstantPool *MCP = MF->getConstantPool();
+ unsigned PCLabelID = AFI->createPICLabelUId();
+ MachineConstantPoolValue *CPV =
+ ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(),
+ "__aeabi_read_tp", PCLabelID, 0);
+ unsigned Reg = MI.getOperand(0).getReg();
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get( ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__aeabi_read_tp", 0);
- else
+ TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
+ .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
+ if (!Thumb)
+ MIB.addImm(0);
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get( ARM::BL))
- .addExternalSymbol("__aeabi_read_tp", 0);
+ TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
+ if (Thumb)
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.addReg(Reg, RegState::Kill);
+ } else {
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Thumb ? ARM::tBL : ARM::BL));
+ if (Thumb)
+ MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.addExternalSymbol("__aeabi_read_tp", 0);
+ }
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32b7c87e61b..0f84a235916 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -97,171 +97,6 @@ namespace {
};
}
-void ARMTargetLowering::InitLibcallCallingConvs() {
- // The builtins on ARM always use AAPCS, irrespective of wheter C is AAPCS or
- // AAPCS_VFP.
- for (const auto LC : {
- RTLIB::SHL_I16,
- RTLIB::SHL_I32,
- RTLIB::SHL_I64,
- RTLIB::SHL_I128,
- RTLIB::SRL_I16,
- RTLIB::SRL_I32,
- RTLIB::SRL_I64,
- RTLIB::SRL_I128,
- RTLIB::SRA_I16,
- RTLIB::SRA_I32,
- RTLIB::SRA_I64,
- RTLIB::SRA_I128,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16,
- RTLIB::MUL_I32,
- RTLIB::MUL_I64,
- RTLIB::MUL_I128,
- RTLIB::MULO_I32,
- RTLIB::MULO_I64,
- RTLIB::MULO_I128,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16,
- RTLIB::SDIV_I32,
- RTLIB::SDIV_I64,
- RTLIB::SDIV_I128,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16,
- RTLIB::UDIV_I32,
- RTLIB::UDIV_I64,
- RTLIB::UDIV_I128,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16,
- RTLIB::SREM_I32,
- RTLIB::SREM_I64,
- RTLIB::SREM_I128,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16,
- RTLIB::UREM_I32,
- RTLIB::UREM_I64,
- RTLIB::UREM_I128,
- RTLIB::SDIVREM_I8,
- RTLIB::SDIVREM_I16,
- RTLIB::SDIVREM_I32,
- RTLIB::SDIVREM_I64,
- RTLIB::SDIVREM_I128,
- RTLIB::UDIVREM_I8,
- RTLIB::UDIVREM_I16,
- RTLIB::UDIVREM_I32,
- RTLIB::UDIVREM_I64,
- RTLIB::UDIVREM_I128,
- RTLIB::NEG_I32,
- RTLIB::NEG_I64,
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_F128,
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_F128,
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_F128,
- RTLIB::FPEXT_F64_F128,
- RTLIB::FPEXT_F32_F128,
- RTLIB::FPEXT_F32_F64,
- RTLIB::FPEXT_F16_F32,
- RTLIB::FPROUND_F32_F16,
- RTLIB::FPROUND_F64_F16,
- RTLIB::FPROUND_F80_F16,
- RTLIB::FPROUND_F128_F16,
- RTLIB::FPROUND_F64_F32,
- RTLIB::FPROUND_F80_F32,
- RTLIB::FPROUND_F128_F32,
- RTLIB::FPROUND_F80_F64,
- RTLIB::FPROUND_F128_F64,
- RTLIB::FPTOSINT_F32_I32,
- RTLIB::FPTOSINT_F32_I64,
- RTLIB::FPTOSINT_F32_I128,
- RTLIB::FPTOSINT_F64_I32,
- RTLIB::FPTOSINT_F64_I64,
- RTLIB::FPTOSINT_F64_I128,
- RTLIB::FPTOSINT_F80_I32,
- RTLIB::FPTOSINT_F80_I64,
- RTLIB::FPTOSINT_F80_I128,
- RTLIB::FPTOSINT_F128_I32,
- RTLIB::FPTOSINT_F128_I64,
- RTLIB::FPTOSINT_F128_I128,
- RTLIB::FPTOUINT_F32_I32,
- RTLIB::FPTOUINT_F32_I64,
- RTLIB::FPTOUINT_F32_I128,
- RTLIB::FPTOUINT_F64_I32,
- RTLIB::FPTOUINT_F64_I64,
- RTLIB::FPTOUINT_F64_I128,
- RTLIB::FPTOUINT_F80_I32,
- RTLIB::FPTOUINT_F80_I64,
- RTLIB::FPTOUINT_F80_I128,
- RTLIB::FPTOUINT_F128_I32,
- RTLIB::FPTOUINT_F128_I64,
- RTLIB::FPTOUINT_F128_I128,
- RTLIB::SINTTOFP_I32_F32,
- RTLIB::SINTTOFP_I32_F64,
- RTLIB::SINTTOFP_I32_F80,
- RTLIB::SINTTOFP_I32_F128,
- RTLIB::SINTTOFP_I64_F32,
- RTLIB::SINTTOFP_I64_F64,
- RTLIB::SINTTOFP_I64_F80,
- RTLIB::SINTTOFP_I64_F128,
- RTLIB::SINTTOFP_I128_F32,
- RTLIB::SINTTOFP_I128_F64,
- RTLIB::SINTTOFP_I128_F80,
- RTLIB::SINTTOFP_I128_F128,
- RTLIB::UINTTOFP_I32_F32,
- RTLIB::UINTTOFP_I32_F64,
- RTLIB::UINTTOFP_I32_F80,
- RTLIB::UINTTOFP_I32_F128,
- RTLIB::UINTTOFP_I64_F32,
- RTLIB::UINTTOFP_I64_F64,
- RTLIB::UINTTOFP_I64_F80,
- RTLIB::UINTTOFP_I64_F128,
- RTLIB::UINTTOFP_I128_F32,
- RTLIB::UINTTOFP_I128_F64,
- RTLIB::UINTTOFP_I128_F80,
- RTLIB::UINTTOFP_I128_F128,
- RTLIB::OEQ_F32,
- RTLIB::OEQ_F64,
- RTLIB::OEQ_F128,
- RTLIB::UNE_F32,
- RTLIB::UNE_F64,
- RTLIB::UNE_F128,
- RTLIB::OGE_F32,
- RTLIB::OGE_F64,
- RTLIB::OGE_F128,
- RTLIB::OLT_F32,
- RTLIB::OLT_F64,
- RTLIB::OLT_F128,
- RTLIB::OLE_F32,
- RTLIB::OLE_F64,
- RTLIB::OLE_F128,
- RTLIB::OGT_F32,
- RTLIB::OGT_F64,
- RTLIB::OGT_F128,
- RTLIB::UO_F32,
- RTLIB::UO_F64,
- RTLIB::UO_F128,
- RTLIB::O_F32,
- RTLIB::O_F64,
- RTLIB::O_F128,
- })
- setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
-}
-
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -349,7 +184,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- InitLibcallCallingConvs();
+ if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
+ !Subtarget->isTargetWatchOS()) {
+ const auto &E = Subtarget->getTargetTriple().getEnvironment();
+
+ bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
+ E == Triple::MuslEABIHF;
+ // Windows is a special case. Technically, we will replace all of the "GNU"
+ // calls with calls to MSVCRT if appropriate and adjust the calling
+ // convention then.
+ IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
+
+ for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
+ setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
+ IsHFTarget ? CallingConv::ARM_AAPCS_VFP
+ : CallingConv::ARM_AAPCS);
+ }
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
@@ -1937,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
+ Outs[0].VT == MVT::i32) {
assert(VA.getLocVT() == MVT::i32 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
@@ -3176,17 +3027,20 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
+static bool isReadOnly(const GlobalValue *GV) {
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->getBaseObject();
+ return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+ isa<Function>(GV);
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getBaseObject();
- bool IsRO =
- (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
- isa<Function>(GV);
+ bool IsRO = isReadOnly(GV);
// promoteToConstantPool only if not generating XO text section
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
@@ -7721,11 +7575,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
- if (Subtarget->isTargetWindows())
+ if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
return LowerSDIV(Op, DAG);
case ISD::UDIV:
- if (Subtarget->isTargetWindows())
+ if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
return LowerUDIV(Op, DAG);
case ISD::ADDC:
diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
index 7a7f91f4d3c..84c6eb845bb 100644
--- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -538,8 +538,6 @@ class InstrItineraryData;
bool HasStandaloneRem = true;
- void InitLibcallCallingConvs();
-
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
diff --git a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 97ca11ca501..d054578deb6 100644
--- a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -413,6 +413,7 @@ public:
Match_RequiresDifferentOperands,
Match_RequiresNoZeroRegister,
Match_RequiresSameSrcAndDst,
+ Match_NoFCCRegisterForCurrentISA,
Match_NonZeroOperandForSync,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "MipsGenAsmMatcher.inc"
@@ -1461,8 +1462,6 @@ public:
bool isFCCAsmReg() const {
if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
return false;
- if (!AsmParser.hasEightFccRegisters())
- return RegIdx.Index == 0;
return RegIdx.Index <= 7;
}
bool isACCAsmReg() const {
@@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
return Match_RequiresSameSrcAndDst;
}
}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
switch (Inst.getOpcode()) {
// As described by the MIPSR6 spec, daui must not use the zero operand for
@@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
return Match_RequiresDifferentOperands;
return Match_Success;
- default:
- return Match_Success;
}
+
+ uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & MipsII::HasFCCRegOperand) &&
+ (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters())
+ return Match_NoFCCRegisterForCurrentISA;
+
+ return Match_Success;
+
}
static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
@@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "invalid operand ($zero) for instruction");
case Match_RequiresSameSrcAndDst:
return Error(IDLoc, "source and destination must match");
+ case Match_NoFCCRegisterForCurrentISA:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "non-zero fcc register doesn't exist in current ISA level");
case Match_Immz:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
case Match_UImm1_0:
diff --git a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 35de7b27bf1..a90db2384c4 100644
--- a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -123,7 +123,9 @@ namespace MipsII {
HasForbiddenSlot = 1 << 5,
/// IsPCRelativeLoad - A Load instruction with implicit source register
/// ($pc) with explicit offset and destination register
- IsPCRelativeLoad = 1 << 6
+ IsPCRelativeLoad = 1 << 6,
+ /// HasFCCRegOperand - Instruction uses an $fcc<x> register.
+ HasFCCRegOperand = 1 << 7
};
}
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index fc83761e409..5600f71ff68 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
- CEQS_FM_MM<0>;
+ CEQS_FM_MM<0> {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+}
+
def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
- CEQS_FM_MM<1>;
+ CEQS_FM_MM<1> {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+}
def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
@@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in {
def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>,
LW_FM_MM<0x26>;
}
+
+ multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt,
+ InstrItinClass itin> {
+ def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 0> {
+ let BaseOpcode = "c.f."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 1> {
+ let BaseOpcode = "c.un."#NAME;
+ let isCommutable = 1;
+ }
+ def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 2> {
+ let BaseOpcode = "c.eq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 3> {
+ let BaseOpcode = "c.ueq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 4> {
+ let BaseOpcode = "c.olt."#NAME;
+ }
+ def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 5> {
+ let BaseOpcode = "c.ult."#NAME;
+ }
+ def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 6> {
+ let BaseOpcode = "c.ole."#NAME;
+ }
+ def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 7> {
+ let BaseOpcode = "c.ule."#NAME;
+ }
+ def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 8> {
+ let BaseOpcode = "c.sf."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 9> {
+ let BaseOpcode = "c.ngle."#NAME;
+ }
+ def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 10> {
+ let BaseOpcode = "c.seq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 11> {
+ let BaseOpcode = "c.ngl."#NAME;
+ }
+ def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 12> {
+ let BaseOpcode = "c.lt."#NAME;
+ }
+ def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 13> {
+ let BaseOpcode = "c.nge."#NAME;
+ }
+ def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 14> {
+ let BaseOpcode = "c.le."#NAME;
+ }
+ def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+ C_COND_FM_MM<fmt, 15> {
+ let BaseOpcode = "c.ngt."#NAME;
+ }
+ }
+
+ defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ let DecoderNamespace = "Mips64" in
+ defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">,
+ ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
}
//===----------------------------------------------------------------------===//
diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 8b595f9e6c4..774976828a0 100644
--- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch {
class CEQS_FM_MM<bits<2> fmt> : MMArch {
bits<5> fs;
bits<5> ft;
+ bits<3> fcc;
bits<4> cond;
bits<32> Inst;
@@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch {
let Inst{31-26} = 0x15;
let Inst{25-21} = ft;
let Inst{20-16} = fs;
- let Inst{15-13} = 0x0; // cc
+ let Inst{15-13} = fcc;
let Inst{12} = 0;
let Inst{11-10} = fmt;
let Inst{9-6} = cond;
let Inst{5-0} = 0x3c;
}
+class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> {
+ let cond = c;
+}
+
class BC1F_FM_MM<bits<5> tf> : MMArch {
bits<16> offset;
diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 179695bc698..04d6529a073 100644
--- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// TODO: implement
}
+// Emit .dtprelword or .dtpreldword directive
+// and value for debug thread local expression.
+void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+ unsigned Size) const {
+ switch (Size) {
+ case 4:
+ OutStreamer->EmitDTPRel32Value(Value);
+ break;
+ case 8:
+ OutStreamer->EmitDTPRel64Value(Value);
+ break;
+ default:
+ llvm_unreachable("Unexpected size of expression value.");
+ }
+}
+
// Align all targets of indirect branches on bundle size. Used only if target
// is NaCl.
void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
index 259e557e128..c5cf5241c23 100644
--- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -140,6 +140,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+ void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
};
}
diff --git a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
index 29f3e2c07e0..a44192f57aa 100644
--- a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
- emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
- Mips::FCC0, RegState::ImplicitDefine);
+ emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg)
+ .addReg(RightReg);
emitInst(CondMovOpc, ResultReg)
.addReg(RegWithOne)
.addReg(Mips::FCC0)
diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
index ab7aa9dcdca..df42d56d041 100644
--- a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
let isTerminator = 1;
let hasDelaySlot = DelaySlot;
let Defs = [AT];
+ let hasFCCRegOperand = 1;
}
class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
@@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
!strconcat("c.$cond.", typestr)>, HARDFLOAT {
let Defs = [FCC0];
let isCodeGenOnly = 1;
+ let hasFCCRegOperand = 1;
}
+
+// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] to $fcc0. Rather
+// than duplicating the instruction definition for MIPS1 - MIPS3, we expand
+// c.cond.ft if necessary, and reject it after constructing the
+// instruction if the ISA doesn't support it.
class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC,
InstrItinClass itin> :
- InstSE<(outs), (ins RC:$fs, RC:$ft),
- !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin,
- FrmFR>, HARDFLOAT;
+ InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft),
+ !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin,
+ FrmFR>, HARDFLOAT {
+ let isCompare = 1;
+ let hasFCCRegOperand = 1;
+}
+
multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
InstrItinClass itin> {
- def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>;
- def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>;
- def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>;
- def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>;
- def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>;
- def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>;
- def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>;
- def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>;
- def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>;
- def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>;
- def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>;
- def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>;
- def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>;
- def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>;
- def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>;
- def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
+ def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 0> {
+ let BaseOpcode = "c.f."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 1> {
+ let BaseOpcode = "c.un."#NAME;
+ let isCommutable = 1;
+ }
+ def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 2> {
+ let BaseOpcode = "c.eq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 3> {
+ let BaseOpcode = "c.ueq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 4> {
+ let BaseOpcode = "c.olt."#NAME;
+ }
+ def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 5> {
+ let BaseOpcode = "c.ult."#NAME;
+ }
+ def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 6> {
+ let BaseOpcode = "c.ole."#NAME;
+ }
+ def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 7> {
+ let BaseOpcode = "c.ule."#NAME;
+ }
+ def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 8> {
+ let BaseOpcode = "c.sf."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 9> {
+ let BaseOpcode = "c.ngle."#NAME;
+ }
+ def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 10> {
+ let BaseOpcode = "c.seq."#NAME;
+ let isCommutable = 1;
+ }
+ def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 11> {
+ let BaseOpcode = "c.ngl."#NAME;
+ }
+ def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 12> {
+ let BaseOpcode = "c.lt."#NAME;
+ }
+ def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 13> {
+ let BaseOpcode = "c.nge."#NAME;
+ }
+ def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 14> {
+ let BaseOpcode = "c.le."#NAME;
+ }
+ def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>,
+ C_COND_FM<fmt, 15> {
+ let BaseOpcode = "c.ngt."#NAME;
+ }
}
+let AdditionalPredicates = [NotInMicroMips] in {
defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
FGR_32;
let DecoderNamespace = "Mips64" in
defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
FGR_64;
-
+}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
@@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
/// Floating Point Compare
let AdditionalPredicates = [NotInMicroMips] in {
def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
- ISA_MIPS1_NOT_32R6_64R6;
+ ISA_MIPS1_NOT_32R6_64R6 {
+
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+ }
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32 {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+ }
}
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64 {
+ // FIXME: This is required to work around the fact that these instructions
+ // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
+ // fcc register set used directly.
+ bits<3> fcc = 0;
+}
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
- ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT;
-def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
- ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
-
def : MipsInstAlias
<"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
ISA_MIPS2, HARDFLOAT;
@@ -630,6 +703,80 @@ def : MipsInstAlias
def : MipsInstAlias
<"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
FGR_64, ISA_MIPS2, HARDFLOAT;
+
+multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> {
+ def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_F_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_UN_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_EQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_UEQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_OLT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_ULT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_OLE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_ULE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_SF_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGLE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_SEQ_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGL_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_LT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_LE_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+ def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"),
+ (!cast<Instruction>("C_NGT_"#NAME) FCC0,
+ RC:$fs, RC:$ft), 1>;
+}
+
+multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString,
+ Instruction BCFalse, string BCFalseString> {
+ def : MipsInstAlias<!strconcat(BCTrueString, " $offset"),
+ (BCTrue FCC0, brtarget:$offset), 1>;
+
+ def : MipsInstAlias<!strconcat(BCFalseString, " $offset"),
+ (BCFalse FCC0, brtarget:$offset), 1>;
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+ defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6;
+ defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_32;
+ defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT,
+ ISA_MIPS1_NOT_32R6_64R6, FGR_64;
+
+ defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6,
+ HARDFLOAT;
+ defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6,
+ HARDFLOAT;
+}
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
index 1437fb75434..817d9b44b9c 100644
--- a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
// ($pc) and with explicit offset and destination
// register
+ bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is
+ // present in MIPS-I to MIPS-III.
- // TSFlags layout should be kept in sync with MipsInstrInfo.h.
+ // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h.
let TSFlags{3-0} = FormBits;
let TSFlags{4} = isCTI;
let TSFlags{5} = hasForbiddenSlot;
let TSFlags{6} = IsPCRelativeLoad;
+ let TSFlags{7} = hasFCCRegOperand;
let DecoderNamespace = "Mips";
@@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch {
class CEQS_FM<bits<5> fmt> : StdArch {
bits<5> fs;
bits<5> ft;
+ bits<3> fcc;
bits<4> cond;
bits<32> Inst;
@@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch {
let Inst{25-21} = fmt;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-8} = 0; // cc
+ let Inst{10-8} = fcc;
let Inst{7-4} = 0x3;
let Inst{3-0} = cond;
}
diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index fadab780612..c5d6a05d661 100644
--- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL,
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
}
+
+const MCExpr *
+MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ return MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(0x8000, getContext()), getContext());
+}
diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index e5423f9578a..a37ec154ff7 100644
--- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -42,6 +42,8 @@ class MipsTargetMachine;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
unsigned &Align) const override;
+ /// Describe a TLS variable address within debug info.
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
} // end namespace llvm
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 90111bbea07..f615cc7cc97 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
PPC970_DGroup_Single;
} // hasSideEffects = 0
+def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src),
+ "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>;
+def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src),
+ "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
"icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
def : Pat<(int_ppc_dcbt xoaddr:$dst),
(DCBT 0, xoaddr:$dst)>;
@@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
"mftb $RT, $SPR", IIC_SprMFTB>;
+def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfpmr $RT, $SPR", IIC_SprMFPMR>;
+
+def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
+ "mtpmr $SPR, $RT", IIC_SprMTPMR>;
+
+
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
// on a 32-bit target.
let hasSideEffects = 1, usesCustomInserter = 1 in
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
index edabe774867..d240529bc73 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -118,6 +118,8 @@ def IIC_SprTLBIE : InstrItinClass;
def IIC_SprABORT : InstrItinClass;
def IIC_SprMSGSYNC : InstrItinClass;
def IIC_SprSTOP : InstrItinClass;
+def IIC_SprMFPMR : InstrItinClass;
+def IIC_SprMTPMR : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index f687d326b52..15d5991b938 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<5, [E500_SFX0]>],
[8, 1],
[E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<4, [E500_SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
@@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries<
InstrStage<1, [E500_SFX0, E500_SFX1]>],
[4, 1], // Latency = 1, Repeat rate = 1
[E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
InstrStage<4, [E500_SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 5db886cf8f9..32f8e652dd5 100644
--- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries<
InstrStage<5, [E5500_CFX_0]>],
[9, 2], // Latency = 5, Repeat rate = 5
[E5500_GPR_Bypass, E5500_CR_Bypass]>,
- InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<4, [E5500_SFX0]>],
+ InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
[8, 2], // Latency = 4, Repeat rate = 4
[E5500_GPR_Bypass, E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<1, [E5500_CFX_0]>],
[5], // Latency = 1, Repeat rate = 1
[E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
InstrStage<4, [E5500_CFX_0]>],
[8, 2], // Latency = 4, Repeat rate = 4
[NoBypass, E5500_GPR_Bypass]>,
InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
- InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ InstrStage<1, [E5500_CFX_0]>],
[5], // Latency = 1, Repeat rate = 1
[E5500_GPR_Bypass]>,
InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
diff --git a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 192b942490e..985acf92a2d 100644
--- a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -77,11 +77,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
case X86::TCRETURNdi:
- case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
- case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -99,10 +97,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
- if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
- assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
- }
-
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -111,21 +105,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
- Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
unsigned Op;
switch (Opcode) {
case X86::TCRETURNdi:
Op = X86::TAILJMPd;
break;
- case X86::TCRETURNdicc:
- Op = X86::TAILJMPd_CC;
- break;
- case X86::TCRETURNdi64cc:
- assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder.");
- // TODO: We could do it for Win64 "leaf" functions though; PR30337.
- Op = X86::TAILJMPd64_CC;
- break;
default:
// Note: Win64 uses REX prefixes indirect jumps out of functions, but
// not direct ones.
@@ -141,10 +126,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
- if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
- MIB.addImm(MBBI->getOperand(2).getImm());
- }
-
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
diff --git a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f13b722eb3..08fe2bad281 100644
--- a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28788,10 +28788,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// If a vector select has an operand that is -1 or 0, simplify the select to a
-/// bitwise logic operation.
-static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+/// If a vector select has an operand that is -1 or 0, try to simplify the
+/// select to a bitwise logic operation.
+static SDValue
+combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -28853,18 +28855,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
}
}
- if (!TValIsAllOnes && !FValIsAllZeros)
+ // vselect Cond, 111..., 000... -> Cond
+ if (TValIsAllOnes && FValIsAllZeros)
+ return DAG.getBitcast(VT, Cond);
+
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
return SDValue();
- SDValue Ret;
- if (TValIsAllOnes && FValIsAllZeros)
- Ret = Cond;
- else if (TValIsAllOnes)
- Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
- else if (FValIsAllZeros)
- Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS));
+ // vselect Cond, 111..., X -> or Cond, X
+ if (TValIsAllOnes) {
+ SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
+ SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
+ return DAG.getBitcast(VT, Or);
+ }
+
+ // vselect Cond, X, 000... -> and Cond, X
+ if (FValIsAllZeros) {
+ SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
+ SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
+ return DAG.getBitcast(VT, And);
+ }
- return DAG.getBitcast(VT, Ret);
+ return SDValue();
}
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
@@ -29353,7 +29365,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
+ if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
// If this is a *dynamic* select (non-constant condition) and we can match
@@ -31260,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasAVX512())
- return false;
-
- // FIXME: Scalar type may be supported if we move it to vector register.
- if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
- return false;
-
- EVT SrcElVT = SrcVT.getScalarType();
- EVT DstElVT = DstVT.getScalarType();
- if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
- return false;
- if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
- return false;
- if (SrcVT.is512BitVector() || Subtarget.hasVLX())
- return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
- return false;
-}
-
-/// Return true if VPACK* instruction can be used for the given types
-/// and it is avalable on \p Subtarget.
-static bool
-isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
- if (Subtarget.hasSSE2())
- // v16i16 -> v16i8
- if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
- return true;
- if (Subtarget.hasSSE41())
- // v8i32 -> v8i16
- if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
- return true;
- return false;
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectUSatPattern(SDValue In, EVT VT) {
- if (In.getOpcode() != ISD::UMIN)
- return SDValue();
-
- //Saturation with truncation. We truncate from InVT to VT.
- assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
- "Unexpected types for truncate operation");
-
- APInt C;
- if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
- // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
- // the element size of the destination type.
- return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
- SDValue();
- }
- return SDValue();
-}
-
-/// Detect a pattern of truncation with saturation:
-/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
-/// The types should allow to use VPMOVUS* instruction on AVX512.
-/// Return the source value to be truncated or SDValue() if the pattern was not
-/// matched.
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
- const X86Subtarget &Subtarget) {
- if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return SDValue();
- return detectUSatPattern(In, VT);
-}
-
-static SDValue
-combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- SDValue USatVal = detectUSatPattern(In, VT);
- if (USatVal) {
- if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
- return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
- return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
- }
- }
- return SDValue();
-}
-
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31913,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
- if (SDValue Val =
- detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
- return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
- dl, Val, St->getBasePtr(),
- St->getMemoryVT(), St->getMemOperand(), DAG);
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -32539,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
- // Try to combine truncation with unsigned saturation.
- if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
- return Val;
-
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -33778,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
}
}
- // Try to synthesize horizontal adds from adds of shuffles.
+ // Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
- isHorizontalBinOp(Op0, Op1, true))
+ isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
diff --git a/gnu/llvm/lib/Target/X86/X86InstrControl.td b/gnu/llvm/lib/Target/X86/X86InstrControl.td
index 4ea223e82be..2f260c48df4 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrControl.td
+++ b/gnu/llvm/lib/Target/X86/X86InstrControl.td
@@ -264,21 +264,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
-// Conditional tail calls are similar to the above, but they are branches
-// rather than barriers, and they use EFLAGS.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
- let Uses = [ESP, EFLAGS] in {
- def TCRETURNdicc : PseudoI<(outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
-
- // This gets substituted to a conditional jump instruction in MC lowering.
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$cond),
- "",
- [], IIC_JMP_REL>;
-}
-
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -340,19 +325,3 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
-
-// Conditional tail calls are similar to the above, but they are branches
-// rather than barriers, and they use EFLAGS.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
- isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
- let Uses = [RSP, EFLAGS] in {
- def TCRETURNdi64cc : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset,
- i32imm:$cond), []>;
-
- // This gets substituted to a conditional jump instruction in MC lowering.
- def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$cond),
- "",
- [], IIC_JMP_REL>;
-}
diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
index e3484d062bc..627b6120b04 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5108,85 +5108,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- return true;
- default:
- return false;
- }
-}
-
-bool X86InstrInfo::canMakeTailCallConditional(
- SmallVectorImpl<MachineOperand> &BranchCond,
- const MachineInstr &TailCall) const {
- if (TailCall.getOpcode() != X86::TCRETURNdi &&
- TailCall.getOpcode() != X86::TCRETURNdi64) {
- // Only direct calls can be done with a conditional branch.
- return false;
- }
-
- if (Subtarget.isTargetWin64()) {
- // Conditional tail calls confuse the Win64 unwinder.
- // TODO: Allow them for "leaf" functions; PR30337.
- return false;
- }
-
- assert(BranchCond.size() == 1);
- if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
- // Can't make a conditional tail call with this condition.
- return false;
- }
-
- const X86MachineFunctionInfo *X86FI =
- TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>();
- if (X86FI->getTCReturnAddrDelta() != 0 ||
- TailCall.getOperand(1).getImm() != 0) {
- // A conditional tail call cannot do any stack adjustment.
- return false;
- }
-
- return true;
-}
-
-void X86InstrInfo::replaceBranchWithTailCall(
- MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
- const MachineInstr &TailCall) const {
- assert(canMakeTailCallConditional(BranchCond, TailCall));
-
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- if (!I->isBranch())
- assert(0 && "Can't find the branch to replace!");
-
- X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
- assert(BranchCond.size() == 1);
- if (CC != BranchCond[0].getImm())
- continue;
-
- break;
- }
-
- unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
- : X86::TCRETURNdi64cc;
-
- auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
- MIB->addOperand(TailCall.getOperand(0)); // Destination.
- MIB.addImm(0); // Stack offset (not used).
- MIB->addOperand(BranchCond[0]); // Condition.
- MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
-
- I->eraseFromParent();
-}
-
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.h b/gnu/llvm/lib/Target/X86/X86InstrInfo.h
index 8d746172dcb..acfdef4da7a 100644
--- a/gnu/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.h
@@ -316,13 +316,6 @@ public:
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
- bool isUnconditionalTailCall(const MachineInstr &MI) const override;
- bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
- const MachineInstr &TailCall) const override;
- void replaceBranchWithTailCall(MachineBasicBlock &MBB,
- SmallVectorImpl<MachineOperand> &Cond,
- const MachineInstr &TailCall) const override;
-
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
index a38a4b30b77..feeb2fd5993 100644
--- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -498,16 +498,11 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction.
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
{ unsigned Opcode;
case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
- case X86::TAILJMPd_CC:
- case X86::TAILJMPd64_CC:
- Opcode = X86::GetCondBranchFromCond(
- static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
- goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
@@ -1281,11 +1276,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
- case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
- case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
// Lower these as normal, but add some comments.
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
index 727ff70c3ff..586bb7bd7b1 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
-
- assert((!isPMULLDSlow() || hasSSE41()) &&
- "Feature Slow PMULLD can only be set on a subtarget with SSE4.1");
}
void X86Subtarget::initializeEnvironment() {
diff --git a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index d086ee05a64..941efb210d1 100644
--- a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(true), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass (default = on)"));
+ "enable-gvn-hoist", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 013159cde77..428f94bb5e9 100644
--- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -884,6 +884,10 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
if (!GEPLHS->hasAllConstantIndices())
return nullptr;
+ // Make sure the pointers have the same type.
+ if (GEPLHS->getType() != RHS->getType())
+ return nullptr;
+
Value *PtrBase, *Index;
std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
@@ -4035,11 +4039,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1);
return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1);
}
- // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
- if (CmpC->isMinSignedValue()) {
- Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
- }
}
break;
}
@@ -4059,11 +4058,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
if (*CmpC == Op0Max - 1)
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
ConstantInt::get(Op1->getType(), *CmpC + 1));
-
- // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
- if (CmpC->isMaxSignedValue())
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- Constant::getNullValue(Op0->getType()));
}
break;
}
@@ -4295,6 +4289,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
return nullptr;
+ // FIXME: We only do this after checking for min/max to prevent infinite
+ // looping caused by a reverse canonicalization of these patterns for min/max.
+ // FIXME: The organization of folds is a mess. These would naturally go into
+ // canonicalizeCmpWithConstant(), but we can't move all of the above folds
+ // down here after the min/max restriction.
+ ICmpInst::Predicate Pred = I.getPredicate();
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
+ Constant *Zero = Constant::getNullValue(Op0->getType());
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
+ }
+
+ // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
+ Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
+ }
+ }
+
if (Instruction *Res = foldICmpInstWithConstant(I))
return Res;
diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 388c5e4e7fa..49e516e9c17 100644
--- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -502,7 +502,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
!DL.isNonIntegralPointerType(Ty)) {
if (all_of(LI.users(), [&LI](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
- return SI && SI->getPointerOperand() != &LI;
+ return SI && SI->getPointerOperand() != &LI &&
+ !SI->getPointerOperand()->isSwiftError();
})) {
LoadInst *NewLoad = combineLoadToNewType(
IC, LI,
diff --git a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9c4b417e35e..f5e9e7dd5a9 100644
--- a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -600,6 +600,22 @@ private:
void initializeCallbacks(Module &M);
bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
+ void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+ void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+ void
+ InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+
+ GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
+ StringRef OriginalName);
+ void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata);
+ IRBuilder<> CreateAsanModuleDtor(Module &M);
+
bool ShouldInstrumentGlobal(GlobalVariable *G);
bool ShouldUseMachOGlobalsSection() const;
StringRef getGlobalMetadataSection() const;
@@ -997,7 +1013,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
(!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
// inalloca allocas are not treated as static, and we don't want
// dynamic alloca instrumentation for them as well.
- !AI.isUsedWithInAlloca());
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError());
ProcessedAllocas[&AI] = IsInteresting;
return IsInteresting;
@@ -1072,12 +1090,19 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
}
}
- // Do not instrument acesses from different address spaces; we cannot deal
- // with them.
if (PtrOperand) {
+ // Do not instrument accesses from different address spaces; we cannot deal
+ // with them.
Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
if (PtrTy->getPointerAddressSpace() != 0)
return nullptr;
+
+ // Ignore swifterror addresses.
+ // swifterror memory addresses are mem2reg promoted by instruction
+ // selection. As such they cannot have regular uses like an instrumentation
+ // function and it makes no sense to track them as memory.
+ if (PtrOperand->isSwiftError())
+ return nullptr;
}
// Treat memory accesses to promotable allocas as non-interesting since they
@@ -1553,17 +1578,173 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
// Declare the functions that find globals in a shared object and then invoke
// the (un)register function on them.
- AsanRegisterImageGlobals = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanRegisterImageGlobalsName,
- IRB.getVoidTy(), IntptrTy, nullptr));
+ AsanRegisterImageGlobals =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanUnregisterImageGlobalsName,
- IRB.getVoidTy(), IntptrTy, nullptr));
+ AsanUnregisterImageGlobals =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
}
+// Put the metadata and the instrumented global in the same group. This ensures
+// that the metadata is discarded if the instrumented global is discarded.
+void AddressSanitizerModule::SetComdatForGlobalMetadata(
+ GlobalVariable *G, GlobalVariable *Metadata) {
+ Module &M = *G->getParent();
+ Comdat *C = G->getComdat();
+ if (!C) {
+ if (!G->hasName()) {
+ // If G is unnamed, it must be internal. Give it an artificial name
+ // so we can put it in a comdat.
+ assert(G->hasLocalLinkage());
+ G->setName(Twine(kAsanGenPrefix) + "_anon_global");
+ }
+ C = M.getOrInsertComdat(G->getName());
+ // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
+ if (TargetTriple.isOSBinFormatCOFF())
+ C->setSelectionKind(Comdat::NoDuplicates);
+ G->setComdat(C);
+ }
+
+ assert(G->hasComdat());
+ Metadata->setComdat(G->getComdat());
+}
+
+// Create a separate metadata global and put it in the appropriate ASan
+// global registration section.
+GlobalVariable *
+AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
+ StringRef OriginalName) {
+ GlobalVariable *Metadata =
+ new GlobalVariable(M, Initializer->getType(), false,
+ GlobalVariable::InternalLinkage, Initializer,
+ Twine("__asan_global_") +
+ GlobalValue::getRealLinkageName(OriginalName));
+ Metadata->setSection(getGlobalMetadataSection());
+ return Metadata;
+}
+
+IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
+ Function *AsanDtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
+
+ return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
+}
+
+void AddressSanitizerModule::InstrumentGlobalsCOFF(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+ auto &DL = M.getDataLayout();
+
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ Constant *Initializer = MetadataInitializers[i];
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, Initializer, G->getName());
+
+ // The MSVC linker always inserts padding when linking incrementally. We
+ // cope with that by aligning each struct to its size, which must be a power
+ // of two.
+ unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType());
+ assert(isPowerOf2_32(SizeOfGlobalStruct) &&
+ "global metadata will not be padded appropriately");
+ Metadata->setAlignment(SizeOfGlobalStruct);
+
+ SetComdatForGlobalMetadata(G, Metadata);
+ }
+}
+
+void AddressSanitizerModule::InstrumentGlobalsMachO(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+ // On recent Mach-O platforms, use a structure which binds the liveness of
+ // the global variable to the metadata struct. Keep the list of "Liveness" GVs
+ // created so they can be added to llvm.compiler.used.
+ StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
+ SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size());
+
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ Constant *Initializer = MetadataInitializers[i];
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, Initializer, G->getName());
+
+ // On recent Mach-O platforms, we emit the global metadata in a way that
+ // allows the linker to properly strip dead globals.
+ auto LivenessBinder = ConstantStruct::get(
+ LivenessTy, Initializer->getAggregateElement(0u),
+ ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr);
+ GlobalVariable *Liveness = new GlobalVariable(
+ M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
+ Twine("__asan_binder_") + G->getName());
+ Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+ LivenessGlobals[i] = Liveness;
+ }
+
+ // Update llvm.compiler.used, adding the new liveness globals. This is
+ // needed so that during LTO these variables stay alive. The alternative
+ // would be to have the linker handle the LTO symbols, but libLTO's
+ // current API does not expose access to the section for each symbol.
+ if (!LivenessGlobals.empty())
+ appendToCompilerUsed(M, LivenessGlobals);
+
+ // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+ // to look up the loaded image that contains it. Second, we can store in it
+ // whether registration has already occurred, to prevent duplicate
+ // registration.
+ //
+ // common linkage ensures that there is only one global per shared library.
+ GlobalVariable *RegisteredFlag = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::CommonLinkage,
+ ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+ RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+ IRB.CreateCall(AsanRegisterImageGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+}
+
+void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+ unsigned N = ExtendedGlobals.size();
+ assert(N > 0);
+
+ // On platforms that don't have a custom metadata section, we emit an array
+ // of global metadata structures.
+ ArrayType *ArrayOfGlobalStructTy =
+ ArrayType::get(MetadataInitializers[0]->getType(), N);
+ auto AllGlobals = new GlobalVariable(
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
+ ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), "");
+
+ IRB.CreateCall(AsanRegisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, N)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, N)});
+}
+
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
@@ -1580,9 +1761,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
if (n == 0) return false;
auto &DL = M.getDataLayout();
- bool UseComdatMetadata = TargetTriple.isOSBinFormatCOFF();
- bool UseMachOGlobalsSection = ShouldUseMachOGlobalsSection();
- bool UseMetadataArray = !(UseComdatMetadata || UseMachOGlobalsSection);
// A global is described by a structure
// size_t beg;
@@ -1597,19 +1775,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
StructType *GlobalStructTy =
StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
IntptrTy, IntptrTy, IntptrTy, nullptr);
- unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(GlobalStructTy);
- assert(isPowerOf2_32(SizeOfGlobalStruct) &&
- "global metadata will not be padded appropriately");
- SmallVector<Constant *, 16> Initializers(UseMetadataArray ? n : 0);
-
- // On recent Mach-O platforms, use a structure which binds the liveness of
- // the global variable to the metadata struct. Keep the list of "Liveness" GV
- // created to be added to llvm.compiler.used
- StructType *LivenessTy = nullptr;
- if (UseMachOGlobalsSection)
- LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
- SmallVector<GlobalValue *, 16> LivenessGlobals(
- UseMachOGlobalsSection ? n : 0);
+ SmallVector<GlobalVariable *, 16> NewGlobals(n);
+ SmallVector<Constant *, 16> Initializers(n);
bool HasDynamicallyInitializedGlobals = false;
@@ -1681,25 +1848,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
NewGlobal->takeName(G);
G->eraseFromParent();
- G = NewGlobal;
-
- if (UseComdatMetadata) {
- // Get or create a COMDAT for G so that we can use it with our metadata.
- Comdat *C = G->getComdat();
- if (!C) {
- if (!G->hasName()) {
- // If G is unnamed, it must be internal. Give it an artificial name
- // so we can put it in a comdat.
- assert(G->hasLocalLinkage());
- G->setName(Twine(kAsanGenPrefix) + "_anon_global");
- }
- C = M.getOrInsertComdat(G->getName());
- // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
- if (TargetTriple.isOSBinFormatCOFF())
- C->setSelectionKind(Comdat::NoDuplicates);
- G->setComdat(C);
- }
- }
+ NewGlobals[i] = NewGlobal;
Constant *SourceLoc;
if (!MD.SourceLoc.empty()) {
@@ -1750,117 +1899,21 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
- // If we aren't using separate metadata globals, add it to the initializer
- // list and continue.
- if (UseMetadataArray) {
- Initializers[i] = Initializer;
- continue;
- }
-
- // Create a separate metadata global and put it in the appropriate ASan
- // global registration section.
- GlobalVariable *Metadata = new GlobalVariable(
- M, GlobalStructTy, false, GlobalVariable::InternalLinkage,
- Initializer, Twine("__asan_global_") +
- GlobalValue::getRealLinkageName(G->getName()));
- Metadata->setSection(getGlobalMetadataSection());
-
- // We don't want any padding, but we also need a reasonable alignment.
- // The MSVC linker always inserts padding when linking incrementally. We
- // cope with that by aligning each struct to its size, which must be a power
- // of two.
- Metadata->setAlignment(SizeOfGlobalStruct);
-
- // On platforms that support comdats, put the metadata and the
- // instrumented global in the same group. This ensures that the metadata
- // is discarded if the instrumented global is discarded.
- if (UseComdatMetadata) {
- assert(G->hasComdat());
- Metadata->setComdat(G->getComdat());
- continue;
- }
- assert(UseMachOGlobalsSection);
+ Initializers[i] = Initializer;
+ }
- // On recent Mach-O platforms, we emit the global metadata in a way that
- // allows the linker to properly strip dead globals.
- auto LivenessBinder = ConstantStruct::get(
- LivenessTy, Initializer->getAggregateElement(0u),
- ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr);
- GlobalVariable *Liveness = new GlobalVariable(
- M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
- Twine("__asan_binder_") + G->getName());
- Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
- LivenessGlobals[i] = Liveness;
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
+ } else if (ShouldUseMachOGlobalsSection()) {
+ InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+ } else {
+ InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
}
// Create calls for poisoning before initializers run and unpoisoning after.
if (HasDynamicallyInitializedGlobals)
createInitializerPoisonCalls(M, ModuleName);
- // Platforms with a dedicated metadata section don't need to emit any more
- // code.
- if (UseComdatMetadata)
- return true;
-
- GlobalVariable *AllGlobals = nullptr;
- GlobalVariable *RegisteredFlag = nullptr;
-
- if (UseMachOGlobalsSection) {
- // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
- // to look up the loaded image that contains it. Second, we can store in it
- // whether registration has already occurred, to prevent duplicate
- // registration.
- //
- // common linkage ensures that there is only one global per shared library.
- RegisteredFlag = new GlobalVariable(
- M, IntptrTy, false, GlobalVariable::CommonLinkage,
- ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
- RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
-
- // Update llvm.compiler.used, adding the new liveness globals. This is
- // needed so that during LTO these variables stay alive. The alternative
- // would be to have the linker handling the LTO symbols, but libLTO
- // current API does not expose access to the section for each symbol.
- if (!LivenessGlobals.empty())
- appendToCompilerUsed(M, LivenessGlobals);
- } else if (UseMetadataArray) {
- // On platforms that don't have a custom metadata section, we emit an array
- // of global metadata structures.
- ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
- AllGlobals = new GlobalVariable(
- M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
- ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
- }
-
- // Create a call to register the globals with the runtime.
- if (UseMachOGlobalsSection) {
- IRB.CreateCall(AsanRegisterImageGlobals,
- {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
- } else {
- IRB.CreateCall(AsanRegisterGlobals,
- {IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n)});
- }
-
- // We also need to unregister globals at the end, e.g., when a shared library
- // gets closed.
- Function *AsanDtorFunction =
- Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
- BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
- IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
-
- if (UseMachOGlobalsSection) {
- IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
- {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
- } else {
- IRB_Dtor.CreateCall(AsanUnregisterGlobals,
- {IRB.CreatePointerCast(AllGlobals, IntptrTy),
- ConstantInt::get(IntptrTy, n)});
- }
-
- appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
-
DEBUG(dbgs() << M);
return true;
}
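
The hunks above split ASan's global registration into per-object-format helpers:
COFF places each metadata struct in a comdat with its global, Mach-O binds each
metadata struct to a liveness marker emitted into the __DATA,__asan_liveness
section, and every other target falls back to a single array of metadata structs
that is registered and unregistered through runtime calls. The sketch below
illustrates only that fallback array scheme; it is plain standalone C++ with
invented names (GlobalDescriptor, registerGlobals, unregisterGlobals), not the
IRBuilder code the pass actually emits.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for the eight-field __asan_global descriptor.
    struct GlobalDescriptor {
      uintptr_t Beg;              // address of the instrumented global
      uintptr_t Size;             // payload size, without the redzone
      uintptr_t SizeWithRedzone;  // payload plus trailing redzone
    };

    // Module constructor side: walk the array the way the runtime would.
    static void registerGlobals(const std::vector<GlobalDescriptor> &Metadata) {
      for (const GlobalDescriptor &G : Metadata)
        std::printf("register %p, redzone of %llu bytes\n", (void *)G.Beg,
                    (unsigned long long)(G.SizeWithRedzone - G.Size));
    }

    // Module destructor side: undo the registration, e.g. on dylib unload.
    static void unregisterGlobals(const std::vector<GlobalDescriptor> &Metadata) {
      for (const GlobalDescriptor &G : Metadata)
        std::printf("unregister %p\n", (void *)G.Beg);
    }

    int main() {
      static int A;
      static double B;
      std::vector<GlobalDescriptor> Metadata = {
          {(uintptr_t)&A, sizeof(A), sizeof(A) + 32},
          {(uintptr_t)&B, sizeof(B), sizeof(B) + 32},
      };
      registerGlobals(Metadata);
      unregisterGlobals(Metadata);
    }
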
diff --git a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d9659694da4..52035c79a4a 100644
--- a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -488,6 +488,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
Value *Addr = IsWrite
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
+
+ // swifterror memory addresses are mem2reg promoted by instruction selection.
+ // As such they cannot have regular uses like an instrumentation function and
+ // it makes no sense to track them as memory.
+ if (Addr->isSwiftError())
+ return false;
+
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
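
The added guard returns before the access-size lookup: swifterror slots are
promoted to registers during instruction selection, so their address is not real
memory and must not be handed to the runtime's read/write callbacks. A minimal
standalone sketch of the same filter-before-instrument shape, with an invented
Access struct instead of LLVM IR:

    #include <cstdio>
    #include <string>

    struct Access {
      std::string Name;
      bool IsWrite;
      bool IsSwiftError;   // the slot was promoted to a register by isel
    };

    static bool instrumentLoadOrStore(const Access &A) {
      if (A.IsSwiftError)
        return false;      // no observable memory behind this "address": skip
      std::printf("emit __tsan_%s for %s\n", A.IsWrite ? "write" : "read",
                  A.Name.c_str());
      return true;
    }

    int main() {
      instrumentLoadOrStore({"x", true, false});    // instrumented as a write
      instrumentLoadOrStore({"err", false, true});  // filtered out
    }
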
diff --git a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 141e99b92cd..84f9373ae91 100644
--- a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -41,6 +41,8 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
+static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
+
namespace {
class CorrelatedValuePropagation : public FunctionPass {
public:
@@ -405,6 +407,9 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
typedef OverflowingBinaryOperator OBO;
+ if (DontProcessAdds)
+ return false;
+
if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp))
return false;
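
The new option defaults to true, so processAdd now returns immediately and the
add-flag transform stays compiled in but disabled unless the option is switched
off on the command line. A minimal standalone sketch of that default-off gate,
with hand-rolled argv parsing and an invented -cvp-process-adds flag standing in
for llvm::cl:

    #include <cstdio>
    #include <cstring>

    static bool DontProcessAdds = true;   // mirrors cl::init(true) in the hunk

    static bool processAdd(int LHS, int RHS) {
      if (DontProcessAdds)
        return false;                     // gate: transform is off by default
      std::printf("would mark %d + %d as non-overflowing here\n", LHS, RHS);
      return true;
    }

    int main(int argc, char **argv) {
      for (int I = 1; I < argc; ++I)
        if (std::strcmp(argv[I], "-cvp-process-adds") == 0)
          DontProcessAdds = false;        // opt back in from the command line
      std::printf("processAdd fired: %d\n", processAdd(1, 2));
    }
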
diff --git a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 90c26e13db7..f8e1d2e1a08 100644
--- a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
class GVNHoist {
public:
GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
- MemorySSA *MSSA, bool OptForMinSize)
- : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
- HoistingGeps(OptForMinSize), HoistedCtr(0) {
- // Hoist as far as possible when optimizing for code-size.
- if (OptForMinSize)
- MaxNumberOfBBSInPath = -1;
- }
+ MemorySSA *MSSA)
+ : DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+ HoistingGeps(false),
+ HoistedCtr(0)
+ { }
bool run(Function &F) {
VN.setDomTree(DT);
@@ -251,7 +249,6 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
MemorySSA *MSSA;
- const bool OptForMinSize;
const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
@@ -505,11 +502,6 @@ private:
bool safeToHoistScalar(const BasicBlock *HoistBB,
SmallPtrSetImpl<const BasicBlock *> &WL,
int &NBBsOnAllPaths) {
- // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
- // where they are partially needed.
- if (OptForMinSize)
- return true;
-
// Check that the hoisted expression is needed on all paths.
if (!hoistingFromAllPaths(HoistBB, WL))
return false;
@@ -923,13 +915,8 @@ private:
Intr->getIntrinsicID() == Intrinsic::assume)
continue;
}
- if (Call->mayHaveSideEffects()) {
- if (!OptForMinSize)
- break;
- // We may continue hoisting across calls which write to memory.
- if (Call->mayThrow())
- break;
- }
+ if (Call->mayHaveSideEffects())
+ break;
if (Call->isConvergent())
break;
@@ -971,7 +958,7 @@ public:
auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+ GVNHoist G(&DT, &AA, &MD, &MSSA);
return G.run(F);
}
@@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
AliasAnalysis &AA = AM.getResult<AAManager>(F);
MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+ GVNHoist G(&DT, &AA, &MD, &MSSA);
if (!G.run(F))
return PreservedAnalyses::all();
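
These hunks remove the OptForMinSize special cases from GVNHoist: the constructor
loses the flag, scalar hoisting is no longer unconditionally allowed at -Oz, and a
call that may have side effects now ends the hoisting window even if it cannot
throw. A standalone sketch of that stricter barrier scan, using an invented Inst
record rather than LLVM instructions:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Inst {
      const char *Name;
      bool IsCall;
      bool MayHaveSideEffects;
    };

    static std::size_t hoistableWindow(const std::vector<Inst> &BB) {
      std::size_t N = 0;
      for (const Inst &I : BB) {
        if (I.IsCall && I.MayHaveSideEffects)
          break;                          // barrier: no -Oz exception any more
        ++N;
      }
      return N;
    }

    int main() {
      std::vector<Inst> BB = {{"add", false, false},
                              {"call printf", true, true},
                              {"mul", false, false}};
      std::printf("%zu instruction(s) precede the first barrier\n",
                  hoistableWindow(BB));
    }
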
diff --git a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
index 4c15c8a32be..f51d11c04cb 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1196,10 +1196,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
auto mergeLoop = [&](Loop *L) {
// Loop over the body of this loop, looking for calls, invokes, and stores.
- // Because subloops have already been incorporated into AST, we skip blocks
- // in subloops.
for (BasicBlock *BB : L->blocks())
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
};
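
With the filter removed, mergeLoop feeds every block of the loop nest into the
alias set tracker instead of only the blocks whose innermost loop is L. A tiny
standalone sketch of the before/after difference, with integer ids standing in
for basic blocks and the tracker:

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Block {
      int Id;
      int InnermostLoop;   // id of the innermost loop containing the block
    };

    int main() {
      const int L = 1;                                     // loop being merged
      std::vector<Block> Nest = {{0, 1}, {1, 2}, {2, 1}};  // block 1 is in subloop 2
      std::set<int> OldAST, NewAST;                        // stand-ins for the tracker
      for (const Block &BB : Nest) {
        if (BB.InnermostLoop == L)
          OldAST.insert(BB.Id);   // old: subloop blocks were skipped
        NewAST.insert(BB.Id);     // new: every block of the nest is incorporated
      }
      std::printf("old tracker: %zu blocks, new tracker: %zu blocks\n",
                  OldAST.size(), NewAST.size());
    }
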
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 01728ae680d..194587a85e7 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -158,8 +158,9 @@ struct MemAccessTy {
bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
- static MemAccessTy getUnknown(LLVMContext &Ctx) {
- return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+ static MemAccessTy getUnknown(LLVMContext &Ctx,
+ unsigned AS = UnknownAddressSpace) {
+ return MemAccessTy(Type::getVoidTy(Ctx), AS);
}
};
@@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
if (Kind == LSRUse::Address) {
- if (AccessTy != LU.AccessTy)
- NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+ if (AccessTy.MemTy != LU.AccessTy.MemTy) {
+ NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
+ AccessTy.AddrSpace);
+ }
}
// Conservatively assume HasBaseReg is true for now.
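
getUnknown now takes an address space, and reconcileNewOffset compares only the
pointee type, so two address uses that differ in type but share an address space
keep that address space rather than collapsing to the unknown one. A standalone
sketch of the same idea with a stand-in struct (string literals play the role of
Type pointers and are compared by identity):

    #include <cstdio>

    struct MemAccessTy {
      const char *MemTy;     // stand-in for llvm::Type*, compared by pointer
      unsigned AddrSpace;
      static constexpr unsigned UnknownAddressSpace = ~0u;
      static MemAccessTy getUnknown(unsigned AS = UnknownAddressSpace) {
        return {"void", AS};  // type is unknown, address space may be kept
      }
    };

    int main() {
      MemAccessTy Use{"i32", 3}, Existing{"float", 3};
      // Old behaviour: any mismatch collapsed to getUnknown() and lost the AS.
      // New behaviour: the address space of the incoming use is preserved.
      MemAccessTy Reconciled =
          (Use.MemTy != Existing.MemTy) ? MemAccessTy::getUnknown(Use.AddrSpace)
                                        : Use;
      std::printf("reconciled type=%s addrspace(%u)\n", Reconciled.MemTy,
                  Reconciled.AddrSpace);
    }
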
diff --git a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 6043e04bb8c..57e6e3ddad9 100644
--- a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -85,6 +85,8 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
+STATISTIC(NumGVNNotMostDominatingLeader,
+ "Number of times a member dominated it's new classes' leader");
//===----------------------------------------------------------------------===//
// GVN Pass
@@ -1073,17 +1075,20 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
if (I == OldClass->NextLeader.first)
OldClass->NextLeader = {nullptr, ~0U};
- // The new instruction and new class leader may either be siblings in the
- // dominator tree, or the new class leader should dominate the new member
- // instruction. We simply check that the member instruction does not properly
- // dominate the new class leader.
- assert(
- !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
- I == NewClass->RepLeader ||
- !DT->properlyDominates(
+ // It's possible, though unlikely, for us to discover equivalences such
+ // that the current leader does not dominate the old one.
+ // This statistic tracks how often this happens.
+ // We assert on phi nodes when this happens, currently, for debugging, because
+ // we want to make sure we name phi node cycles properly.
+ if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader &&
+ I != NewClass->RepLeader &&
+ DT->properlyDominates(
I->getParent(),
- cast<Instruction>(NewClass->RepLeader)->getParent()) &&
- "New class for instruction should not be dominated by instruction");
+ cast<Instruction>(NewClass->RepLeader)->getParent())) {
+ ++NumGVNNotMostDominatingLeader;
+ assert(!isa<PHINode>(I) &&
+ "New class for instruction should not be dominated by instruction");
+ }
if (NewClass->RepLeader != I) {
auto DFSNum = InstrDFS.lookup(I);
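
The hard assert becomes a statistic: a member that properly dominates its new
leader is now just counted, and only the phi-node case is still asserted on while
the naming of phi cycles is being debugged. A standalone sketch of the
count-instead-of-assert pattern, with plain booleans instead of dominator-tree
queries:

    #include <cassert>
    #include <cstdio>

    static unsigned NumGVNNotMostDominatingLeader = 0;  // stand-in for STATISTIC(...)

    static void moveValueToNewCongruenceClass(bool MemberDominatesLeader,
                                              bool IsPhi) {
      if (MemberDominatesLeader) {
        // Tolerated now: just count how often the member dominates its leader.
        ++NumGVNNotMostDominatingLeader;
        // Still treated as a bug for phi nodes, so keep a narrower assert.
        assert(!IsPhi && "phi cycles should still pick a dominating leader");
        (void)IsPhi;
      }
    }

    int main() {
      moveValueToNewCongruenceClass(false, false);
      moveValueToNewCongruenceClass(true, false);   // counted, no longer fatal
      std::printf("not-most-dominating leaders: %u\n",
                  NumGVNNotMostDominatingLeader);
    }
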
diff --git a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 181a324861e..65c814d7a63 100644
--- a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -1521,8 +1521,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
- assert(!Duplicates.count(Factor) &&
- "Shouldn't have two constant factors, missed a canonicalize");
+ if (!Duplicates.insert(Factor).second)
+ continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
@@ -1534,8 +1534,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
APFloat F(CF->getValueAPF());
F.changeSign();
Factor = ConstantFP::get(CF->getContext(), F);
- assert(!Duplicates.count(Factor) &&
- "Shouldn't have two constant factors, missed a canonicalize");
+ if (!Duplicates.insert(Factor).second)
+ continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
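
Instead of asserting that a negated constant factor can never collide with one
already seen, OptimizeAdd now inserts it into the duplicate set and skips the
factor when the insertion reports it was already there. A standalone sketch of
that insert-and-skip dedup, using std::set in place of the pass's set of factor
values:

    #include <cstdio>
    #include <map>
    #include <set>

    int main() {
      int Factors[] = {-3, 7, -3};          // -3 shows up twice after negation
      std::set<int> Duplicates;
      std::map<int, unsigned> Occurrences;
      for (int F : Factors) {
        int Factor = F < 0 ? -F : F;        // canonicalize the sign, like the pass
        if (!Duplicates.insert(Factor).second)
          continue;                         // second sighting: skip, don't assert
        ++Occurrences[Factor];
      }
      for (auto &P : Occurrences)
        std::printf("factor %d counted %u time(s)\n", P.first, P.second);
    }

The key detail is that std::set::insert returns a pair whose second member is
false on the second sighting, which is exactly the condition used to continue.
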
diff --git a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
index 34be9069248..ede381c4c24 100644
--- a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1705,7 +1705,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// If this is an exact definition of this function, then we can propagate
// information about its result into callsites of it.
- if (F.hasExactDefinition())
+ // Don't touch naked functions. They may contain asm returning a
+ // value we don't see, so we may end up interprocedurally propagating
+ // the return value incorrectly.
+ if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked))
Solver.AddTrackedFunction(&F);
// If this function only has direct calls that we can see, we can track its
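
The extra condition keeps IPSCCP from tracking the return value of naked
functions, whose bodies may return through inline asm the solver never sees. A
standalone sketch of the filter, with an invented Func record carrying the two
relevant bits:

    #include <cstdio>
    #include <vector>

    struct Func {
      const char *Name;
      bool HasExactDefinition;
      bool IsNaked;   // body may return through inline asm the pass cannot see
    };

    int main() {
      std::vector<Func> Funcs = {{"normal", true, false},
                                 {"naked_handler", true, true},
                                 {"external_decl", false, false}};
      for (const Func &F : Funcs) {
        bool Track = F.HasExactDefinition && !F.IsNaked;   // the new condition
        std::printf("%-14s: %s return value\n", F.Name,
                    Track ? "track" : "do not track");
      }
    }
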
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f9a602bc268..e346ebd6a00 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -189,11 +189,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
assert(OriginalBB == OldLoop->getHeader() &&
"Header should be first in RPO");
+ NewLoop = new Loop();
Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
- assert(NewLoopParent &&
- "Expected parent loop before sub-loop in RPO");
- NewLoop = new Loop;
- NewLoopParent->addChildLoop(NewLoop);
+
+ if (NewLoopParent)
+ NewLoopParent->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+
NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
return OldLoop;
} else {
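
addClonedBlockToLoopInfo no longer assumes the parent loop was cloned first: the
new loop is created unconditionally and then attached either under its parent's
clone or, when the original loop was outermost, directly to LoopInfo as a
top-level loop. A standalone sketch of that parent-or-top-level attachment, with
a toy Loop struct and a map standing in for the NewLoops table:

    #include <cstdio>
    #include <map>
    #include <vector>

    struct Loop {
      const char *Name;
      std::vector<Loop *> Children;
    };

    int main() {
      std::vector<Loop *> TopLevelLoops;            // plays the role of LoopInfo
      std::map<const Loop *, Loop *> NewLoops;      // original loop -> its clone
      const Loop *OldParent = nullptr;              // original loop was outermost

      Loop Clone{"loop.clone", {}};
      auto It = NewLoops.find(OldParent);
      Loop *NewParent = (It != NewLoops.end()) ? It->second : nullptr;
      if (NewParent)
        NewParent->Children.push_back(&Clone);      // nested: attach under clone
      else
        TopLevelLoops.push_back(&Clone);            // outermost: new top-level loop

      std::printf("top-level clones: %zu\n", TopLevelLoops.size());
    }
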
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 85da3ba899a..d3ea1564115 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -302,17 +302,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
}
NewLoopsMap NewLoops;
- NewLoops[L] = NewLoop;
+ if (NewLoop)
+ NewLoops[L] = NewLoop;
+ else if (ParentLoop)
+ NewLoops[L] = ParentLoop;
+
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
-
- if (NewLoop) {
+
+ // If we're unrolling the outermost loop, there's no remainder loop,
+ // and this block isn't in a nested loop, then the new block is not
+ // in any loop. Otherwise, add it to loopinfo.
+ if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
- } else if (ParentLoop)
- ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
if (Header == *BB) {
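
This is the companion change for runtime unrolling: the old-to-new loop map is
seeded with the remainder loop when one exists (otherwise with the parent), and a
cloned block only goes through loop-info bookkeeping when it will actually end up
inside some loop. A small standalone decision-table sketch of that condition,
with invented boolean names mirroring CreateRemainderLoop, the subloop test, and
ParentLoop:

    #include <cstdio>

    // Invented predicate mirroring the condition added in CloneLoopBlocks.
    static bool addsToLoopInfo(bool CreateRemainderLoop, bool BlockInSubloop,
                               bool HasParentLoop) {
      return CreateRemainderLoop || BlockInSubloop || HasParentLoop;
    }

    int main() {
      // Outermost loop, no remainder loop, block not in a subloop: skipped.
      std::printf("skip case      : %d\n", addsToLoopInfo(false, false, false));
      // A remainder loop is being created: every cloned block is registered.
      std::printf("remainder case : %d\n", addsToLoopInfo(true, false, false));
      // The block lives in a nested loop: it must be registered regardless.
      std::printf("subloop case   : %d\n", addsToLoopInfo(false, true, false));
    }
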
diff --git a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6e30919246c..7b0bddbbb83 100644
--- a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
I->getType()->isTokenTy())
return false;
+
+ // Conservatively return false if I is an inline-asm instruction. Sinking
+ // and merging inline-asm instructions can potentially create arguments
+ // that cannot satisfy the inline-asm constraints.
+ if (const auto *C = dyn_cast<CallInst>(I))
+ if (C->isInlineAsm())
+ return false;
+
// Everything must have only one use too, apart from stores which
// have no uses.
if (!isa<StoreInst>(I) && !I->hasOneUse())
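
canSinkInstructions now refuses as soon as any candidate is an inline-asm call,
because merging such calls across blocks could produce operands that no longer
satisfy the asm constraints. A standalone sketch of the early conservative
bail-out, with an invented Inst record in place of the CallInst/isInlineAsm
query:

    #include <cstdio>
    #include <vector>

    struct Inst {
      const char *Name;
      bool IsCall;
      bool IsInlineAsm;
    };

    static bool canSinkInstructions(const std::vector<Inst> &Candidates) {
      for (const Inst &I : Candidates)
        if (I.IsCall && I.IsInlineAsm)
          return false;            // conservatively refuse to sink inline asm
      return true;
    }

    int main() {
      std::printf("plain calls: %d\n",
                  canSinkInstructions({{"call f", true, false}}));
      std::printf("inline asm : %d\n",
                  canSinkInstructions({{"asm nop", true, true}}));
    }
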
diff --git a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b1f86f8efd..dac7032fa08 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// is consecutive-like, the pointer operand should remain uniform.
else if (hasConsecutiveLikePtrOperand(&I))
ConsecutiveLikePtrs.insert(Ptr);
+
+ // Otherwise, if the memory instruction will be vectorized and its
+ // pointer operand is non-consecutive-like, the memory instruction should
+ // be a gather or scatter operation. Its pointer operand will be
+ // non-uniform.
+ else
+ PossibleNonUniformPtrs.insert(Ptr);
}
// Add to the Worklist all consecutive and consecutive-like pointers that
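
The added else branch completes the classification: every vectorized memory
instruction's pointer operand now lands in one of two buckets, consecutive-like
accesses whose pointer stays uniform, or likely gathers/scatters whose pointer is
treated as possibly non-uniform. A standalone sketch of the two-bucket
classification, with strings standing in for pointer operands:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    struct MemOp {
      std::string Ptr;
      bool ConsecutiveLike;   // unit-stride or reverse unit-stride access
    };

    int main() {
      std::vector<MemOp> Ops = {{"p0", true}, {"p1", false}, {"p2", true}};
      std::set<std::string> ConsecutiveLikePtrs, PossibleNonUniformPtrs;
      for (const MemOp &Op : Ops) {
        if (Op.ConsecutiveLike)
          ConsecutiveLikePtrs.insert(Op.Ptr);     // pointer operand stays uniform
        else
          PossibleNonUniformPtrs.insert(Op.Ptr);  // gather/scatter: non-uniform
      }
      std::printf("uniform candidates: %zu, non-uniform: %zu\n",
                  ConsecutiveLikePtrs.size(), PossibleNonUniformPtrs.size());
    }
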
diff --git a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1c7cbc7edf9..328f2700296 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4026,40 +4026,36 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
if (!V)
return false;
- Value *P = V->getParent();
-
- // Vectorize in current basic block only.
- auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
- auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
- if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
- return false;
-
// Try to vectorize V.
- if (tryToVectorizePair(Op0, Op1, R))
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
return true;
- auto *A = dyn_cast<BinaryOperator>(Op0);
- auto *B = dyn_cast<BinaryOperator>(Op1);
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
// Try to skip B.
if (B && B->hasOneUse()) {
- auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
- auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
- if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
+ BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (tryToVectorizePair(A, B0, R)) {
return true;
- if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
+ }
+ if (tryToVectorizePair(A, B1, R)) {
return true;
+ }
}
// Try to skip A.
if (A && A->hasOneUse()) {
- auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
- auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
- if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
+ BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (tryToVectorizePair(A0, B, R)) {
return true;
- if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
+ }
+ if (tryToVectorizePair(A1, B, R)) {
return true;
+ }
}
- return false;
+ return 0;
}
/// \brief Generate a shuffle mask to be used in a reduction tree.
@@ -4511,143 +4507,29 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
-namespace {
-/// Tracks instructons and its children.
-class WeakVHWithLevel final : public CallbackVH {
- /// Operand index of the instruction currently beeing analized.
- unsigned Level = 0;
- /// Is this the instruction that should be vectorized, or are we now
- /// processing children (i.e. operands of this instruction) for potential
- /// vectorization?
- bool IsInitial = true;
-
-public:
- explicit WeakVHWithLevel() = default;
- WeakVHWithLevel(Value *V) : CallbackVH(V){};
- /// Restart children analysis each time it is repaced by the new instruction.
- void allUsesReplacedWith(Value *New) override {
- setValPtr(New);
- Level = 0;
- IsInitial = true;
- }
- /// Check if the instruction was not deleted during vectorization.
- bool isValid() const { return !getValPtr(); }
- /// Is the istruction itself must be vectorized?
- bool isInitial() const { return IsInitial; }
- /// Try to vectorize children.
- void clearInitial() { IsInitial = false; }
- /// Are all children processed already?
- bool isFinal() const {
- assert(getValPtr() &&
- (isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() >= Level));
- return getValPtr() &&
- cast<Instruction>(getValPtr())->getNumOperands() == Level;
- }
- /// Get next child operation.
- Value *nextOperand() {
- assert(getValPtr() && isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() > Level);
- return cast<Instruction>(getValPtr())->getOperand(Level++);
- }
- virtual ~WeakVHWithLevel() = default;
-};
-} // namespace
-
/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
- PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
- TargetTransformInfo *TTI,
- const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+ BoUpSLP &R, TargetTransformInfo *TTI,
+ unsigned MinRegSize) {
if (!ShouldVectorizeHor)
return false;
- if (!Root)
- return false;
-
- if (Root->getParent() != BB)
+ HorizontalReduction HorRdx(MinRegSize);
+ if (!HorRdx.matchAssociativeReduction(P, BI))
return false;
- SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
- SmallSet<Value *, 8> VisitedInstrs;
- bool Res = false;
- while (!Stack.empty()) {
- Value *V = Stack.back();
- if (!V) {
- Stack.pop_back();
- continue;
- }
- auto *Inst = dyn_cast<Instruction>(V);
- if (!Inst || isa<PHINode>(Inst)) {
- Stack.pop_back();
- continue;
- }
- if (Stack.back().isInitial()) {
- Stack.back().clearInitial();
- if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
- HorizontalReduction HorRdx(R.getMinVecRegSize());
- if (HorRdx.matchAssociativeReduction(P, BI)) {
- // If there is a sufficient number of reduction values, reduce
- // to a nearby power-of-2. Can safely generate oversized
- // vectors and rely on the backend to split them to legal sizes.
- HorRdx.ReduxWidth =
- std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
-
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- P = nullptr;
- continue;
- }
- }
- if (P) {
- Inst = dyn_cast<Instruction>(BI->getOperand(0));
- if (Inst == P)
- Inst = dyn_cast<Instruction>(BI->getOperand(1));
- if (!Inst) {
- P = nullptr;
- continue;
- }
- }
- }
- P = nullptr;
- if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- Res = true;
- continue;
- }
- }
- if (Stack.back().isFinal()) {
- Stack.pop_back();
- continue;
- }
- if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
- if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
- Stack.size() < RecursionMaxDepth)
- Stack.push_back(NextV);
- }
- return Res;
-}
-
-bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
- BasicBlock *BB, BoUpSLP &R,
- TargetTransformInfo *TTI) {
- if (!V)
- return false;
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
+ // If there is a sufficient number of reduction values, reduce
+ // to a nearby power-of-2. Can safely generate oversized
+ // vectors and rely on the backend to split them to legal sizes.
+ HorRdx.ReduxWidth =
+ std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
- if (!isa<BinaryOperator>(I))
- P = nullptr;
- // Try to match and vectorize a horizontal reduction.
- return canBeVectorized(P, I, BB, R, TTI,
- [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
- return tryToVectorize(BI, R);
- });
+ return HorRdx.tryToReduce(R, TTI);
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
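
canMatchHorizontalReduction carries over the width computation from the removed
code: the number of reduction values is rounded down to a power of two and
clamped to at least 4, and oversized vectors are left for the backend to split
into legal sizes. A worked standalone example of that arithmetic, with
PowerOf2Floor reimplemented so the snippet has no LLVM dependency:

    #include <cstdint>
    #include <cstdio>

    static uint64_t PowerOf2Floor(uint64_t V) {
      if (V == 0)
        return 0;
      uint64_t P = 1;
      while (P <= V / 2)
        P *= 2;
      return P;                 // largest power of two <= V
    }

    static uint64_t reduxWidth(uint64_t NumReductionValues) {
      uint64_t W = PowerOf2Floor(NumReductionValues);
      return W < 4 ? 4 : W;     // std::max((uint64_t)4, ...) in the pass
    }

    int main() {
      const uint64_t Counts[] = {3, 8, 13, 70};
      for (uint64_t N : Counts)
        std::printf("%llu values -> width %llu\n", (unsigned long long)N,
                    (unsigned long long)reduxWidth(N));
    }

For example, 13 reduction values give a width of 8, and anything below 4 is still
attempted at width 4.
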
@@ -4717,42 +4599,67 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (P->getNumIncomingValues() != 2)
return Changed;
+ Value *Rdx = getReductionValue(DT, P, BB, LI);
+
+ // Check if this is a Binary Operator.
+ BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+ if (!BI)
+ continue;
+
// Try to match and vectorize a horizontal reduction.
- if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
- TTI)) {
+ if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
+
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P)
+ Inst = BI->getOperand(1);
+
+ if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid value.
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
continue;
}
- if (ShouldStartVectorizeHorAtStore) {
- if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
- // Try to match and vectorize a horizontal reduction.
- if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
- TTI)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
+ if (ShouldStartVectorizeHorAtStore)
+ if (StoreInst *SI = dyn_cast<StoreInst>(it))
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+ if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+ R.getMinVecRegSize()) ||
+ tryToVectorize(BinOp, R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
}
- }
- }
// Try to vectorize horizontal reductions feeding into a return.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
- if (RI->getNumOperands() != 0) {
- // Try to match and vectorize a horizontal reduction.
- if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+ if (RI->getNumOperands() != 0)
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+ DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+ if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+ R.getMinVecRegSize()) ||
+ tryToVectorizePair(BinOp->getOperand(0), BinOp->getOperand(1),
+ R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
}
- }
- }
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
@@ -4765,14 +4672,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- for (int I = 0; I < 2; ++I) {
- if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
- Changed = true;
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
- it = BB->begin();
- e = BB->end();
- break;
+ for (int i = 0; i < 2; ++i) {
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+ if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid value.
+ it = BB->begin();
+ e = BB->end();
+ break;
+ }
}
}
continue;