Diffstat (limited to 'gnu/llvm/lib')
90 files changed, 1538 insertions, 1199 deletions
diff --git a/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e8d86bdbca5..c8d05794949 100644 --- a/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/gnu/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return MayAlias; AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, - AAMDNodes(), V2, V2Size, V2AAInfo, - nullptr, UnderlyingV2); + AAMDNodes(), V2, MemoryLocation::UnknownSize, + V2AAInfo, nullptr, UnderlyingV2); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values - // cannot alias per GEP semantics: "A pointer value formed from a - // getelementptr instruction is associated with the addresses associated - // with the first operand of the getelementptr". + // cannot alias per GEP semantics: "Any memory access must be done through + // a pointer value associated with an address range of the memory access, + // otherwise the behavior is undefined.". return R; // If the max search depth is reached the result is undefined diff --git a/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 6387bb36166..f5ba637e58e 100644 --- a/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/gnu/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) diff --git a/gnu/llvm/lib/Analysis/ScalarEvolution.cpp b/gnu/llvm/lib/Analysis/ScalarEvolution.cpp index b3905cc01e8..ed328f12c46 100644 --- a/gnu/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/gnu/llvm/lib/Analysis/ScalarEvolution.cpp @@ -127,10 +127,15 @@ static cl::opt<unsigned> MulOpsInlineThreshold( cl::desc("Threshold for inlining multiplication operands into a SCEV"), cl::init(1000)); -static cl::opt<unsigned> - MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden, - cl::desc("Maximum depth of recursive compare complexity"), - cl::init(32)); +static cl::opt<unsigned> MaxSCEVCompareDepth( + "scalar-evolution-max-scev-compare-depth", cl::Hidden, + cl::desc("Maximum depth of recursive SCEV complexity comparisons"), + cl::init(32)); + +static cl::opt<unsigned> MaxValueCompareDepth( + "scalar-evolution-max-value-compare-depth", cl::Hidden, + cl::desc("Maximum depth of recursive value complexity comparisons"), + cl::init(2)); //===----------------------------------------------------------------------===// // SCEV class definitions @@ -481,7 +486,7 @@ static int CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache, const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth) { - if (Depth > MaxCompareDepth || EqCache.count({LV, RV})) + if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV})) return 0; // Order pointer values after integer values. 
This helps SCEVExpander form @@ -568,7 +573,7 @@ static int CompareSCEVComplexity( if (LType != RType) return (int)LType - (int)RType; - if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS})) + if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS})) return 0; // Aside from the getSCEVType() ordering, the particular ordering // isn't very important except that it's beneficial to be consistent, diff --git a/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index d9e249aad21..a46e49ccde8 100644 --- a/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/gnu/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -512,7 +512,7 @@ private: } Metadata *getFnMetadataByID(unsigned ID) { - return MDLoader->getMetadataFwdRef(ID); + return MDLoader->getMetadataFwdRefOrLoad(ID); } BasicBlock *getBasicBlock(unsigned ID) const { diff --git a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 4a5d18e2db7..b89f5be4a36 100644 --- a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -448,6 +448,7 @@ class MetadataLoader::MetadataLoaderImpl { bool StripTBAA = false; bool HasSeenOldLoopTags = false; + bool NeedUpgradeToDIGlobalVariableExpression = false; /// True if metadata is being parsed for a module being ThinLTO imported. bool IsImporting = false; @@ -473,6 +474,45 @@ class MetadataLoader::MetadataLoaderImpl { CUSubprograms.clear(); } + /// Upgrade old-style bare DIGlobalVariables to DIGlobalVariableExpressions. + void upgradeCUVariables() { + if (!NeedUpgradeToDIGlobalVariableExpression) + return; + + // Upgrade list of variables attached to the CUs. + if (NamedMDNode *CUNodes = TheModule.getNamedMetadata("llvm.dbg.cu")) + for (unsigned I = 0, E = CUNodes->getNumOperands(); I != E; ++I) { + auto *CU = cast<DICompileUnit>(CUNodes->getOperand(I)); + if (auto *GVs = dyn_cast_or_null<MDTuple>(CU->getRawGlobalVariables())) + for (unsigned I = 0; I < GVs->getNumOperands(); I++) + if (auto *GV = + dyn_cast_or_null<DIGlobalVariable>(GVs->getOperand(I))) { + auto *DGVE = + DIGlobalVariableExpression::getDistinct(Context, GV, nullptr); + GVs->replaceOperandWith(I, DGVE); + } + } + + // Upgrade variables attached to globals. + for (auto &GV : TheModule.globals()) { + SmallVector<MDNode *, 1> MDs, NewMDs; + GV.getMetadata(LLVMContext::MD_dbg, MDs); + GV.eraseMetadata(LLVMContext::MD_dbg); + for (auto *MD : MDs) + if (auto *DGV = dyn_cast_or_null<DIGlobalVariable>(MD)) { + auto *DGVE = + DIGlobalVariableExpression::getDistinct(Context, DGV, nullptr); + GV.addMetadata(LLVMContext::MD_dbg, *DGVE); + } else + GV.addMetadata(LLVMContext::MD_dbg, *MD); + } + } + + void upgradeDebugInfo() { + upgradeCUSubprograms(); + upgradeCUVariables(); + } + public: MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, @@ -485,8 +525,21 @@ public: Error parseMetadata(bool ModuleLevel); bool hasFwdRefs() const { return MetadataList.hasFwdRefs(); } - Metadata *getMetadataFwdRef(unsigned Idx) { - return MetadataList.getMetadataFwdRef(Idx); + + Metadata *getMetadataFwdRefOrLoad(unsigned ID) { + if (ID < MDStringRef.size()) + return lazyLoadOneMDString(ID); + if (auto *MD = MetadataList.lookup(ID)) + return MD; + // If lazy-loading is enabled, we try recursively to load the operand + // instead of creating a temporary. 
+ if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) { + PlaceholderQueue Placeholders; + lazyLoadOneMetadata(ID, Placeholders); + resolveForwardRefsAndPlaceholders(Placeholders); + return MetadataList.lookup(ID); + } + return MetadataList.getMetadataFwdRef(ID); } MDNode *getMDNodeFwdRefOrNull(unsigned Idx) { @@ -713,7 +766,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { // Reading the named metadata created forward references and/or // placeholders, that we flush here. resolveForwardRefsAndPlaceholders(Placeholders); - upgradeCUSubprograms(); + upgradeDebugInfo(); // Return at the beginning of the block, since it is easy to skip it // entirely from there. Stream.ReadBlockEnd(); // Pop the abbrev block context. @@ -737,7 +790,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { return error("Malformed block"); case BitstreamEntry::EndBlock: resolveForwardRefsAndPlaceholders(Placeholders); - upgradeCUSubprograms(); + upgradeDebugInfo(); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -768,13 +821,12 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata( unsigned ID, PlaceholderQueue &Placeholders) { assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size()); assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); -#ifndef NDEBUG // Lookup first if the metadata hasn't already been loaded. if (auto *MD = MetadataList.lookup(ID)) { auto *N = dyn_cast_or_null<MDNode>(MD); - assert(N && N->isTemporary() && "Lazy loading an already loaded metadata"); + if (!N->isTemporary()) + return; } -#endif SmallVector<uint64_t, 64> Record; StringRef Blob; IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]); @@ -827,8 +879,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( auto getMD = [&](unsigned ID) -> Metadata * { if (ID < MDStringRef.size()) return lazyLoadOneMDString(ID); - if (!IsDistinct) + if (!IsDistinct) { + if (auto *MD = MetadataList.lookup(ID)) + return MD; + // If lazy-loading is enabled, we try recursively to load the operand + // instead of creating a temporary. + if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) { + // Create a temporary for the node that is referencing the operand we + // will lazy-load. It is needed before recursing in case there are + // uniquing cycles. + MetadataList.getMetadataFwdRef(NextMetadataNo); + lazyLoadOneMetadata(ID, Placeholders); + return MetadataList.lookup(ID); + } + // Return a temporary. return MetadataList.getMetadataFwdRef(ID); + } if (auto *MD = MetadataList.getMetadataIfResolved(ID)) return MD; return &Placeholders.getPlaceholderOp(ID); @@ -893,7 +959,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. 
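The getMetadataFwdRefOrLoad() change above prefers eagerly deserializing an indexed operand over handing out a temporary forward reference. A minimal sketch of that load-or-forward-ref pattern, with hypothetical Node/LazyLoader stand-ins for llvm::Metadata and the MetadataList/GlobalMetadataBitPosIndex machinery (not the actual LLVM API):

```cpp
#include <functional>
#include <unordered_map>
#include <utility>
#include <vector>

// Hypothetical node type standing in for llvm::Metadata.
struct Node { bool Temporary = false; };

// Return a cached node if present, deserialize it now if its bit position
// is indexed, and only fall back to a temporary forward reference for IDs
// we cannot load yet -- the same ordering as getMetadataFwdRefOrLoad().
class LazyLoader {
  std::unordered_map<unsigned, Node *> Loaded;   // MetadataList stand-in
  std::vector<uint64_t> BitPosIndex;             // GlobalMetadataBitPosIndex stand-in
  std::function<Node *(uint64_t)> ParseAtBitPos; // deserializer callback

public:
  LazyLoader(std::vector<uint64_t> Index, std::function<Node *(uint64_t)> P)
      : BitPosIndex(std::move(Index)), ParseAtBitPos(std::move(P)) {}

  Node *getOrLoad(unsigned ID) {
    auto It = Loaded.find(ID);
    if (It != Loaded.end())
      return It->second; // already materialized
    if (ID < BitPosIndex.size()) {
      // Lazy-loading is possible: parse the operand now instead of
      // creating a temporary.
      Node *N = ParseAtBitPos(BitPosIndex[ID]);
      Loaded[ID] = N;
      return N;
    }
    // Out of lazy-loadable range: hand back a temporary forward ref
    // (leaked here for brevity; this is only a sketch).
    Node *Tmp = new Node{/*Temporary=*/true};
    Loaded[ID] = Tmp;
    return Tmp;
  }
};
```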
auto dropRecord = [&] { - MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo); + NextMetadataNo++; }; if (Record.size() != 2) { dropRecord(); @@ -908,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OLD_NODE: { @@ -933,7 +1001,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( } else Elts.push_back(nullptr); } - MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); + MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_VALUE: { @@ -946,7 +1015,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DISTINCT_NODE: @@ -959,7 +1029,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Elts.push_back(getMDOrNull(ID)); MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) : MDNode::get(Context, Elts), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LOCATION: { @@ -973,7 +1044,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Metadata *InlinedAt = getMDOrNull(Record[4]); MetadataList.assignValue( GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -993,7 +1065,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( DwarfOps.push_back(getMDOrNull(Record[I])); MetadataList.assignValue( GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_SUBRANGE: { @@ -1004,7 +1077,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, (Context, Record[1], unrotateSign(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_ENUMERATOR: { @@ -1015,7 +1089,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]), getMDString(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_BASIC_TYPE: { @@ -1027,7 +1102,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIBasicType, (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_DERIVED_TYPE: { @@ -1043,7 +1119,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getDITypeRefOrNull(Record[5]), getDITypeRefOrNull(Record[6]), Record[7], Record[8], Record[9], Flags, getDITypeRefOrNull(Record[11]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPOSITE_TYPE: { @@ -1108,7 +1185,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (!IsNotUsedInTypeRef && Identifier) MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT)); - MetadataList.assignValue(CT, NextMetadataNo++); + MetadataList.assignValue(CT, 
NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_SUBROUTINE_TYPE: { @@ -1125,7 +1203,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1139,7 +1218,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDString(Record[4]), getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } @@ -1155,7 +1235,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() == 3 ? DIFile::CSK_None : static_cast<DIFile::ChecksumKind>(Record[3]), Record.size() == 3 ? nullptr : getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_COMPILE_UNIT: { @@ -1174,7 +1255,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( Record.size() <= 14 ? 0 : Record[14], Record.size() <= 16 ? true : Record[16]); - MetadataList.assignValue(CU, NextMetadataNo++); + MetadataList.assignValue(CU, NextMetadataNo); + NextMetadataNo++; // Move the Upgrade the list of subprograms. if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11])) @@ -1221,7 +1303,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[16 + Offset]), // declaration getMDOrNull(Record[17 + Offset]) // variables )); - MetadataList.assignValue(SP, NextMetadataNo++); + MetadataList.assignValue(SP, NextMetadataNo); + NextMetadataNo++; // Upgrade sp->function mapping to function->sp mapping. if (HasFn) { @@ -1246,7 +1329,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlock, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { @@ -1258,7 +1342,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DILexicalBlockFile, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_NAMESPACE: { @@ -1272,7 +1357,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], ExportSymbols)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO: { @@ -1284,7 +1370,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacro, (Context, Record[1], Record[2], getMDString(Record[3]), getMDString(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_MACRO_FILE: { @@ -1296,7 +1383,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( GET_OR_DISTINCT(DIMacroFile, (Context, Record[1], Record[2], getMDOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_TYPE: { @@ -1307,7 +1395,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, (Context, getMDString(Record[1]), getDITypeRefOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_TEMPLATE_VALUE: { @@ -1320,7 +1409,8 @@ Error 
MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, Record[1], getMDString(Record[2]), getDITypeRefOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR: { @@ -1338,7 +1428,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[4]), Record[5], getDITypeRefOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[10]), Record[11])), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; } else if (Version == 0) { // Upgrade old metadata, which stored a global variable reference or a // ConstantInt here. @@ -1369,10 +1460,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getDITypeRefOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[10]), AlignInBits)); - auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr); - MetadataList.assignValue(DGVE, NextMetadataNo++); + DIGlobalVariableExpression *DGVE = nullptr; + if (Attach || Expr) + DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr); + else + NeedUpgradeToDIGlobalVariableExpression = true; if (Attach) Attach->addDebugInfo(DGVE); + + auto *MDNode = Expr ? cast<Metadata>(DGVE) : cast<Metadata>(DGV); + MetadataList.assignValue(MDNode, NextMetadataNo); + NextMetadataNo++; } else return error("Invalid record"); @@ -1403,7 +1501,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], getDITypeRefOrNull(Record[5 + HasTag]), Record[6 + HasTag], Flags, AlignInBits)), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_EXPRESSION: { @@ -1420,7 +1519,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_GLOBAL_VAR_EXPR: { @@ -1431,7 +1531,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_OBJC_PROPERTY: { @@ -1445,7 +1546,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getDITypeRefOrNull(Record[7]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_IMPORTED_ENTITY: { @@ -1458,7 +1560,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( (Context, Record[1], getMDOrNull(Record[2]), getDITypeRefOrNull(Record[3]), Record[4], getMDString(Record[5]))), - NextMetadataNo++); + NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRING_OLD: { @@ -1468,13 +1571,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String); ++NumMDStringLoaded; Metadata *MD = MDString::get(Context, String); - MetadataList.assignValue(MD, NextMetadataNo++); + MetadataList.assignValue(MD, NextMetadataNo); + NextMetadataNo++; break; } case bitc::METADATA_STRINGS: { auto CreateNextMDString = [&](StringRef Str) { ++NumMDStringLoaded; - MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++); + MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo); + NextMetadataNo++; }; if (Error Err = 
parseMetadataStrings(Record, Blob, CreateNextMDString)) return Err; @@ -1714,8 +1819,8 @@ bool MetadataLoader::hasFwdRefs() const { return Pimpl->hasFwdRefs(); } /// Return the given metadata, creating a replaceable forward reference if /// necessary. -Metadata *MetadataLoader::getMetadataFwdRef(unsigned Idx) { - return Pimpl->getMetadataFwdRef(Idx); +Metadata *MetadataLoader::getMetadataFwdRefOrLoad(unsigned Idx) { + return Pimpl->getMetadataFwdRefOrLoad(Idx); } MDNode *MetadataLoader::getMDNodeFwdRefOrNull(unsigned Idx) { diff --git a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h index 753846c6ead..442dfc94e4e 100644 --- a/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h +++ b/gnu/llvm/lib/Bitcode/Reader/MetadataLoader.h @@ -63,7 +63,7 @@ public: /// Return the given metadata, creating a replaceable forward reference if /// necessary. - Metadata *getMetadataFwdRef(unsigned Idx); + Metadata *getMetadataFwdRefOrLoad(unsigned Idx); MDNode *getMDNodeFwdRefOrNull(unsigned Idx); diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9f6caa95a9e..24fdbfc901f 100644 --- a/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/gnu/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -567,6 +567,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->AddBlankLine(); } +/// Emit the directive and value for debug thread local expression +/// +/// \p Value - The value to emit. +/// \p Size - The size of the integer (in bytes) to emit. +void AsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + OutStreamer->EmitValue(Value, Size); +} + /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index a8a3b30d5b6..87991899547 100644 --- a/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -484,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); + AP->EmitDebugValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 0db623bbc29..d904372af58 100644 --- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -522,22 +522,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. 
- if (DV.getFrameIndex().empty()) + if (!DV.hasFrameIndexExprs()) return VariableDie; - auto Expr = DV.getExpression().begin(); DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - for (auto FI : DV.getFrameIndex()) { + for (auto &Fragment : DV.getFrameIndexExprs()) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); - DwarfExpr.addFragmentOffset(*Expr); + int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); + DwarfExpr.addFragmentOffset(Fragment.Expr); DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), FrameReg, Offset); - DwarfExpr.AddExpression(*Expr); - ++Expr; + DwarfExpr.AddExpression(Fragment.Expr); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b465b98f368..91a3d0989cc 100644 --- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -199,6 +199,15 @@ const DIType *DbgVariable::getType() const { return Ty; } +ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { + std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), + [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { + return A.Expr->getFragmentInfo()->OffsetInBits < + B.Expr->getFragmentInfo()->OffsetInBits; + }); + return FrameIndexExprs; +} + static const DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), diff --git a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index e5bf33db81f..253e3f06200 100644 --- a/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/gnu/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -54,7 +54,7 @@ class MachineModuleInfo; /// /// Variables can be created from allocas, in which case they're generated from /// the MMI table. Such variables can have multiple expressions and frame -/// indices. The \a Expr and \a FrameIndices array must match. +/// indices. /// /// Variables can be created from \c DBG_VALUE instructions. Those whose /// location changes over time use \a DebugLocListIndex, while those with a @@ -64,11 +64,16 @@ class MachineModuleInfo; class DbgVariable { const DILocalVariable *Var; /// Variable Descriptor. const DILocation *IA; /// Inlined at location. - SmallVector<const DIExpression *, 1> Expr; /// Complex address. DIE *TheDIE = nullptr; /// Variable DIE. unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. - SmallVector<int, 1> FrameIndex; /// Frame index. + + struct FrameIndexExpr { + int FI; + const DIExpression *Expr; + }; + mutable SmallVector<FrameIndexExpr, 1> + FrameIndexExprs; /// Frame index + expression. public: /// Construct a DbgVariable. @@ -80,21 +85,18 @@ public: /// Initialize from the MMI table. 
void initializeMMI(const DIExpression *E, int FI) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert((!E || E->isValid()) && "Expected valid expression"); assert(~FI && "Expected valid index"); - Expr.push_back(E); - FrameIndex.push_back(FI); + FrameIndexExprs.push_back({FI, E}); } /// Initialize from a DBG_VALUE instruction. void initializeDbgValue(const MachineInstr *DbgValue) { - assert(Expr.empty() && "Already initialized?"); - assert(FrameIndex.empty() && "Already initialized?"); + assert(FrameIndexExprs.empty() && "Already initialized?"); assert(!MInsn && "Already initialized?"); assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); @@ -103,16 +105,15 @@ public: MInsn = DbgValue; if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) - Expr.push_back(E); + FrameIndexExprs.push_back({0, E}); } // Accessors. const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } - ArrayRef<const DIExpression *> getExpression() const { return Expr; } const DIExpression *getSingleExpression() const { - assert(MInsn && Expr.size() <= 1); - return Expr.size() ? Expr[0] : nullptr; + assert(MInsn && FrameIndexExprs.size() <= 1); + return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } @@ -120,7 +121,9 @@ public: unsigned getDebugLocListIndex() const { return DebugLocListIndex; } StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } - ArrayRef<int> getFrameIndex() const { return FrameIndex; } + /// Get the FI entries, sorted by fragment offset. + ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; + bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } void addMMIEntry(const DbgVariable &V) { assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); @@ -128,16 +131,15 @@ public: assert(V.Var == Var && "conflicting variable"); assert(V.IA == IA && "conflicting inlined-at location"); - assert(!FrameIndex.empty() && "Expected an MMI entry"); - assert(!V.FrameIndex.empty() && "Expected an MMI entry"); - assert(Expr.size() == FrameIndex.size() && "Mismatched expressions"); - assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions"); + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - Expr.append(V.Expr.begin(), V.Expr.end()); - FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end()); - assert(all_of(Expr, [](const DIExpression *E) { - return E && E->isFragment(); - }) && "conflicting locations for variable"); + FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); + assert(all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + }) && + "conflicting locations for variable"); } // Translate tag to proper Dwarf tag. 
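The DbgVariable hunks above replace two parallel vectors (Expr and FrameIndex) that had to be kept in sync with a single vector of {FI, Expr} records. A compilable sketch of the shape of that refactor, using stub types rather than the real DIExpression/DbgVariable classes:

```cpp
#include <algorithm>
#include <vector>

struct DIExpressionStub { unsigned OffsetInBits; }; // stand-in for DIExpression

// One record per MMI entry: the frame index and its expression travel
// together, so they can no longer drift out of sync.
struct FrameIndexExpr {
  int FI;
  const DIExpressionStub *Expr;
};

class DbgVariableStub {
  mutable std::vector<FrameIndexExpr> FrameIndexExprs;

public:
  void addEntry(int FI, const DIExpressionStub *E) {
    FrameIndexExprs.push_back({FI, E});
  }

  // Mirrors getFrameIndexExprs(): sort lazily by fragment offset so the
  // DWARF fragments come out in ascending order, then hand out a view.
  const std::vector<FrameIndexExpr> &getSorted() const {
    std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
              [](const FrameIndexExpr &A, const FrameIndexExpr &B) {
                return A.Expr->OffsetInBits < B.Expr->OffsetInBits;
              });
    return FrameIndexExprs;
  }
};
```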
@@ -167,11 +169,11 @@ public: bool hasComplexAddress() const { assert(MInsn && "Expected DBG_VALUE, not MMI variable"); - assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable"); - assert( - (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) && - "Invalid Expr for DBG_VALUE"); - return !Expr.empty(); + assert((FrameIndexExprs.empty() || + (FrameIndexExprs.size() == 1 && + FrameIndexExprs[0].Expr->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !FrameIndexExprs.empty(); } bool isBlockByrefVariable() const; const DIType *getType() const; diff --git a/gnu/llvm/lib/CodeGen/BranchFolding.cpp b/gnu/llvm/lib/CodeGen/BranchFolding.cpp index a898e327ccc..6fba161033b 100644 --- a/gnu/llvm/lib/CodeGen/BranchFolding.cpp +++ b/gnu/llvm/lib/CodeGen/BranchFolding.cpp @@ -49,7 +49,6 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); STATISTIC(NumHoist , "Number of times common instructions are hoisted"); -STATISTIC(NumTailCalls, "Number of tail calls optimized"); static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -1387,42 +1386,6 @@ ReoptimizeBlock: } } - if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction()->optForSize()) { - // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch - // direction, thereby defeating careful block placement and regressing - // performance. Therefore, only consider this for optsize functions. - MachineInstr &TailCall = *MBB->getFirstNonDebugInstr(); - if (TII->isUnconditionalTailCall(TailCall)) { - MachineBasicBlock *Pred = *MBB->pred_begin(); - MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; - SmallVector<MachineOperand, 4> PredCond; - bool PredAnalyzable = - !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); - - if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) { - // The predecessor has a conditional branch to this block which consists - // of only a tail call. Try to fold the tail call into the conditional - // branch. - if (TII->canMakeTailCallConditional(PredCond, TailCall)) { - // TODO: It would be nice if analyzeBranch() could provide a pointer - // to the branch insturction so replaceBranchWithTailCall() doesn't - // have to search for it. - TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall); - ++NumTailCalls; - Pred->removeSuccessor(MBB); - MadeChange = true; - return MadeChange; - } - } - // If the predecessor is falling through to this block, we could reverse - // the branch condition and fold the tail call into that. However, after - // that we might have to re-arrange the CFG to fall through to the other - // block and there is a high risk of regressing code size rather than - // improving it. - } - } - // Analyze the branch in the current block. MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector<MachineOperand, 4> CurCond; diff --git a/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp index e7c6b03f1a4..32c57e3e370 100644 --- a/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/gnu/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -707,9 +707,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. 
- SmallVector<LiveReg, 4> Regs; - for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { - int rx = *i; + SmallVector<const LiveReg *, 4> Regs; + for (int rx : used) { assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -718,16 +717,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; } // Sorted insertion. - bool Inserted = false; - for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); - i != e && !Inserted; ++i) { - if (LR.Def < i->Def) { - Inserted = true; - Regs.insert(i, LR); - } - } - if (!Inserted) - Regs.push_back(LR); + auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, + [](const LiveReg *LHS, const LiveReg *RHS) { + return LHS->Def < RHS->Def; + }); + Regs.insert(I, &LR); } // doms are now sorted in order of appearance. Try to merge them all, giving @@ -735,14 +729,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { - dv = Regs.pop_back_val().Value; + dv = Regs.pop_back_val()->Value; // Force the first dv to match the current instruction. dv->AvailableDomains = dv->getCommonDomains(available); assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *Latest = Regs.pop_back_val().Value; + DomainValue *Latest = Regs.pop_back_val()->Value; // Skip already merged values. if (Latest == dv || Latest->Next) continue; diff --git a/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp index c8f79d7fb71..ec35b3f6449 100644 --- a/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/gnu/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -174,7 +174,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, - unsigned MaxFactor) { + unsigned MaxFactor, unsigned OpNumElts) { unsigned NumElts = Mask.size(); if (NumElts < 4) return false; @@ -246,6 +246,9 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, if (StartMask < 0) break; + // We must stay within the vectors; This case can happen with undefs. + if (StartMask + LaneLen > OpNumElts*2) + break; } // Found an interleaved mask of current factor. @@ -406,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore( // Check if the shufflevector is RE-interleave shuffle. 
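The ExecutionDepsFix hunk above swaps a hand-rolled "sorted insertion" loop for std::upper_bound. A minimal sketch of that idiom with a hypothetical LiveReg stand-in:

```cpp
#include <algorithm>
#include <vector>

struct LiveRegStub { unsigned Def; }; // stand-in for ExeDepsFix's LiveReg

void insertSorted(std::vector<const LiveRegStub *> &Regs,
                  const LiveRegStub *LR) {
  // upper_bound finds the first element strictly greater than LR, which is
  // the same position the removed manual scan computed; equal keys keep
  // their relative order, so the vector stays sorted by Def.
  auto I = std::upper_bound(Regs.begin(), Regs.end(), LR,
                            [](const LiveRegStub *LHS,
                               const LiveRegStub *RHS) {
                              return LHS->Def < RHS->Def;
                            });
  Regs.insert(I, LR);
}
```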
unsigned Factor; - if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor)) + unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements(); + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts)) return false; DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); diff --git a/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 92d043df26b..5de6dec29fb 100644 --- a/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/gnu/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -61,6 +61,7 @@ namespace { private: void ClobberRegister(unsigned Reg); + void ReadRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); @@ -120,6 +121,18 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) { } } +void MachineCopyPropagation::ReadRegister(unsigned Reg) { + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + Reg2MIMap::iterator CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); + MaybeDeadCopies.remove(CI->second); + } + } +} + /// Return true if \p PreviousCopy did copy register \p Src to register \p Def. /// This fact may have been obscured by sub register usage or may not be true at /// all even though Src and Def are subregisters of the registers used in @@ -212,12 +225,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. - for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + ReadRegister(Src); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ReadRegister(Reg); } DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -234,6 +249,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // ... // %xmm2<def> = copy %xmm9 ClobberRegister(Def); + for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + ClobberRegister(Reg); + } // Remember Def is defined by the copy. for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); @@ -268,17 +291,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { if (MO.isDef()) { Defs.push_back(Reg); - continue; - } - - // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - Reg2MIMap::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) { - DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); - MaybeDeadCopies.remove(CI->second); - } + } else { + ReadRegister(Reg); } // Treat undef use like defs for copy propagation but not for // dead copy. 
We would need to do a liveness check to be sure the copy diff --git a/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp b/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp index 0f4bb59c49a..4bb3c229afc 100644 --- a/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/gnu/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1556,9 +1556,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(DstReg) && "Not a reserved register"); - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &RHS = LIS->getInterval(SrcReg); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1592,17 +1593,36 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Delete the identity copy. MachineInstr *CopyMI; if (CP.isFlipped()) { - CopyMI = MRI->getVRegDef(RHS.reg); + // Physreg is copied into vreg + // %vregY = COPY %X + // ... //< no other def of %X here + // use %vregY + // => + // ... + // use %X + CopyMI = MRI->getVRegDef(SrcReg); } else { - if (!MRI->hasOneNonDBGUse(RHS.reg)) { + // VReg is copied into physreg: + // %vregX = def + // ... //< no other def or use of %Y here + // %Y = COPY %vregX + // => + // %Y = def + // ... + if (!MRI->hasOneNonDBGUse(SrcReg)) { DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); return false; } - MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); - CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); - const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); - const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot(); + if (!LIS->intervalIsInOneMBB(RHS)) { + DEBUG(dbgs() << "\t\tComplex control flow!\n"); + return false; + } + + MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); + CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); + SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); + SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); if (!MRI->isConstantPhysReg(DstReg)) { // We checked above that there are no interfering defs of the physical @@ -1629,8 +1649,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // We're going to remove the copy which defines a physical reserved // register, so remove its valno, etc. - DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " - << CopyRegIdx << "\n"); + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI) + << " at " << CopyRegIdx << "\n"); LIS->removePhysRegDefAt(DstReg, CopyRegIdx); // Create a new dead def at the new def location. diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 680f62fa91b..2c7bffe7650 100644 --- a/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8123,9 +8123,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. 
+ if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8137,7 +8140,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8367,10 +8373,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8384,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -13060,9 +13072,15 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) return SDValue(); - if (InVT1 != InVT2) + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. 
+ if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } ShuffleNumElems = NumElems * 2; } else { // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3b91e58879b..4a9042cfb3f 100644 --- a/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/gnu/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -502,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); + if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && @@ -519,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); MRI->clearKillFlags(SrcReg); } else { - // VReg may not support a SubIdx sub-register, and we may need to + // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 27a9ac337f2..6906f67ebac 100644 --- a/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/gnu/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3439,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. 
SmallVector<SDValue, 16> Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9ca646534e2..996c95bd5f0 100644 --- a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5832,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; diff --git a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6d717b44eb7..64e6c221229 100644 --- a/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/gnu/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2248,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { /// to use the new results. void SelectionDAGISel::UpdateChains( SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { + SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and @@ -2260,6 +2260,11 @@ void SelectionDAGISel::UpdateChains( // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; + // If ChainNode is null, it's because we replaced it on a previous + // iteration and we cleared it out of the map. Just skip it. + if (!ChainNode) + continue; + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && "Deleted node left in chain"); @@ -2272,6 +2277,11 @@ void SelectionDAGISel::UpdateChains( if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + SelectionDAG::DAGNodeDeletedListener NDL( + *CurDAG, [&](SDNode *N, SDNode *E) { + std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, + static_cast<SDNode *>(nullptr)); + }); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. @@ -2772,14 +2782,15 @@ struct MatchScope { /// for this. 
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener { - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; - SmallVectorImpl<MatchScope> &MatchScopes; + SDNode **NodeToMatch; + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; public: - MatchStateUpdater(SelectionDAG &DAG, - SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, - SmallVectorImpl<MatchScope> &MS) : - SelectionDAG::DAGUpdateListener(DAG), - RecordedNodes(RN), MatchScopes(MS) { } + MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch, + SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN, + SmallVectorImpl<MatchScope> &MS) + : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch), + RecordedNodes(RN), MatchScopes(MS) {} void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or @@ -2789,6 +2800,9 @@ public: // update listener during matching a complex patterns. if (!E || E->isMachineOpcode()) return; + // Check if NodeToMatch was updated. + if (N == *NodeToMatch) + *NodeToMatch = E; // Performing linear search here does not matter because we almost never // run this code. You'd have to have a CSE during complex pattern // matching. @@ -3081,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // consistent. std::unique_ptr<MatchStateUpdater> MSU; if (ComplexPatternFuncMutatesDAG()) - MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes, MatchScopes)); if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, diff --git a/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index a14b86179d6..40537e4fa78 100644 --- a/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/gnu/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -150,13 +150,13 @@ static void computePrevailingCopies( } static StringMap<MemoryBufferRef> -generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { +generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) { StringMap<MemoryBufferRef> ModuleMap; for (auto &ModuleBuffer : Modules) { assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == ModuleMap.end() && "Expect unique Buffer Identifier"); - ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; + ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer(); } return ModuleMap; } @@ -522,13 +522,13 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, } // end anonymous namespace void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { - MemoryBufferRef Buffer(Data, Identifier); + ThinLTOBuffer Buffer(Data, Identifier); if (Modules.empty()) { // First module added, so initialize the triple and some options LLVMContext Context; StringRef TripleStr; - ErrorOr<std::string> TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; Triple TheTriple(TripleStr); @@ -538,8 +538,8 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { else { LLVMContext Context; StringRef TripleStr; - ErrorOr<std::string> TripleOrErr = - expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer)); + ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, 
getBitcodeTargetTriple(Buffer.getMemBuffer())); if (TripleOrErr) TripleStr = *TripleOrErr; assert(TMBuilder.TheTriple.str() == TripleStr && @@ -588,7 +588,8 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(ModuleBuffer); + object::ModuleSummaryIndexObjectFile::create( + ModuleBuffer.getMemBuffer()); if (!ObjOrErr) { // FIXME diagnose logAllUnhandledErrors( @@ -829,11 +830,22 @@ static std::string writeGeneratedObject(int count, StringRef CacheEntryPath, // Main entry point for the ThinLTO processing void ThinLTOCodeGenerator::run() { + // Prepare the resulting object vector + assert(ProducedBinaries.empty() && "The generator should not be reused"); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries.resize(Modules.size()); + else { + sys::fs::create_directories(SavedObjectsDirectoryPath); + bool IsDir; + sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); + if (!IsDir) + report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); + ProducedBinaryFiles.resize(Modules.size()); + } + if (CodeGenOnly) { // Perform only parallel codegen and return. ThreadPool Pool; - assert(ProducedBinaries.empty() && "The generator should not be reused"); - ProducedBinaries.resize(Modules.size()); int count = 0; for (auto &ModuleBuffer : Modules) { Pool.async([&](int count) { @@ -841,11 +853,17 @@ void ThinLTOCodeGenerator::run() { Context.setDiscardValueNames(LTODiscardValueNames); // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // CodeGen - ProducedBinaries[count] = codegen(*TheModule); + auto OutputBuffer = codegen(*TheModule); + if (SavedObjectsDirectoryPath.empty()) + ProducedBinaries[count] = std::move(OutputBuffer); + else + ProducedBinaryFiles[count] = writeGeneratedObject( + count, "", SavedObjectsDirectoryPath, *OutputBuffer); }, count++); } @@ -866,18 +884,6 @@ void ThinLTOCodeGenerator::run() { WriteIndexToFile(*Index, OS); } - // Prepare the resulting object vector - assert(ProducedBinaries.empty() && "The generator should not be reused"); - if (SavedObjectsDirectoryPath.empty()) - ProducedBinaries.resize(Modules.size()); - else { - sys::fs::create_directories(SavedObjectsDirectoryPath); - bool IsDir; - sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir); - if (!IsDir) - report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'"); - ProducedBinaryFiles.resize(Modules.size()); - } // Prepare the module map. 
auto ModuleMap = generateModuleMap(Modules); @@ -939,8 +945,8 @@ void ThinLTOCodeGenerator::run() { std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), [&](int LeftIndex, int RightIndex) { - auto LSize = Modules[LeftIndex].getBufferSize(); - auto RSize = Modules[RightIndex].getBufferSize(); + auto LSize = Modules[LeftIndex].getBuffer().size(); + auto RSize = Modules[RightIndex].getBuffer().size(); return LSize > RSize; }); @@ -992,8 +998,9 @@ void ThinLTOCodeGenerator::run() { } // Parse module now - auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false, - /*IsImporting*/ false); + auto TheModule = + loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false, + /*IsImporting*/ false); // Save temps: original file. saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); diff --git a/gnu/llvm/lib/MC/MCCodeView.cpp b/gnu/llvm/lib/MC/MCCodeView.cpp index 3773542cf8a..99a5c11a498 100644 --- a/gnu/llvm/lib/MC/MCCodeView.cpp +++ b/gnu/llvm/lib/MC/MCCodeView.cpp @@ -509,17 +509,17 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout, // are artificially constructing. size_t RecordSize = FixedSizePortion.size() + sizeof(LocalVariableAddrRange) + 4 * NumGaps; - // Write out the recrod size. - support::endian::Writer<support::little>(OS).write<uint16_t>(RecordSize); + // Write out the record size. + LEWriter.write<uint16_t>(RecordSize); // Write out the fixed size prefix. OS << FixedSizePortion; // Make space for a fixup that will eventually have a section relative // relocation pointing at the offset where the variable becomes live. Fixups.push_back(MCFixup::create(Contents.size(), BE, FK_SecRel_4)); - Contents.resize(Contents.size() + 4); // Fixup for code start. + LEWriter.write<uint32_t>(0); // Fixup for code start. // Make space for a fixup that will record the section index for the code. Fixups.push_back(MCFixup::create(Contents.size(), BE, FK_SecRel_2)); - Contents.resize(Contents.size() + 2); // Fixup for section index. + LEWriter.write<uint16_t>(0); // Fixup for section index. // Write down the range's extent. LEWriter.write<uint16_t>(Chunk); @@ -529,7 +529,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout, } while (RangeSize > 0); // Emit the gaps afterwards. 
- assert((NumGaps == 0 || Bias < MaxDefRange) && + assert((NumGaps == 0 || Bias <= MaxDefRange) && "large ranges should not have gaps"); unsigned GapStartOffset = GapAndRangeSizes[I].second; for (++I; I != J; ++I) { @@ -537,7 +537,7 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout, assert(I < GapAndRangeSizes.size()); std::tie(GapSize, RangeSize) = GapAndRangeSizes[I]; LEWriter.write<uint16_t>(GapStartOffset); - LEWriter.write<uint16_t>(RangeSize); + LEWriter.write<uint16_t>(GapSize); GapStartOffset += GapSize + RangeSize; } } diff --git a/gnu/llvm/lib/MC/MCMachOStreamer.cpp b/gnu/llvm/lib/MC/MCMachOStreamer.cpp index 45a497240b4..bd425bb7309 100644 --- a/gnu/llvm/lib/MC/MCMachOStreamer.cpp +++ b/gnu/llvm/lib/MC/MCMachOStreamer.cpp @@ -142,7 +142,8 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) { if (SegName == "__TEXT" && SecName == "__eh_frame") return true; - if (SegName == "__DATA" && SecName == "__nl_symbol_ptr") + if (SegName == "__DATA" && (SecName == "__nl_symbol_ptr" || + SecName == "__thread_ptr")) return true; return false; diff --git a/gnu/llvm/lib/Target/AArch64/AArch64.td b/gnu/llvm/lib/Target/AArch64/AArch64.td index 740766b151b..91c335fac32 100644 --- a/gnu/llvm/lib/Target/AArch64/AArch64.td +++ b/gnu/llvm/lib/Target/AArch64/AArch64.td @@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; -def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs", - "AvoidQuadLdStPairs", "true", - "Do not form quad load/store pair operations">; +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", @@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M1 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, @@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", "Samsung Exynos-M2/M3 processors", - [FeatureAvoidQuadLdStPairs, + [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, FeatureCustomCheapAsMoveHandling, diff --git a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 9058617768d..938779d2369 100644 --- a/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/gnu/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -91,7 +91,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType<v2i32>>, CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, - CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>, + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, // Big endian vectors must be passed as if they were 1-element vectors so that // their lanes are in a consistent order. @@ -138,8 +138,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>, - // A SwiftError is passed in X19. 
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X19], [W19]>>>, + // A SwiftError is passed in X21. + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, @@ -289,7 +289,7 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X19)>; + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the diff --git a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5c8acba26aa..4c789926e3e 100644 --- a/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/gnu/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const { return false; // On some CPUs quad load/store pairs are slower than two single load/stores. - if (Subtarget.avoidQuadLdStPairs()) { + if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { default: break; diff --git a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8a76c42b589..8e312dcf276 100644 --- a/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/gnu/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -687,9 +687,30 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineInstrBuilder MIB; DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); + MachineOperand RegOp0 = getLdStRegOp(*RtMI); + MachineOperand RegOp1 = getLdStRegOp(*Rt2MI); + // Kill flags may become invalid when moving stores for pairing. + if (RegOp0.isUse()) { + if (!MergeForward) { + // Clear kill flags on store if moving upwards. Example: + // STRWui %w0, ... + // USE %w1 + // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards + RegOp0.setIsKill(false); + RegOp1.setIsKill(false); + } else { + // Clear kill flags of the first store's register. Example: + // STRWui %w1, ... + // USE kill %w1 ; need to clear kill flag when moving STRWui downwards + // STRW %w0 + unsigned Reg = getLdStRegOp(*I).getReg(); + for (MachineInstr &MI : make_range(std::next(I), Paired)) + MI.clearRegisterKills(Reg, TRI); + } + } MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc))) - .addOperand(getLdStRegOp(*RtMI)) - .addOperand(getLdStRegOp(*Rt2MI)) + .addOperand(RegOp0) + .addOperand(RegOp1) .addOperand(BaseRegOp) .addImm(OffsetImm) .setMemRefs(I->mergeMemRefsWith(*Paired)); @@ -832,9 +853,11 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, .addImm(Imms); } } - StoreI->clearRegisterKills(StRt, TRI); - (void)BitExtMI; + // Clear kill flags between store and load.
+ for (MachineInstr &MI : make_range(StoreI->getIterator(), + BitExtMI->getIterator())) + MI.clearRegisterKills(StRt, TRI); DEBUG(dbgs() << "Promoting load by replacing :\n "); DEBUG(StoreI->print(dbgs())); diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td index 0b2badff7cc..13022009af1 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "Enable SI Machine Scheduler" >; +// Unless +-flat-for-global is specified, turn on FlatForGlobal for +// all OS-es on VI and newer hardware to avoid assertion failures due +// to missing ADDR64 variants of MUBUF instructions. +// FIXME: moveToVALU should be able to handle converting addr64 MUBUF +// instructions. + def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 4acd55eb612..974e79fff3d 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; - if (STM.isAmdCodeObjectV2()) { + if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } @@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); - if (MFI->isKernel() && STM.isAmdCodeObjectV2()) { + if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { AMDGPUTargetStreamer *TS = static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); SmallString<128> SymbolName; @@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, // FIXME: Should use getKernArgSize header.kernarg_segment_byte_size = - STM.getKernArgSegmentSize(MFI->getABIArgOffset()); + STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2b4fc5397b1..5bf347e4865 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? 
AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, - // omod - SDValue Ops[8]; - - SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); - SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); - SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); - CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e48c1943cb0..54caa2c5dfa 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, SDLoc SL(N); switch (Opc) { case ISD::FADD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) SDValue LHS = N0.getOperand(0); SDValue RHS = N0.getOperand(1); @@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, } case ISD::FMA: case ISD::FMAD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) SDValue LHS = N0.getOperand(0); SDValue MHS = N0.getOperand(1); @@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(KILL) + NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 69567aa5f71..f6adceac6f1 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -119,6 +119,16 @@ protected: public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); + bool mayIgnoreSignedZero(SDValue Op) const { + if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only + return true; + + if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op)) + return BO->Flags.hasNoSignedZeros(); + + return false; + } + bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; bool isTruncateFree(EVT Src, EVT Dest) const override; @@ -320,6 +330,7 @@ enum NodeType : unsigned { INTERP_P2, PC_ADD_REL_OFFSET, KILL, + DUMMY_CHAIN, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index f079c8d0c70..d7fa28bdc00 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode< // This argument to this node is a dword address. 
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; +// Force dependencies for vector trunc stores +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; + def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 74851aedbb2..c35a67de1d7 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, ParseSubtargetFeatures(GPU, FullFS); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es + // on VI and newer hardware to avoid assertion failures due to missing ADDR64 + // variants of MUBUF instructions. + if (!hasAddr64() && !FS.contains("flat-for-global")) { + FlatForGlobal = true; + } + // FIXME: I don't think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? @@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const { return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); } -unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const { - unsigned ImplicitBytes = getImplicitArgNumBytes(); +unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, + unsigned ExplicitArgBytes) const { + unsigned ImplicitBytes = getImplicitArgNumBytes(MF); if (ImplicitBytes == 0) return ExplicitArgBytes; diff --git a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 51ba501bddd..0e3cb7dc1f8 100644 --- a/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/gnu/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -311,22 +311,31 @@ public: return EnableXNACK; } - bool isAmdCodeObjectV2() const { - return isAmdHsaOS() || isMesa3DOS(); + bool isMesaKernel(const MachineFunction &MF) const { + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + // Covers VS/PS/CS graphics shaders + bool isMesaGfxShader(const MachineFunction &MF) const { + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv()); + } + + bool isAmdCodeObjectV2(const MachineFunction &MF) const { + return isAmdHsaOS() || isMesaKernel(MF); } /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset() const { - return isAmdCodeObjectV2() ? 0 : 36; + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { + return isAmdCodeObjectV2(MF) ? 0 : 36; } unsigned getAlignmentForImplicitArgPtr() const { return isAmdHsaOS() ?
8 : 4; } - unsigned getImplicitArgNumBytes() const { - if (isMesa3DOS()) + unsigned getImplicitArgNumBytes(const MachineFunction &MF) const { + if (isMesaKernel(MF)) return 16; if (isAmdHsaOS() && isOpenCLEnv()) return 32; @@ -585,7 +594,7 @@ public: return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } - unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const; + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; diff --git a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index de7ce5cb9e4..77fee4356b6 100644 --- a/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, llvm_unreachable("Unsupported private trunc store"); } - SDValue Chain = Store->getChain(); + SDValue OldChain = Store->getChain(); + bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN); + // Skip dummy + SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain; SDValue BasePtr = Store->getBasePtr(); SDValue Offset = Store->getOffset(); EVT MemVT = Store->getMemoryVT(); @@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, // Store dword // TODO: Can we be smarter about MachinePointerInfo? - return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo()); + + // If we are part of expanded vector, make our neighbors depend on this store + if (VectorTrunc) { + // Make all other vector elements depend on this store + Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore); + DAG.ReplaceAllUsesOfValueWith(OldChain, Chain); + } + return NewStore; } SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Neither LOCAL nor PRIVATE can do vectors at the moment if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && VT.isVector()) { + if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + // Add an extra level of chain to isolate this vector + SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); + // TODO: can the chain be replaced without creating a new store? 
+ SDValue NewStore = DAG.getTruncStore( + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), + MemVT, StoreNode->getAlignment(), + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + StoreNode = cast<StoreSDNode>(NewStore); + } + return scalarizeVectorStore(StoreNode, DAG); } @@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // Put the mask in correct place SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); - // Put the mask in correct place + // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); @@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments( unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset(); + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( diff --git a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td index 19795bdde64..9210e66b0fe 100644 --- a/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/gnu/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; def MOV : R600_1OP <0x19, "MOV", []>; + +// This is a hack to get rid of DUMMY_CHAIN nodes. +// Most DUMMY_CHAINs should be eliminated during legalization, but undef +// values can sneak in some to selection. +let isPseudo = 1, isCodeGenOnly = 1 in { +def DUMMY_CHAIN : AMDGPUInst < + (outs), + (ins), + "DUMMY_CHAIN", + [(R600dummy_chain)] +>; +} // end let isPseudo = 1, isCodeGenOnly = 1 + + let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < diff --git a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index d0a69eafc58..0b571551588 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } @@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) { - assert(ST.isAmdCodeObjectV2()); + assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)); MRI.addLiveIn(PreloadedPrivateBufferReg); MBB.addLiveIn(PreloadedPrivateBufferReg); } @@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, bool CopyBuffer = ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister && + ST.isAmdCodeObjectV2(MF) && ScratchRsrcReg != PreloadedPrivateBufferReg; // This needs to be careful of the copying order to avoid overwriting one of @@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addReg(PreloadedPrivateBufferReg, RegState::Kill); } - if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) { - assert(!ST.isAmdCodeObjectV2()); + if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || 
(PreloadedPrivateBufferReg == AMDGPU::NoRegister))) { + assert(!ST.isAmdCodeObjectV2(MF)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - BuildMI(MBB, I, DL, SMovB32, Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, SMovB32, Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + if (MFI->hasPrivateMemoryInputPtr()) { + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); + + BuildMI(MBB, I, DL, Mov64, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { + const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); + + PointerType *PtrTy = + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + auto MMO = MF.getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable, + 0, 0); + BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) + .addReg(PreloadedPrivateBufferReg) + .addImm(0) // offset + .addImm(0) // glc + .addMemOperand(MMO) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } + } else { + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + } BuildMI(MBB, I, DL, SMovB32, Rsrc2) .addImm(Rsrc23 & 0xffffffff) diff --git a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9140fe6cd14..b98f9f400ee 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (!AMDGPU::isShader(CallConv)) { assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); } else { - assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() && + assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && @@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments( !Info->hasWorkItemIDZ()); } + if (Info->hasPrivateMemoryInputPtr()) { + unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI); + MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(PrivateMemoryPtrReg); + } + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
if (Info->hasPrivateSegmentBuffer()) { unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI); @@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (VA.isMemLoc()) { VT = Ins[i].VT; EVT MemVT = VA.getLocVT(); - const unsigned Offset = Subtarget->getExplicitKernelArgOffset() + + const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. @@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments( if (getTargetMachine().getOptLevel() == CodeGenOpt::None) HasStackObjects = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects) { // If we have stack objects, we unquestionably need the private buffer // resource. For the Code Object V2 ABI, this will be the first 4 user @@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // TODO: Should this propagate fast-math-flags? switch (IntrinsicID) { + case Intrinsic::amdgcn_implicit_buffer_ptr: { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + } case Intrinsic::amdgcn_dispatch_ptr: case Intrinsic::amdgcn_queue_ptr: { - if (!Subtarget->isAmdCodeObjectV2()) { + if (!Subtarget->isAmdCodeObjectV2(MF)) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); diff --git a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td index b86c0419118..38e31e75ee6 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/gnu/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -997,6 +997,11 @@ def : Pat < >; def : Pat < + (i1 (trunc i16:$a)), + (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) +>; + +def : Pat < (i1 (trunc i64:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index e911817c451..ecd46b95ca6 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), - WorkItemIDZ(false) { + WorkItemIDZ(false), + PrivateMemoryInputPtr(false) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const Function *F = MF.getFunction(); @@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentWaveByteOffset = true; - if (ST.isAmdCodeObjectV2()) { + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F->hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; + } else if (ST.isMesaGfxShader(MF)) { + if (HasStackObjects || MaySpill) + PrivateMemoryInputPtr = true; } // We don't need to worry about accessing spills with flat instructions. 
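The SIMachineFunctionInfo hunk above, together with the isAmdCodeObjectV2/isMesaGfxShader predicates introduced earlier in this patch, splits scratch-buffer provisioning into three regimes: Code Object V2 functions get the resource descriptor preloaded into user SGPRs, Mesa graphics shaders get a pointer to it in sgpr[0:1] (the new PrivateMemoryInputPtr), and everything else falls back to SCRATCH_RSRC_DWORD relocations. A standalone C++ sketch of that decision follows; the enum and helper are illustrative stand-ins, not LLVM API, and the booleans model ST.isAmdCodeObjectV2(MF) and ST.isMesaGfxShader(MF).

#include <cstdio>

// Illustrative model of the scratch-setup choice made in SIFrameLowering and
// SIMachineFunctionInfo above; not LLVM code.
enum class ScratchSetup { HsaUserSgprs, MesaGfxIndirect, Relocated };

static ScratchSetup classifyScratchSetup(bool IsAmdCodeObjectV2,
                                         bool IsMesaGfxShader) {
  if (IsAmdCodeObjectV2)
    return ScratchSetup::HsaUserSgprs;    // rsrc preloaded in user SGPRs
  if (IsMesaGfxShader)
    return ScratchSetup::MesaGfxIndirect; // descriptor read via sgpr[0:1]
  return ScratchSetup::Relocated;         // SCRATCH_RSRC_DWORD0/1 relocations
}

int main() {
  // A Mesa graphics shader with stack objects or spills lands in the
  // indirect case, matching the PrivateMemoryInputPtr path above.
  std::printf("%d\n", static_cast<int>(classifyScratchSetup(false, true)));
  return 0;
}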
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { return FlatScratchInitUserSGPR; } +unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) { + PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return PrivateMemoryPtrUserSGPR; +} + SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg ( MachineFunction *MF, unsigned FrameIndex, diff --git a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 3b4e233cd78..6fc8d18bceb 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/gnu/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // Input registers for non-HSA ABI + unsigned PrivateMemoryPtrUserSGPR; + // Input registers setup for the HSA ABI. // User SGPRs in allocation order. unsigned PrivateSegmentBufferUserSGPR; @@ -163,6 +166,11 @@ private: bool WorkItemIDY : 1; bool WorkItemIDZ : 1; + // Private memory buffer + // Compute directly in sgpr[0:1] + // Other shaders indirect 64-bits at sgpr[0:1] + bool PrivateMemoryInputPtr : 1; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -198,6 +206,7 @@ public: unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); + unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI); // Add system SGPRs. unsigned addWorkGroupIDX() { @@ -302,6 +311,10 @@ public: return WorkItemIDZ; } + bool hasPrivateMemoryInputPtr() const { + return PrivateMemoryInputPtr; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } @@ -338,6 +351,10 @@ public: return QueuePtrUserSGPR; } + unsigned getPrivateMemoryPtrUserSGPR() const { + return PrivateMemoryPtrUserSGPR; + } + bool hasSpilledSGPRs() const { return HasSpilledSGPRs; } diff --git a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 8c4b24a4504..a1ed5e8441d 100644 --- a/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/gnu/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: - assert(ST.isAmdCodeObjectV2() && - "Non-CodeObjectV2 ABI currently uses relocations"); - assert(MFI->hasPrivateSegmentBuffer()); - return MFI->PrivateSegmentBufferUserSGPR; + if (ST.isAmdCodeObjectV2(MF)) { + assert(MFI->hasPrivateSegmentBuffer()); + return MFI->PrivateSegmentBufferUserSGPR; + } + assert(MFI->hasPrivateMemoryInputPtr()); + return MFI->PrivateMemoryPtrUserSGPR; case SIRegisterInfo::KERNARG_SEGMENT_PTR: assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td index a15b9ceff2f..8cae83cd9d1 100644 --- a/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/gnu/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -607,12 +607,6 @@ def : Pat< (COPY $src) >; -def : Pat< - (i1 (trunc i16:$src)), - (COPY $src) ->; - - def : Pat < (i16 (trunc 
i64:$src)), (EXTRACT_SUBREG $src, sub0) diff --git a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5efa64d25ce..c2a4d4ba99b 100644 --- a/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/gnu/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> { } class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> { + // v_div_scale_{f32|f64} do not support input modifiers. + let HasModifiers = 0; let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; + let Asm64 = " $vdst, $sdst, $src0, $src1, $src2"; } def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> { @@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } // Double precision division pre-scale. def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { let SchedRW = [WriteDouble, WriteSALU]; let hasExtraSrcRegAllocReq = 1; + let AsmMatchConverter = ""; } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>; diff --git a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8ec9cb02813..95db35ce8ff 100644 --- a/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); + // Emit the XRay table for this function. + emitXRayTable(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. diff --git a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td index 9c278a52a7f..7a7b7fede7c 100644 --- a/gnu/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/gnu/llvm/lib/Target/ARM/ARMCallingConv.td @@ -26,8 +26,8 @@ def CC_ARM_APCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, @@ -51,8 +51,8 @@ def RetCC_ARM_APCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, @@ -166,8 +166,8 @@ def CC_ARM_AAPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. 
+ CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, @@ -182,8 +182,8 @@ def RetCC_ARM_AAPCS : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, @@ -206,8 +206,8 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is passed in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is passed in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, // HFAs are passed in a contiguous block of registers, or on the stack CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>, @@ -227,8 +227,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>, - // A SwiftError is returned in R6. - CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R6]>>>, + // A SwiftError is returned in R8. + CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, @@ -267,8 +267,8 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; -// R6 is used to pass swifterror, remove it from CSR. -def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R6)>; +// R8 is used to pass swifterror, remove it from CSR. +def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; diff --git a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 95fcc8dcb45..baa4e0330cf 100644 --- a/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/gnu/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1225,16 +1225,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { + const bool Thumb = Opcode == ARM::tTPsoft; + MachineInstrBuilder MIB; - if (Opcode == ARM::tTPsoft) + if (STI->genLongCalls()) { + MachineFunction *MF = MBB.getParent(); + MachineConstantPool *MCP = MF->getConstantPool(); + unsigned PCLabelID = AFI->createPICLabelUId(); + MachineConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(), + "__aeabi_read_tp", PCLabelID, 0); + unsigned Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get( ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__aeabi_read_tp", 0); - else + TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) + .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); + if (!Thumb) + MIB.addImm(0); + MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get( ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + TII->get(Thumb ? 
ARM::tBLXr : ARM::BLX)); + if (Thumb) + MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.addReg(Reg, RegState::Kill); + } else { + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Thumb ? ARM::tBL : ARM::BL)); + if (Thumb) + MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.addExternalSymbol("__aeabi_read_tp", 0); + } MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp index 32b7c87e61b..0f84a235916 100644 --- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -97,171 +97,6 @@ namespace { }; } -void ARMTargetLowering::InitLibcallCallingConvs() { - // The builtins on ARM always use AAPCS, irrespective of wheter C is AAPCS or - // AAPCS_VFP. - for (const auto LC : { - RTLIB::SHL_I16, - RTLIB::SHL_I32, - RTLIB::SHL_I64, - RTLIB::SHL_I128, - RTLIB::SRL_I16, - RTLIB::SRL_I32, - RTLIB::SRL_I64, - RTLIB::SRL_I128, - RTLIB::SRA_I16, - RTLIB::SRA_I32, - RTLIB::SRA_I64, - RTLIB::SRA_I128, - RTLIB::MUL_I8, - RTLIB::MUL_I16, - RTLIB::MUL_I32, - RTLIB::MUL_I64, - RTLIB::MUL_I128, - RTLIB::MULO_I32, - RTLIB::MULO_I64, - RTLIB::MULO_I128, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, - RTLIB::SDIV_I32, - RTLIB::SDIV_I64, - RTLIB::SDIV_I128, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, - RTLIB::UDIV_I32, - RTLIB::UDIV_I64, - RTLIB::UDIV_I128, - RTLIB::SREM_I8, - RTLIB::SREM_I16, - RTLIB::SREM_I32, - RTLIB::SREM_I64, - RTLIB::SREM_I128, - RTLIB::UREM_I8, - RTLIB::UREM_I16, - RTLIB::UREM_I32, - RTLIB::UREM_I64, - RTLIB::UREM_I128, - RTLIB::SDIVREM_I8, - RTLIB::SDIVREM_I16, - RTLIB::SDIVREM_I32, - RTLIB::SDIVREM_I64, - RTLIB::SDIVREM_I128, - RTLIB::UDIVREM_I8, - RTLIB::UDIVREM_I16, - RTLIB::UDIVREM_I32, - RTLIB::UDIVREM_I64, - RTLIB::UDIVREM_I128, - RTLIB::NEG_I32, - RTLIB::NEG_I64, - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_F128, - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_F128, - RTLIB::FPEXT_F64_F128, - RTLIB::FPEXT_F32_F128, - RTLIB::FPEXT_F32_F64, - RTLIB::FPEXT_F16_F32, - RTLIB::FPROUND_F32_F16, - RTLIB::FPROUND_F64_F16, - RTLIB::FPROUND_F80_F16, - RTLIB::FPROUND_F128_F16, - RTLIB::FPROUND_F64_F32, - RTLIB::FPROUND_F80_F32, - RTLIB::FPROUND_F128_F32, - RTLIB::FPROUND_F80_F64, - RTLIB::FPROUND_F128_F64, - RTLIB::FPTOSINT_F32_I32, - RTLIB::FPTOSINT_F32_I64, - RTLIB::FPTOSINT_F32_I128, - RTLIB::FPTOSINT_F64_I32, - RTLIB::FPTOSINT_F64_I64, - RTLIB::FPTOSINT_F64_I128, - RTLIB::FPTOSINT_F80_I32, - RTLIB::FPTOSINT_F80_I64, - RTLIB::FPTOSINT_F80_I128, - RTLIB::FPTOSINT_F128_I32, - RTLIB::FPTOSINT_F128_I64, - RTLIB::FPTOSINT_F128_I128, - RTLIB::FPTOUINT_F32_I32, - RTLIB::FPTOUINT_F32_I64, - RTLIB::FPTOUINT_F32_I128, - RTLIB::FPTOUINT_F64_I32, - RTLIB::FPTOUINT_F64_I64, - RTLIB::FPTOUINT_F64_I128, - RTLIB::FPTOUINT_F80_I32, - RTLIB::FPTOUINT_F80_I64, - RTLIB::FPTOUINT_F80_I128, - RTLIB::FPTOUINT_F128_I32, - RTLIB::FPTOUINT_F128_I64, - RTLIB::FPTOUINT_F128_I128, - RTLIB::SINTTOFP_I32_F32, - RTLIB::SINTTOFP_I32_F64, - RTLIB::SINTTOFP_I32_F80, - RTLIB::SINTTOFP_I32_F128, - RTLIB::SINTTOFP_I64_F32, - RTLIB::SINTTOFP_I64_F64, - RTLIB::SINTTOFP_I64_F80, - RTLIB::SINTTOFP_I64_F128, - RTLIB::SINTTOFP_I128_F32, - RTLIB::SINTTOFP_I128_F64, - 
RTLIB::SINTTOFP_I128_F80, - RTLIB::SINTTOFP_I128_F128, - RTLIB::UINTTOFP_I32_F32, - RTLIB::UINTTOFP_I32_F64, - RTLIB::UINTTOFP_I32_F80, - RTLIB::UINTTOFP_I32_F128, - RTLIB::UINTTOFP_I64_F32, - RTLIB::UINTTOFP_I64_F64, - RTLIB::UINTTOFP_I64_F80, - RTLIB::UINTTOFP_I64_F128, - RTLIB::UINTTOFP_I128_F32, - RTLIB::UINTTOFP_I128_F64, - RTLIB::UINTTOFP_I128_F80, - RTLIB::UINTTOFP_I128_F128, - RTLIB::OEQ_F32, - RTLIB::OEQ_F64, - RTLIB::OEQ_F128, - RTLIB::UNE_F32, - RTLIB::UNE_F64, - RTLIB::UNE_F128, - RTLIB::OGE_F32, - RTLIB::OGE_F64, - RTLIB::OGE_F128, - RTLIB::OLT_F32, - RTLIB::OLT_F64, - RTLIB::OLT_F128, - RTLIB::OLE_F32, - RTLIB::OLE_F64, - RTLIB::OLE_F128, - RTLIB::OGT_F32, - RTLIB::OGT_F64, - RTLIB::OGT_F128, - RTLIB::UO_F32, - RTLIB::UO_F64, - RTLIB::UO_F128, - RTLIB::O_F32, - RTLIB::O_F64, - RTLIB::O_F128, - }) - setLibcallCallingConv(LC, CallingConv::ARM_AAPCS); -} - // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -349,7 +184,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - InitLibcallCallingConvs(); + if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && + !Subtarget->isTargetWatchOS()) { + const auto &E = Subtarget->getTargetTriple().getEnvironment(); + + bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF || + E == Triple::MuslEABIHF; + // Windows is a special case. Technically, we will replace all of the "GNU" + // calls with calls to MSVCRT if appropriate and adjust the calling + // convention then. + IsHFTarget = IsHFTarget || Subtarget->isTargetWindows(); + + for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) + setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), + IsHFTarget ? CallingConv::ARM_AAPCS_VFP + : CallingConv::ARM_AAPCS); + } if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. 
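The loop above replaces the hand-maintained list of libcalls with a blanket pass over every RTLIB entry, keyed on whether the triple's environment implies the hard-float ABI. A minimal standalone sketch of that predicate, using plain strings in place of llvm::Triple (the function name and string encoding are illustrative assumptions):

#include <cstdio>
#include <string>

// Illustrative reduction of the hard-float check introduced above; not LLVM
// code. EABIHF, GNUEABIHF and MuslEABIHF environments select ARM_AAPCS_VFP
// for every libcall, and Windows is special-cased to the same convention.
static bool usesHardFloatLibcalls(const std::string &Env, bool IsWindows) {
  bool IsHFTarget =
      Env == "eabihf" || Env == "gnueabihf" || Env == "musleabihf";
  return IsHFTarget || IsWindows;
}

int main() {
  std::printf("gnueabihf -> %s\n", usesHardFloatLibcalls("gnueabihf", false)
                                       ? "ARM_AAPCS_VFP" : "ARM_AAPCS");
  std::printf("gnueabi   -> %s\n", usesHardFloatLibcalls("gnueabi", false)
                                       ? "ARM_AAPCS_VFP" : "ARM_AAPCS");
  return 0;
}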
@@ -1937,7 +1787,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i32) { assert(VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i32 && @@ -3176,17 +3027,20 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); } +static bool isReadOnly(const GlobalValue *GV) { + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->getBaseObject(); + return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) || + isa<Function>(GV); +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const TargetMachine &TM = getTargetMachine(); - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GV = GA->getBaseObject(); - bool IsRO = - (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) || - isa<Function>(GV); + bool IsRO = isReadOnly(GV); // promoteToConstantPool only if not generating XO text section if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) @@ -7721,11 +7575,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: - if (Subtarget->isTargetWindows()) + if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ true); return LowerSDIV(Op, DAG); case ISD::UDIV: - if (Subtarget->isTargetWindows()) + if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ false); return LowerUDIV(Op, DAG); case ISD::ADDC: diff --git a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h index 7a7f91f4d3c..84c6eb845bb 100644 --- a/gnu/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/gnu/llvm/lib/Target/ARM/ARMISelLowering.h @@ -538,8 +538,6 @@ class InstrItineraryData; bool HasStandaloneRem = true; - void InitLibcallCallingConvs(); - void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); diff --git a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 97ca11ca501..d054578deb6 100644 --- a/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/gnu/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -413,6 +413,7 @@ public: Match_RequiresDifferentOperands, Match_RequiresNoZeroRegister, Match_RequiresSameSrcAndDst, + Match_NoFCCRegisterForCurrentISA, Match_NonZeroOperandForSync, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "MipsGenAsmMatcher.inc" @@ -1461,8 +1462,6 @@ public: bool isFCCAsmReg() const { if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC)) return false; - if (!AsmParser.hasEightFccRegisters()) - return RegIdx.Index == 0; return RegIdx.Index <= 7; } bool isACCAsmReg() const { @@ -4053,6 +4052,7 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, return Match_RequiresSameSrcAndDst; } } + unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst 
&Inst) { switch (Inst.getOpcode()) { // As described by the MIPSR6 spec, daui must not use the zero operand for @@ -4131,9 +4131,15 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) return Match_RequiresDifferentOperands; return Match_Success; - default: - return Match_Success; } + + uint64_t TSFlags = getInstDesc(Inst.getOpcode()).TSFlags; + if ((TSFlags & MipsII::HasFCCRegOperand) && + (Inst.getOperand(0).getReg() != Mips::FCC0) && !hasEightFccRegisters()) + return Match_NoFCCRegisterForCurrentISA; + + return Match_Success; + } static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, @@ -4191,6 +4197,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "invalid operand ($zero) for instruction"); case Match_RequiresSameSrcAndDst: return Error(IDLoc, "source and destination must match"); + case Match_NoFCCRegisterForCurrentISA: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "non-zero fcc register doesn't exist in current ISA level"); case Match_Immz: return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'"); case Match_UImm1_0: diff --git a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 35de7b27bf1..a90db2384c4 100644 --- a/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/gnu/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -123,7 +123,9 @@ namespace MipsII { HasForbiddenSlot = 1 << 5, /// IsPCRelativeLoad - A Load instruction with implicit source register /// ($pc) with explicit offset and destination register - IsPCRelativeLoad = 1 << 6 + IsPCRelativeLoad = 1 << 6, + /// HasFCCRegOperand - Instruction uses an $fcc<x> register. + HasFCCRegOperand = 1 << 7 }; } diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td index fc83761e409..5600f71ff68 100644 --- a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFPU.td @@ -27,9 +27,20 @@ def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6; def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, - CEQS_FM_MM<0>; + CEQS_FM_MM<0> { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set is used directly. + bits<3> fcc = 0; +} + def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, - CEQS_FM_MM<1>; + CEQS_FM_MM<1> { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set is used directly.
+ bits<3> fcc = 0; +} def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; @@ -164,6 +175,98 @@ let AdditionalPredicates = [InMicroMips] in { def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, LW_FM_MM<0x26>; } + + multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt, + InstrItinClass itin> { + def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } + } + + defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + let DecoderNamespace = "Mips64" in + defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">, + ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; } //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td index 8b595f9e6c4..774976828a0 100644 --- 
a/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -766,6 +766,7 @@ class SWXC1_FM_MM<bits<9> funct> : MMArch { class CEQS_FM_MM<bits<2> fmt> : MMArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -773,13 +774,17 @@ class CEQS_FM_MM<bits<2> fmt> : MMArch { let Inst{31-26} = 0x15; let Inst{25-21} = ft; let Inst{20-16} = fs; - let Inst{15-13} = 0x0; // cc + let Inst{15-13} = fcc; let Inst{12} = 0; let Inst{11-10} = fmt; let Inst{9-6} = cond; let Inst{5-0} = 0x3c; } +class C_COND_FM_MM<bits <2> fmt, bits<4> c> : CEQS_FM_MM<fmt> { + let cond = c; +} + class BC1F_FM_MM<bits<5> tf> : MMArch { bits<16> offset; diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 179695bc698..04d6529a073 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1037,6 +1037,22 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // TODO: implement } +// Emit .dtprelword or .dtpreldword directive +// and value for debug thread local expression. +void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, + unsigned Size) const { + switch (Size) { + case 4: + OutStreamer->EmitDTPRel32Value(Value); + break; + case 8: + OutStreamer->EmitDTPRel64Value(Value); + break; + default: + llvm_unreachable("Unexpected size of expression value."); + } +} + // Align all targets of indirect branches on bundle size. Used only if target // is NaCl. void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) { diff --git a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h index 259e557e128..c5cf5241c23 100644 --- a/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h +++ b/gnu/llvm/lib/Target/Mips/MipsAsmPrinter.h @@ -140,6 +140,7 @@ public: void EmitStartOfAsmFile(Module &M) override; void EmitEndOfAsmFile(Module &M) override; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + void EmitDebugValue(const MCExpr *Value, unsigned Size) const override; }; } diff --git a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp index 29f3e2c07e0..a44192f57aa 100644 --- a/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -698,8 +698,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0); emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1); - emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg( - Mips::FCC0, RegState::ImplicitDefine); + emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg) + .addReg(RightReg); emitInst(CondMovOpc, ResultReg) .addReg(RegWithOne) .addReg(Mips::FCC0) diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td index ab7aa9dcdca..df42d56d041 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -219,6 +219,7 @@ class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin, let isTerminator = 1; let hasDelaySlot = DelaySlot; let Defs = [AT]; + let hasFCCRegOperand = 1; } class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, @@ -229,41 +230,106 @@ class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, !strconcat("c.$cond.", typestr)>, HARDFLOAT { let Defs = [FCC0]; let isCodeGenOnly = 1; + let 
hasFCCRegOperand = 1; } + +// Note: MIPS-IV introduced $fcc1-$fcc7 and renamed FCSR31[23] $fcc0. Rather +// than duplicating the instruction definition for MIPS1 - MIPS3, we expand +// c.cond.ft if necessary, and reject it after constructing the +// instruction if the ISA doesn't support it. class C_COND_FT<string CondStr, string Typestr, RegisterOperand RC, InstrItinClass itin> : - InstSE<(outs), (ins RC:$fs, RC:$ft), - !strconcat("c.", CondStr, ".", Typestr, "\t$fs, $ft"), [], itin, - FrmFR>, HARDFLOAT; + InstSE<(outs FCCRegsOpnd:$fcc), (ins RC:$fs, RC:$ft), + !strconcat("c.", CondStr, ".", Typestr, "\t$fcc, $fs, $ft"), [], itin, + FrmFR>, HARDFLOAT { + let isCompare = 1; + let hasFCCRegOperand = 1; +} + multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt, InstrItinClass itin> { - def C_F_#NAME : C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM<fmt, 0>; - def C_UN_#NAME : C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM<fmt, 1>; - def C_EQ_#NAME : C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM<fmt, 2>; - def C_UEQ_#NAME : C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM<fmt, 3>; - def C_OLT_#NAME : C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM<fmt, 4>; - def C_ULT_#NAME : C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM<fmt, 5>; - def C_OLE_#NAME : C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM<fmt, 6>; - def C_ULE_#NAME : C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM<fmt, 7>; - def C_SF_#NAME : C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM<fmt, 8>; - def C_NGLE_#NAME : C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM<fmt, 9>; - def C_SEQ_#NAME : C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM<fmt, 10>; - def C_NGL_#NAME : C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM<fmt, 11>; - def C_LT_#NAME : C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM<fmt, 12>; - def C_NGE_#NAME : C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM<fmt, 13>; - def C_LE_#NAME : C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM<fmt, 14>; - def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>; + def C_F_#NAME : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; + } + def C_EQ_#NAME : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME : MMRel, C_COND_FT<"ngl", 
TypeStr, RC, itin>, + C_COND_FM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } } +let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "Mips64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_64; - +} //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// @@ -549,13 +615,29 @@ def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, /// Floating Point Compare let AdditionalPredicates = [NotInMicroMips] in { def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, - ISA_MIPS1_NOT_32R6_64R6; + ISA_MIPS1_NOT_32R6_64R6 { + + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_32; + ISA_MIPS1_NOT_32R6_64R6, FGR_32 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; + } } let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - ISA_MIPS1_NOT_32R6_64R6, FGR_64; + ISA_MIPS1_NOT_32R6_64R6, FGR_64 { + // FIXME: This is required to work around the fact that these instructions + // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the + // fcc register set used directly. + bits<3> fcc = 0; +} //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions @@ -602,15 +684,6 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd), //===----------------------------------------------------------------------===// // InstAliases. 
//===----------------------------------------------------------------------===// -def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>, - ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT; - def : MipsInstAlias <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>, ISA_MIPS2, HARDFLOAT; @@ -630,6 +703,80 @@ def : MipsInstAlias def : MipsInstAlias <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>, FGR_64, ISA_MIPS2, HARDFLOAT; + +multiclass C_COND_ALIASES<string TypeStr, RegisterOperand RC> { + def : MipsInstAlias<!strconcat("c.f.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_F_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.un.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UN_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.eq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_EQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ueq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_UEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.olt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ult.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ole.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_OLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ule.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_ULE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.sf.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SF_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngle.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGLE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.seq.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_SEQ_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngl.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGL_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.lt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.nge.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.le.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_LE_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; + def : MipsInstAlias<!strconcat("c.ngt.", TypeStr, " $fs, $ft"), + (!cast<Instruction>("C_NGT_"#NAME) FCC0, + RC:$fs, RC:$ft), 1>; +} + +multiclass BC1_ALIASES<Instruction BCTrue, string BCTrueString, + Instruction BCFalse, string BCFalseString> { + def : MipsInstAlias<!strconcat(BCTrueString, " $offset"), + (BCTrue FCC0, brtarget:$offset), 1>; + + def : MipsInstAlias<!strconcat(BCFalseString, " $offset"), + (BCFalse FCC0, brtarget:$offset), 1>; +} + +let AdditionalPredicates = [NotInMicroMips] in { + defm S : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6; + defm D32 : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MIPS1_NOT_32R6_64R6, FGR_32; + defm D64 : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + 
ISA_MIPS1_NOT_32R6_64R6, FGR_64; + + defm : BC1_ALIASES<BC1T, "bc1t", BC1F, "bc1f">, ISA_MIPS1_NOT_32R6_64R6, + HARDFLOAT; + defm : BC1_ALIASES<BC1TL, "bc1tl", BC1FL, "bc1fl">, ISA_MIPS2_NOT_32R6_64R6, + HARDFLOAT; +} //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td index 1437fb75434..817d9b44b9c 100644 --- a/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/gnu/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -101,12 +101,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, bit IsPCRelativeLoad = 0; // Load instruction with implicit source register // ($pc) and with explicit offset and destination // register + bit hasFCCRegOperand = 0; // Instruction uses $fcc<X> register and is + // present in MIPS-I to MIPS-III. - // TSFlags layout should be kept in sync with MipsInstrInfo.h. + // TSFlags layout should be kept in sync with MCTargetDesc/MipsBaseInfo.h. let TSFlags{3-0} = FormBits; let TSFlags{4} = isCTI; let TSFlags{5} = hasForbiddenSlot; let TSFlags{6} = IsPCRelativeLoad; + let TSFlags{7} = hasFCCRegOperand; let DecoderNamespace = "Mips"; @@ -829,6 +832,7 @@ class BC1F_FM<bit nd, bit tf> : StdArch { class CEQS_FM<bits<5> fmt> : StdArch { bits<5> fs; bits<5> ft; + bits<3> fcc; bits<4> cond; bits<32> Inst; @@ -837,7 +841,7 @@ class CEQS_FM<bits<5> fmt> : StdArch { let Inst{25-21} = fmt; let Inst{20-16} = ft; let Inst{15-11} = fs; - let Inst{10-8} = 0; // cc + let Inst{10-8} = fcc; let Inst{7-4} = 0x3; let Inst{3-0} = cond; } diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index fadab780612..c5d6a05d661 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -148,3 +148,11 @@ MCSection *MipsTargetObjectFile::getSectionForConstant(const DataLayout &DL, // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align); } + +const MCExpr * +MipsTargetObjectFile::getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + const MCExpr *Expr = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + return MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(0x8000, getContext()), getContext()); +} diff --git a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h index e5423f9578a..a37ec154ff7 100644 --- a/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h +++ b/gnu/llvm/lib/Target/Mips/MipsTargetObjectFile.h @@ -42,6 +42,8 @@ class MipsTargetMachine; MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C, unsigned &Align) const override; + /// Describe a TLS variable address within debug info. 
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 90111bbea07..f615cc7cc97 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1508,8 +1508,14 @@ def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst), PPC970_DGroup_Single; } // hasSideEffects = 0 +def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src), + "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>; +def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src), + "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; +def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src), + "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def : Pat<(int_ppc_dcbt xoaddr:$dst), (DCBT 0, xoaddr:$dst)>; @@ -2381,6 +2387,13 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), "mftb $RT, $SPR", IIC_SprMFTB>; +def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR), + "mfpmr $RT, $SPR", IIC_SprMFPMR>; + +def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtpmr $SPR, $RT", IIC_SprMTPMR>; + + // A pseudo-instruction used to implement the read of the 64-bit cycle counter // on a 32-bit target. let hasSideEffects = 1, usesCustomInserter = 1 in diff --git a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td index edabe774867..d240529bc73 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -118,6 +118,8 @@ def IIC_SprTLBIE : InstrItinClass; def IIC_SprABORT : InstrItinClass; def IIC_SprMSGSYNC : InstrItinClass; def IIC_SprSTOP : InstrItinClass; +def IIC_SprMFPMR : InstrItinClass; +def IIC_SprMTPMR : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
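The MipsAsmPrinter and MipsTargetObjectFile hunks above cooperate: EmitDebugValue selects the .dtprelword or .dtpreldword directive from the operand size, and getDebugThreadLocalSymbol rewrites the TLS symbol as Sym + 0x8000, presumably compensating for the 0x8000 bias MIPS applies to DTP-relative offsets (the patch itself does not state the rationale). A minimal standalone sketch of the expression construction, using only the MC APIs visible in the hunk:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

// Build the value emitted after .dtprelword/.dtpreldword: a plain symbol
// reference biased by 0x8000.
static const MCExpr *buildDebugTLSExpr(const MCSymbol *Sym, MCContext &Ctx) {
  const MCExpr *Ref =
      MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
  return MCBinaryExpr::createAdd(Ref, MCConstantExpr::create(0x8000, Ctx),
                                 Ctx);
}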
diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td index f687d326b52..15d5991b938 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -249,6 +249,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<5, [E500_SFX0]>], [8, 1], [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 @@ -257,6 +261,10 @@ def PPCE500mcItineraries : ProcessorItineraries< InstrStage<1, [E500_SFX0, E500_SFX1]>], [4, 1], // Latency = 1, Repeat rate = 1 [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SFX0]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, InstrStage<4, [E500_SFX0]>], [7, 1], // Latency = 4, Repeat rate = 4 diff --git a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td index 5db886cf8f9..32f8e652dd5 100644 --- a/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td +++ b/gnu/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -313,20 +313,24 @@ def PPCE5500Itineraries : ProcessorItineraries< InstrStage<5, [E5500_CFX_0]>], [9, 2], // Latency = 5, Repeat rate = 5 [E5500_GPR_Bypass, E5500_CR_Bypass]>, - InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<4, [E5500_SFX0]>], + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [E5500_GPR_Bypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, InstrStage<4, [E5500_CFX_0]>], [8, 2], // Latency = 4, Repeat rate = 4 [NoBypass, E5500_GPR_Bypass]>, InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, - InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + InstrStage<1, [E5500_CFX_0]>], [5], // Latency = 1, Repeat rate = 1 [E5500_GPR_Bypass]>, InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, diff --git a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 192b942490e..985acf92a2d 100644 --- a/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/gnu/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -77,11 +77,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, default: return false; case X86::TCRETURNdi: - case X86::TCRETURNdicc: case X86::TCRETURNri: case X86::TCRETURNmi: case X86::TCRETURNdi64: - case X86::TCRETURNdi64cc: case X86::TCRETURNri64: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; @@ -99,10 +97,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, Offset = StackAdj - MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); - if (Opcode == 
X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) { - assert(Offset == 0 && "Conditional tail call cannot adjust the stack."); - } - if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); @@ -111,21 +105,12 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Jump to label or value in register. bool IsWin64 = STI->isTargetWin64(); - if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc || - Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) { + if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) { unsigned Op; switch (Opcode) { case X86::TCRETURNdi: Op = X86::TAILJMPd; break; - case X86::TCRETURNdicc: - Op = X86::TAILJMPd_CC; - break; - case X86::TCRETURNdi64cc: - assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder."); - // TODO: We could do it for Win64 "leaf" functions though; PR30337. - Op = X86::TAILJMPd64_CC; - break; default: // Note: Win64 uses REX prefixes indirect jumps out of functions, but // not direct ones. @@ -141,10 +126,6 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } - if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) { - MIB.addImm(MBBI->getOperand(2).getImm()); - } - } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) { unsigned Op = (Opcode == X86::TCRETURNmi) ? X86::TAILJMPm diff --git a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f13b722eb3..08fe2bad281 100644 --- a/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/gnu/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28788,10 +28788,12 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// If a vector select has an operand that is -1 or 0, simplify the select to a -/// bitwise logic operation. -static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +/// If a vector select has an operand that is -1 or 0, try to simplify the +/// select to a bitwise logic operation. +static SDValue +combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -28853,18 +28855,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, } } - if (!TValIsAllOnes && !FValIsAllZeros) + // vselect Cond, 111..., 000... -> Cond + if (TValIsAllOnes && FValIsAllZeros) + return DAG.getBitcast(VT, Cond); + + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT)) return SDValue(); - SDValue Ret; - if (TValIsAllOnes && FValIsAllZeros) - Ret = Cond; - else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); - else if (FValIsAllZeros) - Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS)); + // vselect Cond, 111..., X -> or Cond, X + if (TValIsAllOnes) { + SDValue CastRHS = DAG.getBitcast(CondVT, RHS); + SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS); + return DAG.getBitcast(VT, Or); + } + + // vselect Cond, X, 000... 
-> and Cond, X + if (FValIsAllZeros) { + SDValue CastLHS = DAG.getBitcast(CondVT, LHS); + SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS); + return DAG.getBitcast(VT, And); + } - return DAG.getBitcast(VT, Ret); + return SDValue(); } static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { @@ -29353,7 +29365,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } } - if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget)) + if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) return V; // If this is a *dynamic* select (non-constant condition) and we can match @@ -31260,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. - if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) - return false; - if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - -/// Return true if VPACK* instruction can be used for the given types -/// and it is avalable on \p Subtarget. -static bool -isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { - if (Subtarget.hasSSE2()) - // v16i16 -> v16i8 - if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) - return true; - if (Subtarget.hasSSE41()) - // v8i32 -> v8i16 - if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) - return true; - return false; -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectUSatPattern(SDValue In, EVT VT) { - if (In.getOpcode() != ISD::UMIN) - return SDValue(); - - //Saturation with truncation. We truncate from InVT to VT. - assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() && - "Unexpected types for truncate operation"); - - APInt C; - if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { - // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according - // the element size of the destination type. - return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) : - SDValue(); - } - return SDValue(); -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// The types should allow to use VPMOVUS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. 
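// For reference: the matcher deleted below recognized unsigned saturating
// truncation, trunc(umin(x, UMAX_of_dst)) -> x, e.g. umin(x, 65535)
// truncated to i16 clamps rather than wraps. The splat-constant test it
// relied on is the generic mask check
//   APIntOps::isMask(VT.getScalarSizeInBits(), C)
// which holds exactly when C == 2^N - 1 for the destination's N-bit
// elements (0xFF, 0xFFFF or 0xFFFFFFFF for the i8/i16/i32 cases handled
// here). This whole combine is being backed out, not fixed in place.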
-static SDValue detectAVX512USatPattern(SDValue In, EVT VT, - const X86Subtarget &Subtarget) { - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectUSatPattern(In, VT); -} - -static SDValue -combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue USatVal = detectUSatPattern(In, VT); - if (USatVal) { - if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); - return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); - } - } - return SDValue(); -} - /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -31913,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) - return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -32539,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; - // Try to combine truncation with unsigned saturation. - if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget)) - return Val; - // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { @@ -33778,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } } - // Try to synthesize horizontal adds from adds of shuffles. + // Try to synthesize horizontal subs from subs of shuffles. EVT VT = N->getValueType(0); if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && - isHorizontalBinOp(Op0, Op1, true)) + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); diff --git a/gnu/llvm/lib/Target/X86/X86InstrControl.td b/gnu/llvm/lib/Target/X86/X86InstrControl.td index 4ea223e82be..2f260c48df4 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrControl.td +++ b/gnu/llvm/lib/Target/X86/X86InstrControl.td @@ -264,21 +264,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>; } -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, EFLAGS] in { - def TCRETURNdicc : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. 
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} - //===----------------------------------------------------------------------===// // Call Instructions... @@ -340,19 +325,3 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; } } - -// Conditional tail calls are similar to the above, but they are branches -// rather than barriers, and they use EFLAGS. -let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [RSP, EFLAGS] in { - def TCRETURNdi64cc : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, - i32imm:$cond), []>; - - // This gets substituted to a conditional jump instruction in MC lowering. - def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$cond), - "", - [], IIC_JMP_REL>; -} diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp index e3484d062bc..627b6120b04 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -5108,85 +5108,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { return !isPredicated(MI); } -bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - return true; - default: - return false; - } -} - -bool X86InstrInfo::canMakeTailCallConditional( - SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - if (TailCall.getOpcode() != X86::TCRETURNdi && - TailCall.getOpcode() != X86::TCRETURNdi64) { - // Only direct calls can be done with a conditional branch. - return false; - } - - if (Subtarget.isTargetWin64()) { - // Conditional tail calls confuse the Win64 unwinder. - // TODO: Allow them for "leaf" functions; PR30337. - return false; - } - - assert(BranchCond.size() == 1); - if (BranchCond[0].getImm() > X86::LAST_VALID_COND) { - // Can't make a conditional tail call with this condition. - return false; - } - - const X86MachineFunctionInfo *X86FI = - TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>(); - if (X86FI->getTCReturnAddrDelta() != 0 || - TailCall.getOperand(1).getImm() != 0) { - // A conditional tail call cannot do any stack adjustment. - return false; - } - - return true; -} - -void X86InstrInfo::replaceBranchWithTailCall( - MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond, - const MachineInstr &TailCall) const { - assert(canMakeTailCallConditional(BranchCond, TailCall)); - - MachineBasicBlock::iterator I = MBB.end(); - while (I != MBB.begin()) { - --I; - if (I->isDebugValue()) - continue; - if (!I->isBranch()) - assert(0 && "Can't find the branch to replace!"); - - X86::CondCode CC = getCondFromBranchOpc(I->getOpcode()); - assert(BranchCond.size() == 1); - if (CC != BranchCond[0].getImm()) - continue; - - break; - } - - unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc - : X86::TCRETURNdi64cc; - - auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc)); - MIB->addOperand(TailCall.getOperand(0)); // Destination. - MIB.addImm(0); // Stack offset (not used). - MIB->addOperand(BranchCond[0]); // Condition. - MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters. 
- - I->eraseFromParent(); -} - // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may // not be a fallthrough MBB now due to layout changes). Return nullptr if the // fallthrough MBB cannot be identified. diff --git a/gnu/llvm/lib/Target/X86/X86InstrInfo.h b/gnu/llvm/lib/Target/X86/X86InstrInfo.h index 8d746172dcb..acfdef4da7a 100644 --- a/gnu/llvm/lib/Target/X86/X86InstrInfo.h +++ b/gnu/llvm/lib/Target/X86/X86InstrInfo.h @@ -316,13 +316,6 @@ public: // Branch analysis. bool isUnpredicatedTerminator(const MachineInstr &MI) const override; - bool isUnconditionalTailCall(const MachineInstr &MI) const override; - bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - void replaceBranchWithTailCall(MachineBasicBlock &MBB, - SmallVectorImpl<MachineOperand> &Cond, - const MachineInstr &TailCall) const override; - bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp index a38a4b30b77..feeb2fd5993 100644 --- a/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/gnu/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -498,16 +498,11 @@ ReSimplify: break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction. + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction. { unsigned Opcode; case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; - case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: - Opcode = X86::GetCondBranchFromCond( - static_cast<X86::CondCode>(MI->getOperand(1).getImm())); - goto SetTailJmpOpcode; SetTailJmpOpcode: MCOperand Saved = OutMI.getOperand(0); @@ -1281,11 +1276,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: - case X86::TAILJMPd_CC: case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64: - case X86::TAILJMPd64_CC: case X86::TAILJMPr64_REX: case X86::TAILJMPm64_REX: // Lower these as normal, but add some comments. 
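Taken together, the X86 hunks above back out the conditional tail-call optimization: the TCRETURNdicc/TCRETURNdi64cc pseudos, their TAILJMPd_CC/TAILJMPd64_CC lowerings, and the branch-replacement hooks in X86InstrInfo (isUnconditionalTailCall, canMakeTailCallConditional, replaceBranchWithTailCall) are all removed, so a conditional branch to a block that only tail-calls is no longer folded into a conditional jump to the callee. What survives in X86MCInstLower is a plain opcode mapping; a condensed sketch (hypothetical helper, not the actual switch layout; the X86 opcode enums are assumed from the generated target headers):

// Every remaining tail-jump variant is unconditional after the revert.
static unsigned lowerTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    return X86::JMP32r; // register-indirect tail jump
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    return X86::JMP_1;  // direct tail jump, 32- and 64-bit
  default:
    return Opcode;      // memory and REX variants are handled separately
  }
}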
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp index 727ff70c3ff..586bb7bd7b1 100644 --- a/gnu/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/gnu/llvm/lib/Target/X86/X86Subtarget.cpp @@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; - - assert((!isPMULLDSlow() || hasSSE41()) && - "Feature Slow PMULLD can only be set on a subtarget with SSE4.1"); } void X86Subtarget::initializeEnvironment() { diff --git a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index d086ee05a64..941efb210d1 100644 --- a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(true), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = on)")); + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass")); static cl::opt<bool> DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 013159cde77..428f94bb5e9 100644 --- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -884,6 +884,10 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, if (!GEPLHS->hasAllConstantIndices()) return nullptr; + // Make sure the pointers have the same type. + if (GEPLHS->getType() != RHS->getType()) + return nullptr; + Value *PtrBase, *Index; std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); @@ -4035,11 +4039,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1); return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1); } - // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear - if (CmpC->isMinSignedValue()) { - Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); - } } break; } @@ -4059,11 +4058,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (*CmpC == Op0Max - 1) return new ICmpInst(ICmpInst::ICMP_EQ, Op0, ConstantInt::get(Op1->getType(), *CmpC + 1)); - - // (x >u 2147483647) -> (x <s 0) -> true if sign bit set - if (CmpC->isMaxSignedValue()) - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Constant::getNullValue(Op0->getType())); } break; } @@ -4295,6 +4289,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) return nullptr; + // FIXME: We only do this after checking for min/max to prevent infinite + // looping caused by a reverse canonicalization of these patterns for min/max. + // FIXME: The organization of folds is a mess. These would naturally go into + // canonicalizeCmpWithConstant(), but we can't move all of the above folds + // down here after the min/max restriction. 
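// Worked example of the two folds relocated below (they were removed from
// foldICmpUsingKnownBits above to avoid the min/max canonicalization loop
// the FIXME describes). For i32, 2147483647 is the largest signed value
// (0x7FFFFFFF) and 2147483648 is the smallest one's bit pattern
// (0x80000000), so:
//   x >u 2147483647  <=>  x <s 0    (true iff the sign bit is set)
//   x <u 2147483648  <=>  x >s -1   (true iff the sign bit is clear)
// e.g. x = 0x80000001: it is >u 0x7FFFFFFF, and as a signed value it is
// -2147483647 < 0. (Standalone illustration, not part of the patch.)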
+ ICmpInst::Predicate Pred = I.getPredicate(); + const APInt *C; + if (match(Op1, m_APInt(C))) { + // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set + if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) { + Constant *Zero = Constant::getNullValue(Op0->getType()); + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero); + } + + // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear + if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) { + Constant *AllOnes = Constant::getAllOnesValue(Op0->getType()); + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes); + } + } + if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 388c5e4e7fa..49e516e9c17 100644 --- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -502,7 +502,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { !DL.isNonIntegralPointerType(Ty)) { if (all_of(LI.users(), [&LI](User *U) { auto *SI = dyn_cast<StoreInst>(U); - return SI && SI->getPointerOperand() != &LI; + return SI && SI->getPointerOperand() != &LI && + !SI->getPointerOperand()->isSwiftError(); })) { LoadInst *NewLoad = combineLoadToNewType( IC, LI, diff --git a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 9c4b417e35e..f5e9e7dd5a9 100644 --- a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -600,6 +600,22 @@ private: void initializeCallbacks(Module &M); bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers); + void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers); + void + InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers); + + GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, + StringRef OriginalName); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + IRBuilder<> CreateAsanModuleDtor(Module &M); + bool ShouldInstrumentGlobal(GlobalVariable *G); bool ShouldUseMachOGlobalsSection() const; StringRef getGlobalMetadataSection() const; @@ -997,7 +1013,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. - !AI.isUsedWithInAlloca()); + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -1072,12 +1090,19 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, } } - // Do not instrument acesses from different address spaces; we cannot deal - // with them. if (PtrOperand) { + // Do not instrument accesses from different address spaces; we cannot deal + // with them. 
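// A second guard follows: swifterror pointers are skipped as well, since
// instruction selection promotes them to registers. They never name real
// memory, so passing one to an instrumentation callback would be
// meaningless; the InstCombine hunk above and the TSan hunk later in this
// patch add matching isSwiftError() checks for the same reason.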
Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0) return nullptr; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. + if (PtrOperand->isSwiftError()) + return nullptr; } // Treat memory accesses to promotable allocas as non-interesting since they @@ -1553,17 +1578,173 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { // Declare the functions that find globals in a shared object and then invoke // the (un)register function on them. - AsanRegisterImageGlobals = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kAsanRegisterImageGlobalsName, - IRB.getVoidTy(), IntptrTy, nullptr)); + AsanRegisterImageGlobals = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kAsanUnregisterImageGlobalsName, - IRB.getVoidTy(), IntptrTy, nullptr)); + AsanUnregisterImageGlobals = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); } +// Put the metadata and the instrumented global in the same group. This ensures +// that the metadata is discarded if the instrumented global is discarded. +void AddressSanitizerModule::SetComdatForGlobalMetadata( + GlobalVariable *G, GlobalVariable *Metadata) { + Module &M = *G->getParent(); + Comdat *C = G->getComdat(); + if (!C) { + if (!G->hasName()) { + // If G is unnamed, it must be internal. Give it an artificial name + // so we can put it in a comdat. + assert(G->hasLocalLinkage()); + G->setName(Twine(kAsanGenPrefix) + "_anon_global"); + } + C = M.getOrInsertComdat(G->getName()); + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. + if (TargetTriple.isOSBinFormatCOFF()) + C->setSelectionKind(Comdat::NoDuplicates); + G->setComdat(C); + } + + assert(G->hasComdat()); + Metadata->setComdat(G->getComdat()); +} + +// Create a separate metadata global and put it in the appropriate ASan +// global registration section. 
+GlobalVariable * +AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, + StringRef OriginalName) { + GlobalVariable *Metadata = + new GlobalVariable(M, Initializer->getType(), false, + GlobalVariable::InternalLinkage, Initializer, + Twine("__asan_global_") + + GlobalValue::getRealLinkageName(OriginalName)); + Metadata->setSection(getGlobalMetadataSection()); + return Metadata; +} + +IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { + Function *AsanDtorFunction = + Function::Create(FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); + + return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); +} + +void AddressSanitizerModule::InstrumentGlobalsCOFF( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + auto &DL = M.getDataLayout(); + + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + Constant *Initializer = MetadataInitializers[i]; + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, Initializer, G->getName()); + + // The MSVC linker always inserts padding when linking incrementally. We + // cope with that by aligning each struct to its size, which must be a power + // of two. + unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType()); + assert(isPowerOf2_32(SizeOfGlobalStruct) && + "global metadata will not be padded appropriately"); + Metadata->setAlignment(SizeOfGlobalStruct); + + SetComdatForGlobalMetadata(G, Metadata); + } +} + +void AddressSanitizerModule::InstrumentGlobalsMachO( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + // On recent Mach-O platforms, use a structure which binds the liveness of + // the global variable to the metadata struct. Keep the list of "Liveness" GV + // created to be added to llvm.compiler.used + StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr); + SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size()); + + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + Constant *Initializer = MetadataInitializers[i]; + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, Initializer, G->getName()); + + // On recent Mach-O platforms, we emit the global metadata in a way that + // allows the linker to properly strip dead globals. + auto LivenessBinder = ConstantStruct::get( + LivenessTy, Initializer->getAggregateElement(0u), + ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr); + GlobalVariable *Liveness = new GlobalVariable( + M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, + Twine("__asan_binder_") + G->getName()); + Liveness->setSection("__DATA,__asan_liveness,regular,live_support"); + LivenessGlobals[i] = Liveness; + } + + // Update llvm.compiler.used, adding the new liveness globals. This is + // needed so that during LTO these variables stay alive. The alternative + // would be to have the linker handling the LTO symbols, but libLTO + // current API does not expose access to the section for each symbol. 
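// Layout note for the loop above: each liveness binder is a two-field
// {intptr, intptr} struct holding the instrumented global's address (field
// 0 of its metadata initializer) and the address of the metadata global
// itself. The "regular,live_support" attribute on __asan_liveness tells the
// Mach-O linker to keep a binder only while the data it references is live,
// so dead-stripping a global also drops its ASan metadata.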
+ if (!LivenessGlobals.empty()) + appendToCompilerUsed(M, LivenessGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + IRB.CreateCall(AsanRegisterImageGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterImageGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); +} + +void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + unsigned N = ExtendedGlobals.size(); + assert(N > 0); + + // On platforms that don't have a custom metadata section, we emit an array + // of global metadata structures. + ArrayType *ArrayOfGlobalStructTy = + ArrayType::get(MetadataInitializers[0]->getType(), N); + auto AllGlobals = new GlobalVariable( + M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, + ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); + + IRB.CreateCall(AsanRegisterGlobals, + {IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, N)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterGlobals, + {IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, N)}); +} + // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. @@ -1580,9 +1761,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { if (n == 0) return false; auto &DL = M.getDataLayout(); - bool UseComdatMetadata = TargetTriple.isOSBinFormatCOFF(); - bool UseMachOGlobalsSection = ShouldUseMachOGlobalsSection(); - bool UseMetadataArray = !(UseComdatMetadata || UseMachOGlobalsSection); // A global is described by a structure // size_t beg; @@ -1597,19 +1775,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, nullptr); - unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(GlobalStructTy); - assert(isPowerOf2_32(SizeOfGlobalStruct) && - "global metadata will not be padded appropriately"); - SmallVector<Constant *, 16> Initializers(UseMetadataArray ? n : 0); - - // On recent Mach-O platforms, use a structure which binds the liveness of - // the global variable to the metadata struct. 
Keep the list of "Liveness" GV - // created to be added to llvm.compiler.used - StructType *LivenessTy = nullptr; - if (UseMachOGlobalsSection) - LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr); - SmallVector<GlobalValue *, 16> LivenessGlobals( - UseMachOGlobalsSection ? n : 0); + SmallVector<GlobalVariable *, 16> NewGlobals(n); + SmallVector<Constant *, 16> Initializers(n); bool HasDynamicallyInitializedGlobals = false; @@ -1681,25 +1848,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); NewGlobal->takeName(G); G->eraseFromParent(); - G = NewGlobal; - - if (UseComdatMetadata) { - // Get or create a COMDAT for G so that we can use it with our metadata. - Comdat *C = G->getComdat(); - if (!C) { - if (!G->hasName()) { - // If G is unnamed, it must be internal. Give it an artificial name - // so we can put it in a comdat. - assert(G->hasLocalLinkage()); - G->setName(Twine(kAsanGenPrefix) + "_anon_global"); - } - C = M.getOrInsertComdat(G->getName()); - // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. - if (TargetTriple.isOSBinFormatCOFF()) - C->setSelectionKind(Comdat::NoDuplicates); - G->setComdat(C); - } - } + NewGlobals[i] = NewGlobal; Constant *SourceLoc; if (!MD.SourceLoc.empty()) { @@ -1750,117 +1899,21 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); - // If we aren't using separate metadata globals, add it to the initializer - // list and continue. - if (UseMetadataArray) { - Initializers[i] = Initializer; - continue; - } - - // Create a separate metadata global and put it in the appropriate ASan - // global registration section. - GlobalVariable *Metadata = new GlobalVariable( - M, GlobalStructTy, false, GlobalVariable::InternalLinkage, - Initializer, Twine("__asan_global_") + - GlobalValue::getRealLinkageName(G->getName())); - Metadata->setSection(getGlobalMetadataSection()); - - // We don't want any padding, but we also need a reasonable alignment. - // The MSVC linker always inserts padding when linking incrementally. We - // cope with that by aligning each struct to its size, which must be a power - // of two. - Metadata->setAlignment(SizeOfGlobalStruct); - - // On platforms that support comdats, put the metadata and the - // instrumented global in the same group. This ensures that the metadata - // is discarded if the instrumented global is discarded. - if (UseComdatMetadata) { - assert(G->hasComdat()); - Metadata->setComdat(G->getComdat()); - continue; - } - assert(UseMachOGlobalsSection); + Initializers[i] = Initializer; + } - // On recent Mach-O platforms, we emit the global metadata in a way that - // allows the linker to properly strip dead globals. 
- auto LivenessBinder = ConstantStruct::get( - LivenessTy, Initializer->getAggregateElement(0u), - ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr); - GlobalVariable *Liveness = new GlobalVariable( - M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, - Twine("__asan_binder_") + G->getName()); - Liveness->setSection("__DATA,__asan_liveness,regular,live_support"); - LivenessGlobals[i] = Liveness; + if (TargetTriple.isOSBinFormatCOFF()) { + InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); + } else if (ShouldUseMachOGlobalsSection()) { + InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); + } else { + InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); } // Create calls for poisoning before initializers run and unpoisoning after. if (HasDynamicallyInitializedGlobals) createInitializerPoisonCalls(M, ModuleName); - // Platforms with a dedicated metadata section don't need to emit any more - // code. - if (UseComdatMetadata) - return true; - - GlobalVariable *AllGlobals = nullptr; - GlobalVariable *RegisteredFlag = nullptr; - - if (UseMachOGlobalsSection) { - // RegisteredFlag serves two purposes. First, we can pass it to dladdr() - // to look up the loaded image that contains it. Second, we can store in it - // whether registration has already occurred, to prevent duplicate - // registration. - // - // common linkage ensures that there is only one global per shared library. - RegisteredFlag = new GlobalVariable( - M, IntptrTy, false, GlobalVariable::CommonLinkage, - ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); - RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); - - // Update llvm.compiler.used, adding the new liveness globals. This is - // needed so that during LTO these variables stay alive. The alternative - // would be to have the linker handling the LTO symbols, but libLTO - // current API does not expose access to the section for each symbol. - if (!LivenessGlobals.empty()) - appendToCompilerUsed(M, LivenessGlobals); - } else if (UseMetadataArray) { - // On platforms that don't have a custom metadata section, we emit an array - // of global metadata structures. - ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n); - AllGlobals = new GlobalVariable( - M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, - ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); - } - - // Create a call to register the globals with the runtime. - if (UseMachOGlobalsSection) { - IRB.CreateCall(AsanRegisterImageGlobals, - {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); - } else { - IRB.CreateCall(AsanRegisterGlobals, - {IRB.CreatePointerCast(AllGlobals, IntptrTy), - ConstantInt::get(IntptrTy, n)}); - } - - // We also need to unregister globals at the end, e.g., when a shared library - // gets closed. 
- Function *AsanDtorFunction = - Function::Create(FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); - BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); - - if (UseMachOGlobalsSection) { - IRB_Dtor.CreateCall(AsanUnregisterImageGlobals, - {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); - } else { - IRB_Dtor.CreateCall(AsanUnregisterGlobals, - {IRB.CreatePointerCast(AllGlobals, IntptrTy), - ConstantInt::get(IntptrTy, n)}); - } - - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); - DEBUG(dbgs() << M); return true; } diff --git a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index d9659694da4..52035c79a4a 100644 --- a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -488,6 +488,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); + + // swifterror memory addresses are mem2reg promoted by instruction selection. + // As such they cannot have regular uses like an instrumentation function and + // it makes no sense to track them as memory. + if (Addr->isSwiftError()) + return false; + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; diff --git a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 141e99b92cd..84f9373ae91 100644 --- a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -41,6 +41,8 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); +static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true)); + namespace { class CorrelatedValuePropagation : public FunctionPass { public: @@ -405,6 +407,9 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { typedef OverflowingBinaryOperator OBO; + if (DontProcessAdds) + return false; + if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp)) return false; diff --git a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 90c26e13db7..f8e1d2e1a08 100644 --- a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { class GVNHoist { public: GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD, - MemorySSA *MSSA, bool OptForMinSize) - : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize), - HoistingGeps(OptForMinSize), HoistedCtr(0) { - // Hoist as far as possible when optimizing for code-size. 
diff --git a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 90c26e13db7..f8e1d2e1a08 100644
--- a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
 class GVNHoist {
 public:
   GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
-           MemorySSA *MSSA, bool OptForMinSize)
-      : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
-        HoistingGeps(OptForMinSize), HoistedCtr(0) {
-    // Hoist as far as possible when optimizing for code-size.
-    if (OptForMinSize)
-      MaxNumberOfBBSInPath = -1;
-  }
+           MemorySSA *MSSA)
+      : DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+        HoistingGeps(false),
+        HoistedCtr(0)
+  { }
 
   bool run(Function &F) {
     VN.setDomTree(DT);
@@ -251,7 +249,6 @@ private:
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
   MemorySSA *MSSA;
-  const bool OptForMinSize;
   const bool HoistingGeps;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
@@ -505,11 +502,6 @@ private:
   bool safeToHoistScalar(const BasicBlock *HoistBB,
                          SmallPtrSetImpl<const BasicBlock *> &WL,
                          int &NBBsOnAllPaths) {
-    // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
-    // where they are partially needed.
-    if (OptForMinSize)
-      return true;
-
     // Check that the hoisted expression is needed on all paths.
     if (!hoistingFromAllPaths(HoistBB, WL))
       return false;
@@ -923,13 +915,8 @@ private:
             Intr->getIntrinsicID() == Intrinsic::assume)
           continue;
       }
-      if (Call->mayHaveSideEffects()) {
-        if (!OptForMinSize)
-          break;
-        // We may continue hoisting across calls which write to memory.
-        if (Call->mayThrow())
-          break;
-      }
+      if (Call->mayHaveSideEffects())
+        break;
 
       if (Call->isConvergent())
         break;
@@ -971,7 +958,7 @@ public:
     auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
 
-    GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+    GVNHoist G(&DT, &AA, &MD, &MSSA);
     return G.run(F);
   }
@@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-  GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+  GVNHoist G(&DT, &AA, &MD, &MSSA);
   if (!G.run(F))
     return PreservedAnalyses::all();
diff --git a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
index 4c15c8a32be..f51d11c04cb 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1196,10 +1196,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
   auto mergeLoop = [&](Loop *L) {
     // Loop over the body of this loop, looking for calls, invokes, and stores.
-    // Because subloops have already been incorporated into AST, we skip blocks
-    // in subloops.
     for (BasicBlock *BB : L->blocks())
-      if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
       CurAST->add(*BB); // Incorporate the specified basic block
   };
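
The LICM hunk drops the "skip blocks in subloops" filter: when a subloop's
alias set tracker cannot be reused, the merge must add every block of the loop
nest, not just the blocks owned directly by L. Since Loop::blocks() already
spans the whole nest, the fix is simply to stop filtering. A sketch of the
merged walk under that assumption (a standalone helper, not the pass's lambda):

    #include "llvm/Analysis/AliasSetTracker.h"
    #include "llvm/Analysis/LoopInfo.h"

    // Loop::blocks() visits all blocks of L including those of its subloops,
    // so one unfiltered pass captures the entire nest.
    static void mergeLoopIntoAST(llvm::Loop *L, llvm::AliasSetTracker &AST) {
      for (llvm::BasicBlock *BB : L->blocks())
        AST.add(*BB); // adds every instruction of BB to the alias sets
    }
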
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 01728ae680d..194587a85e7 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -158,8 +158,9 @@ struct MemAccessTy {
   bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
 
-  static MemAccessTy getUnknown(LLVMContext &Ctx) {
-    return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+  static MemAccessTy getUnknown(LLVMContext &Ctx,
+                                unsigned AS = UnknownAddressSpace) {
+    return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
 };
 
@@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
   // TODO: Be less conservative when the type is similar and can use the same
   // addressing modes.
   if (Kind == LSRUse::Address) {
-    if (AccessTy != LU.AccessTy)
-      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
+      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
+                                            AccessTy.AddrSpace);
+    }
   }
 
   // Conservatively assume HasBaseReg is true for now.
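
The LoopStrengthReduce change makes "unknown" access types less lossy: when two
uses disagree only on the in-memory type, the address space can be preserved
instead of being reset to the unknown sentinel, so later addressing-mode
queries still target the right address space. A standalone sketch of the same
idea (the names here are illustrative; the real struct is LSR's MemAccessTy):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    struct MemAccess {
      llvm::Type *MemTy;
      unsigned AddrSpace;

      static const unsigned UnknownAS = ~0u;

      // "Unknown" erases the type but may keep a known address space.
      static MemAccess getUnknown(llvm::LLVMContext &Ctx,
                                  unsigned AS = UnknownAS) {
        return {llvm::Type::getVoidTy(Ctx), AS};
      }
    };
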
diff --git a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 6043e04bb8c..57e6e3ddad9 100644
--- a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -85,6 +85,8 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
 STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
+STATISTIC(NumGVNNotMostDominatingLeader,
+          "Number of times a member dominated its new class's leader");
 
 //===----------------------------------------------------------------------===//
 // GVN Pass
@@ -1073,17 +1075,20 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
   if (I == OldClass->NextLeader.first)
     OldClass->NextLeader = {nullptr, ~0U};
 
-  // The new instruction and new class leader may either be siblings in the
-  // dominator tree, or the new class leader should dominate the new member
-  // instruction. We simply check that the member instruction does not properly
-  // dominate the new class leader.
-  assert(
-      !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
-      I == NewClass->RepLeader ||
-      !DT->properlyDominates(
+  // It's possible, though unlikely, for us to discover equivalences such
+  // that the current leader does not dominate the old one.
+  // This statistic tracks how often this happens.
+  // We assert on phi nodes when this happens, currently, for debugging,
+  // because we want to make sure we name phi node cycles properly.
+  if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader &&
+      I != NewClass->RepLeader &&
+      DT->properlyDominates(
           I->getParent(),
-          cast<Instruction>(NewClass->RepLeader)->getParent()) &&
-      "New class for instruction should not be dominated by instruction");
+          cast<Instruction>(NewClass->RepLeader)->getParent())) {
+    ++NumGVNNotMostDominatingLeader;
+    assert(!isa<PHINode>(I) &&
+           "New class for instruction should not be dominated by instruction");
+  }
 
   if (NewClass->RepLeader != I) {
     auto DFSNum = InstrDFS.lookup(I);
diff --git a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 181a324861e..65c814d7a63 100644
--- a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -1521,8 +1521,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
       if (CI->isNegative() && !CI->isMinValue(true)) {
         Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
-        assert(!Duplicates.count(Factor) &&
-               "Shouldn't have two constant factors, missed a canonicalize");
+        if (!Duplicates.insert(Factor).second)
+          continue;
         unsigned Occ = ++FactorOccurrences[Factor];
         if (Occ > MaxOcc) {
           MaxOcc = Occ;
@@ -1534,8 +1534,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
         APFloat F(CF->getValueAPF());
         F.changeSign();
         Factor = ConstantFP::get(CF->getContext(), F);
-        assert(!Duplicates.count(Factor) &&
-               "Shouldn't have two constant factors, missed a canonicalize");
+        if (!Duplicates.insert(Factor).second)
+          continue;
         unsigned Occ = ++FactorOccurrences[Factor];
         if (Occ > MaxOcc) {
           MaxOcc = Occ;
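
In the Reassociate hunks the hard assert ("missed a canonicalize") becomes a
graceful skip, using the standard set-insert idiom: insert() returns an
{iterator, inserted} pair, so testing .second both records the factor and
detects a duplicate in one call. The idiom in isolation, on plain std types:

    #include <set>
    #include <vector>

    // Process each distinct value once; a duplicate is skipped rather than
    // treated as a program-logic error.
    void processUnique(const std::vector<int> &Factors) {
      std::set<int> Seen;
      for (int F : Factors) {
        if (!Seen.insert(F).second)
          continue; // already seen: insert() reported no insertion
        // ... handle the first occurrence here ...
      }
    }
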
diff --git a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
index 34be9069248..ede381c4c24 100644
--- a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1705,7 +1705,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
     // If this is an exact definition of this function, then we can propagate
     // information about its result into callsites of it.
-    if (F.hasExactDefinition())
+    // Don't touch naked functions. They may contain asm returning a
+    // value we don't see, so we may end up interprocedurally propagating
+    // the return value incorrectly.
+    if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked))
       Solver.AddTrackedFunction(&F);
 
     // If this function only has direct calls that we can see, we can track its
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f9a602bc268..e346ebd6a00 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -189,11 +189,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
     assert(OriginalBB == OldLoop->getHeader() &&
            "Header should be first in RPO");
 
+    NewLoop = new Loop();
     Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
-    assert(NewLoopParent &&
-           "Expected parent loop before sub-loop in RPO");
-    NewLoop = new Loop;
-    NewLoopParent->addChildLoop(NewLoop);
+
+    if (NewLoopParent)
+      NewLoopParent->addChildLoop(NewLoop);
+    else
+      LI->addTopLevelLoop(NewLoop);
+
     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
     return OldLoop;
   } else {
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 85da3ba899a..d3ea1564115 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -302,17 +302,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
   }
 
   NewLoopsMap NewLoops;
-  NewLoops[L] = NewLoop;
+  if (NewLoop)
+    NewLoops[L] = NewLoop;
+  else if (ParentLoop)
+    NewLoops[L] = ParentLoop;
+
   // For each block in the original loop, create a new copy,
   // and update the value map with the newly created values.
   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
     NewBlocks.push_back(NewBB);
-
-    if (NewLoop) {
+
+    // If we're unrolling the outermost loop, there's no remainder loop,
+    // and this block isn't in a nested loop, then the new block is not
+    // in any loop. Otherwise, add it to LoopInfo.
+    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
       addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
-    } else if (ParentLoop)
-      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
 
     VMap[*BB] = NewBB;
     if (Header == *BB) {
diff --git a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6e30919246c..7b0bddbbb83 100644
--- a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
         I->getType()->isTokenTy())
       return false;
+
+    // Conservatively return false if I is an inline-asm instruction. Sinking
+    // and merging inline-asm instructions can potentially create arguments
+    // that cannot satisfy the inline-asm constraints.
+    if (const auto *C = dyn_cast<CallInst>(I))
+      if (C->isInlineAsm())
+        return false;
+
     // Everything must have only one use too, apart from stores which
     // have no uses.
    if (!isa<StoreInst>(I) && !I->hasOneUse())
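
The SimplifyCFG guard above bails out of instruction sinking for calls that
wrap inline asm, since merging two asm calls can synthesize PHI-fed arguments
that the asm constraint string never agreed to. As a sketch, assuming a
legality predicate over a single candidate instruction (the real check sits
inside canSinkInstructions alongside many other conditions):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Inline-asm operands are constraint-checked against the original call;
    // a merged argument may no longer satisfy them, so refuse to sink.
    static bool sinkableCall(const Instruction *I) {
      if (const auto *CI = dyn_cast<CallInst>(I))
        if (CI->isInlineAsm())
          return false;
      return true; // not a blocking case; other checks would follow
    }
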
diff --git a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b1f86f8efd..dac7032fa08 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() {
       // is consecutive-like, the pointer operand should remain uniform.
       else if (hasConsecutiveLikePtrOperand(&I))
         ConsecutiveLikePtrs.insert(Ptr);
+
+      // Otherwise, if the memory instruction will be vectorized and its
+      // pointer operand is non-consecutive-like, the memory instruction should
+      // be a gather or scatter operation. Its pointer operand will be
+      // non-uniform.
+      else
+        PossibleNonUniformPtrs.insert(Ptr);
     }
 
   // Add to the Worklist all consecutive and consecutive-like pointers that
diff --git a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1c7cbc7edf9..328f2700296 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4026,40 +4026,36 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
   if (!V)
     return false;
 
-  Value *P = V->getParent();
-
-  // Vectorize in current basic block only.
-  auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
-  auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
-  if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
-    return false;
-
   // Try to vectorize V.
-  if (tryToVectorizePair(Op0, Op1, R))
+  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
     return true;
 
-  auto *A = dyn_cast<BinaryOperator>(Op0);
-  auto *B = dyn_cast<BinaryOperator>(Op1);
+  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
   // Try to skip B.
   if (B && B->hasOneUse()) {
-    auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
-    auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
-    if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
+    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+    if (tryToVectorizePair(A, B0, R)) {
       return true;
-    if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
+    }
+    if (tryToVectorizePair(A, B1, R)) {
       return true;
+    }
   }
 
   // Try to skip A.
   if (A && A->hasOneUse()) {
-    auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
-    auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
-    if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
+    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+    if (tryToVectorizePair(A0, B, R)) {
      return true;
-    if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
+    }
+    if (tryToVectorizePair(A1, B, R)) {
      return true;
+    }
  }
-  return false;
+  return 0;
 }
 
 /// \brief Generate a shuffle mask to be used in a reduction tree.
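
The tryToVectorize hunk deletes the same-basic-block precondition: operand
pairs go straight to tryToVectorizePair, which is left to decide legality, and
the "skip one side" recursion descends into an operand's own operands whenever
that side has a single use. The control flow reduced to a sketch, with a
hypothetical tryPair standing in for tryToVectorizePair (only one side shown):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool tryPair(Value *A, Value *B); // hypothetical; defined elsewhere

    static bool tryRoot(BinaryOperator *V) {
      // First try the pair itself...
      if (tryPair(V->getOperand(0), V->getOperand(1)))
        return true;
      // ...then try to "skip" a single-use operand by pairing through it.
      if (auto *B = dyn_cast<BinaryOperator>(V->getOperand(1)))
        if (B->hasOneUse() &&
            (tryPair(V->getOperand(0), B->getOperand(0)) ||
             tryPair(V->getOperand(0), B->getOperand(1))))
          return true;
      return false; // the real code symmetrically tries to skip operand 0 too
    }
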
@@ -4511,143 +4507,29 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
   return nullptr;
 }
 
-namespace {
-/// Tracks instructons and its children.
-class WeakVHWithLevel final : public CallbackVH {
-  /// Operand index of the instruction currently beeing analized.
-  unsigned Level = 0;
-  /// Is this the instruction that should be vectorized, or are we now
-  /// processing children (i.e. operands of this instruction) for potential
-  /// vectorization?
-  bool IsInitial = true;
-
-public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
-  /// Restart children analysis each time it is repaced by the new instruction.
-  void allUsesReplacedWith(Value *New) override {
-    setValPtr(New);
-    Level = 0;
-    IsInitial = true;
-  }
-  /// Check if the instruction was not deleted during vectorization.
-  bool isValid() const { return !getValPtr(); }
-  /// Is the istruction itself must be vectorized?
-  bool isInitial() const { return IsInitial; }
-  /// Try to vectorize children.
-  void clearInitial() { IsInitial = false; }
-  /// Are all children processed already?
-  bool isFinal() const {
-    assert(getValPtr() &&
-           (isa<Instruction>(getValPtr()) &&
-            cast<Instruction>(getValPtr())->getNumOperands() >= Level));
-    return getValPtr() &&
-           cast<Instruction>(getValPtr())->getNumOperands() == Level;
-  }
-  /// Get next child operation.
-  Value *nextOperand() {
-    assert(getValPtr() && isa<Instruction>(getValPtr()) &&
-           cast<Instruction>(getValPtr())->getNumOperands() > Level);
-    return cast<Instruction>(getValPtr())->getOperand(Level++);
-  }
-  virtual ~WeakVHWithLevel() = default;
-};
-} // namespace
-
 /// \brief Attempt to reduce a horizontal reduction.
 /// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
 /// \returns true if a horizontal reduction was matched and reduced.
 /// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
-    PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
-    TargetTransformInfo *TTI,
-    const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+                                        BoUpSLP &R, TargetTransformInfo *TTI,
+                                        unsigned MinRegSize) {
   if (!ShouldVectorizeHor)
     return false;
 
-  if (!Root)
-    return false;
-
-  if (Root->getParent() != BB)
+  HorizontalReduction HorRdx(MinRegSize);
+  if (!HorRdx.matchAssociativeReduction(P, BI))
     return false;
 
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
-  SmallSet<Value *, 8> VisitedInstrs;
-  bool Res = false;
-  while (!Stack.empty()) {
-    Value *V = Stack.back();
-    if (!V) {
-      Stack.pop_back();
-      continue;
-    }
-    auto *Inst = dyn_cast<Instruction>(V);
-    if (!Inst || isa<PHINode>(Inst)) {
-      Stack.pop_back();
-      continue;
-    }
-    if (Stack.back().isInitial()) {
-      Stack.back().clearInitial();
-      if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
-        HorizontalReduction HorRdx(R.getMinVecRegSize());
-        if (HorRdx.matchAssociativeReduction(P, BI)) {
-          // If there is a sufficient number of reduction values, reduce
-          // to a nearby power-of-2. Can safely generate oversized
-          // vectors and rely on the backend to split them to legal sizes.
-          HorRdx.ReduxWidth =
-              std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
-
-          if (HorRdx.tryToReduce(R, TTI)) {
-            Res = true;
-            P = nullptr;
-            continue;
-          }
-        }
-        if (P) {
-          Inst = dyn_cast<Instruction>(BI->getOperand(0));
-          if (Inst == P)
-            Inst = dyn_cast<Instruction>(BI->getOperand(1));
-          if (!Inst) {
-            P = nullptr;
-            continue;
-          }
-        }
-      }
-      P = nullptr;
-      if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
-        Res = true;
-        continue;
-      }
-    }
-    if (Stack.back().isFinal()) {
-      Stack.pop_back();
-      continue;
-    }
-    if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
-      if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
-          Stack.size() < RecursionMaxDepth)
-        Stack.push_back(NextV);
-  }
-  return Res;
-}
-
-bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
-                                                 BasicBlock *BB, BoUpSLP &R,
-                                                 TargetTransformInfo *TTI) {
-  if (!V)
-    return false;
-  auto *I = dyn_cast<Instruction>(V);
-  if (!I)
-    return false;
+  // If there is a sufficient number of reduction values, reduce
+  // to a nearby power-of-2. Can safely generate oversized
+  // vectors and rely on the backend to split them to legal sizes.
+  HorRdx.ReduxWidth =
+      std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
 
-  if (!isa<BinaryOperator>(I))
-    P = nullptr;
-  // Try to match and vectorize a horizontal reduction.
-  return canBeVectorized(P, I, BB, R, TTI,
-                         [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
-                           return tryToVectorize(BI, R);
-                         });
+  return HorRdx.tryToReduce(R, TTI);
 }
 
 bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
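
canMatchHorizontalReduction keeps the width heuristic from the deleted walker:
round the number of reduction values down to a power of two, but never below 4,
and let the backend legalize oversized vectors. The arithmetic on its own
(PowerOf2Floor comes from llvm/Support/MathExtras.h):

    #include "llvm/Support/MathExtras.h"
    #include <algorithm>
    #include <cstdint>

    // E.g. 7 values -> width 4; 9 values -> width 8; 3 values -> width 4
    // (clamped up, relying on the backend to split or widen as needed).
    uint64_t reduxWidth(uint64_t NumReductionValues) {
      return std::max<uint64_t>(4, llvm::PowerOf2Floor(NumReductionValues));
    }
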
@@ -4717,42 +4599,67 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (P->getNumIncomingValues() != 2)
         return Changed;
 
+      Value *Rdx = getReductionValue(DT, P, BB, LI);
+
+      // Check if this is a Binary Operator.
+      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+      if (!BI)
+        continue;
+
       // Try to match and vectorize a horizontal reduction.
-      if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
-                                   TTI)) {
+      if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
         continue;
       }
+
+      Value *Inst = BI->getOperand(0);
+      if (Inst == P)
+        Inst = BI->getOperand(1);
+
+      if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+        // We would like to start over since some instructions are deleted
+        // and the iterator may become invalid value.
+        Changed = true;
+        it = BB->begin();
+        e = BB->end();
+        continue;
+      }
+
       continue;
     }
 
-    if (ShouldStartVectorizeHorAtStore) {
-      if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
-        // Try to match and vectorize a horizontal reduction.
-        if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
-                                     TTI)) {
-          Changed = true;
-          it = BB->begin();
-          e = BB->end();
-          continue;
+    if (ShouldStartVectorizeHorAtStore)
+      if (StoreInst *SI = dyn_cast<StoreInst>(it))
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+                                          R.getMinVecRegSize()) ||
+              tryToVectorize(BinOp, R)) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
         }
-      }
-    }
 
     // Try to vectorize horizontal reductions feeding into a return.
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
-      if (RI->getNumOperands() != 0) {
-        // Try to match and vectorize a horizontal reduction.
-        if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
-          Changed = true;
-          it = BB->begin();
-          e = BB->end();
-          continue;
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+      if (RI->getNumOperands() != 0)
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+                                          R.getMinVecRegSize()) ||
+              tryToVectorizePair(BinOp->getOperand(0), BinOp->getOperand(1),
+                                 R)) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
         }
-      }
-    }
 
     // Try to vectorize trees that start at compare instructions.
     if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
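
All the paths above reset it/e and continue after a successful rewrite. The
reason is iterator stability: vectorization erases instructions, so the
basic-block iterator in hand may dangle, and the cheap safe answer is to rescan
from the top. The pattern in isolation, with a hypothetical tryRewrite:

    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    static bool tryRewrite(Instruction *I); // hypothetical; may erase instructions

    void scan(BasicBlock *BB) {
      for (auto it = BB->begin(), e = BB->end(); it != e;) {
        if (tryRewrite(&*it)) {
          // Erasures may have invalidated 'it'; restart with fresh iterators.
          it = BB->begin();
          e = BB->end();
          continue;
        }
        ++it;
      }
    }
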
@@ -4765,14 +4672,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         continue;
       }
 
-      for (int I = 0; I < 2; ++I) {
-        if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
-          Changed = true;
-          // We would like to start over since some instructions are deleted
-          // and the iterator may become invalid value.
-          it = BB->begin();
-          e = BB->end();
-          break;
+      for (int i = 0; i < 2; ++i) {
+        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+          if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+            Changed = true;
+            // We would like to start over since some instructions are deleted
+            // and the iterator may become invalid value.
+            it = BB->begin();
+            e = BB->end();
+            break;
+          }
         }
       }
       continue;
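
The final hunk narrows the compare case back to direct operand pairs: each
compare operand that is itself a BinaryOperator gets one shot at pair
vectorization, with the same restart-on-success bookkeeping as above. Sketched
with the hypothetical tryPair helper from the earlier examples:

    #include "llvm/IR/Instructions.h"

    static bool tryPair(llvm::Value *A, llvm::Value *B); // hypothetical helper

    // Both compare operands are candidate roots; stop at the first success
    // (the caller then restarts its basic-block scan).
    static bool tryCompareRoot(llvm::CmpInst *CI) {
      for (int i = 0; i < 2; ++i)
        if (auto *BI = llvm::dyn_cast<llvm::BinaryOperator>(CI->getOperand(i)))
          if (tryPair(BI->getOperand(0), BI->getOperand(1)))
            return true;
      return false;
    }
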
