Diffstat (limited to 'gnu/llvm/lib/Transforms')
17 files changed, 400 insertions, 392 deletions
diff --git a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index d086ee05a64..941efb210d1 100644
--- a/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/gnu/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold(
              "(default = 75)"));
 
 static cl::opt<bool> EnableGVNHoist(
-    "enable-gvn-hoist", cl::init(true), cl::Hidden,
-    cl::desc("Enable the GVN hoisting pass (default = on)"));
+    "enable-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass"));
 
 static cl::opt<bool>
     DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 013159cde77..428f94bb5e9 100644
--- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -884,6 +884,10 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
   if (!GEPLHS->hasAllConstantIndices())
     return nullptr;
 
+  // Make sure the pointers have the same type.
+  if (GEPLHS->getType() != RHS->getType())
+    return nullptr;
+
   Value *PtrBase, *Index;
   std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);
 
@@ -4035,11 +4039,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
         Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1);
         return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1);
       }
-      // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
-      if (CmpC->isMinSignedValue()) {
-        Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
-        return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
-      }
     }
     break;
   }
@@ -4059,11 +4058,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
       if (*CmpC == Op0Max - 1)
         return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                             ConstantInt::get(Op1->getType(), *CmpC + 1));
-
-      // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
-      if (CmpC->isMaxSignedValue())
-        return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
-                            Constant::getNullValue(Op0->getType()));
     }
     break;
   }
@@ -4295,6 +4289,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
     return nullptr;
 
+  // FIXME: We only do this after checking for min/max to prevent infinite
+  // looping caused by a reverse canonicalization of these patterns for min/max.
+  // FIXME: The organization of folds is a mess. These would naturally go into
+  // canonicalizeCmpWithConstant(), but we can't move all of the above folds
+  // down here after the min/max restriction.
+  ICmpInst::Predicate Pred = I.getPredicate();
+  const APInt *C;
+  if (match(Op1, m_APInt(C))) {
+    // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set
+    if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
+      Constant *Zero = Constant::getNullValue(Op0->getType());
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
+    }
+
+    // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear
+    if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
+      Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
+    }
+  }
+
   if (Instruction *Res = foldICmpInstWithConstant(I))
     return Res;
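The fold moved into visitICmpInst above rewrites unsigned comparisons against the signed-boundary constants into sign-bit tests. A minimal C sketch of the equivalence (illustrative only; the names are mine, not from this commit):

    /* For any 32-bit unsigned x these return the same value, which is why
       the pass may emit the ICMP_SLT form once the min/max checks ran. */
    int cmp_form(unsigned x)  { return x > 2147483647u; } /* x >u INT_MAX */
    int sign_form(unsigned x) { return (int)x < 0; }      /* x <s 0       */
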
diff --git a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 388c5e4e7fa..49e516e9c17 100644
--- a/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/gnu/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -502,7 +502,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
       !DL.isNonIntegralPointerType(Ty)) {
     if (all_of(LI.users(), [&LI](User *U) {
           auto *SI = dyn_cast<StoreInst>(U);
-          return SI && SI->getPointerOperand() != &LI;
+          return SI && SI->getPointerOperand() != &LI &&
+                 !SI->getPointerOperand()->isSwiftError();
         })) {
       LoadInst *NewLoad = combineLoadToNewType(
           IC, LI,
diff --git a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9c4b417e35e..f5e9e7dd5a9 100644
--- a/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/gnu/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -600,6 +600,22 @@ private:
   void initializeCallbacks(Module &M);
 
   bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
+  void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
+                             ArrayRef<GlobalVariable *> ExtendedGlobals,
+                             ArrayRef<Constant *> MetadataInitializers);
+  void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
+                              ArrayRef<GlobalVariable *> ExtendedGlobals,
+                              ArrayRef<Constant *> MetadataInitializers);
+  void
+  InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M,
+                                     ArrayRef<GlobalVariable *> ExtendedGlobals,
+                                     ArrayRef<Constant *> MetadataInitializers);
+
+  GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
+                                       StringRef OriginalName);
+  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata);
+  IRBuilder<> CreateAsanModuleDtor(Module &M);
+
   bool ShouldInstrumentGlobal(GlobalVariable *G);
   bool ShouldUseMachOGlobalsSection() const;
   StringRef getGlobalMetadataSection() const;
@@ -997,7 +1013,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
        (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
        // inalloca allocas are not treated as static, and we don't want
        // dynamic alloca instrumentation for them as well.
-       !AI.isUsedWithInAlloca());
+       !AI.isUsedWithInAlloca() &&
+       // swifterror allocas are register promoted by ISel
+       !AI.isSwiftError());
 
   ProcessedAllocas[&AI] = IsInteresting;
   return IsInteresting;
@@ -1072,12 +1090,19 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
     }
   }
 
-  // Do not instrument acesses from different address spaces; we cannot deal
-  // with them.
   if (PtrOperand) {
+    // Do not instrument acesses from different address spaces; we cannot deal
+    // with them.
     Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
     if (PtrTy->getPointerAddressSpace() != 0)
       return nullptr;
+
+    // Ignore swifterror addresses.
+    // swifterror memory addresses are mem2reg promoted by instruction
+    // selection. As such they cannot have regular uses like an instrumentation
+    // function and it makes no sense to track them as memory.
+    if (PtrOperand->isSwiftError())
+      return nullptr;
   }
 
   // Treat memory accesses to promotable allocas as non-interesting since they
@@ -1553,17 +1578,173 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
 
   // Declare the functions that find globals in a shared object and then invoke
   // the (un)register function on them.
-  AsanRegisterImageGlobals = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(kAsanRegisterImageGlobalsName,
-                            IRB.getVoidTy(), IntptrTy, nullptr));
+  AsanRegisterImageGlobals =
+      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+          kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
   AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
 
-  AsanUnregisterImageGlobals = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(kAsanUnregisterImageGlobalsName,
-                            IRB.getVoidTy(), IntptrTy, nullptr));
+  AsanUnregisterImageGlobals =
+      checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+          kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
   AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
 }
 
+// Put the metadata and the instrumented global in the same group. This ensures
+// that the metadata is discarded if the instrumented global is discarded.
+void AddressSanitizerModule::SetComdatForGlobalMetadata(
+    GlobalVariable *G, GlobalVariable *Metadata) {
+  Module &M = *G->getParent();
+  Comdat *C = G->getComdat();
+  if (!C) {
+    if (!G->hasName()) {
+      // If G is unnamed, it must be internal. Give it an artificial name
+      // so we can put it in a comdat.
+      assert(G->hasLocalLinkage());
+      G->setName(Twine(kAsanGenPrefix) + "_anon_global");
+    }
+    C = M.getOrInsertComdat(G->getName());
+    // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
+    if (TargetTriple.isOSBinFormatCOFF())
+      C->setSelectionKind(Comdat::NoDuplicates);
+    G->setComdat(C);
+  }
+
+  assert(G->hasComdat());
+  Metadata->setComdat(G->getComdat());
+}
+
+// Create a separate metadata global and put it in the appropriate ASan
+// global registration section.
+GlobalVariable *
+AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
+                                             StringRef OriginalName) {
+  GlobalVariable *Metadata =
+      new GlobalVariable(M, Initializer->getType(), false,
+                         GlobalVariable::InternalLinkage, Initializer,
+                         Twine("__asan_global_") +
+                             GlobalValue::getRealLinkageName(OriginalName));
+  Metadata->setSection(getGlobalMetadataSection());
+  return Metadata;
+}
+
+IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
+  Function *AsanDtorFunction =
+      Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+                       GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+  BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+  appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
+
+  return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
+}
+
+void AddressSanitizerModule::InstrumentGlobalsCOFF(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+  auto &DL = M.getDataLayout();
+
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    Constant *Initializer = MetadataInitializers[i];
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, Initializer, G->getName());
+
+    // The MSVC linker always inserts padding when linking incrementally. We
+    // cope with that by aligning each struct to its size, which must be a power
+    // of two.
+    unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType());
+    assert(isPowerOf2_32(SizeOfGlobalStruct) &&
+           "global metadata will not be padded appropriately");
+    Metadata->setAlignment(SizeOfGlobalStruct);
+
+    SetComdatForGlobalMetadata(G, Metadata);
+  }
+}
+
+void AddressSanitizerModule::InstrumentGlobalsMachO(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+  // On recent Mach-O platforms, use a structure which binds the liveness of
+  // the global variable to the metadata struct. Keep the list of "Liveness" GV
+  // created to be added to llvm.compiler.used
+  StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
+  SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size());
+
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    Constant *Initializer = MetadataInitializers[i];
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, Initializer, G->getName());
+
+    // On recent Mach-O platforms, we emit the global metadata in a way that
+    // allows the linker to properly strip dead globals.
+    auto LivenessBinder = ConstantStruct::get(
+        LivenessTy, Initializer->getAggregateElement(0u),
+        ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr);
+    GlobalVariable *Liveness = new GlobalVariable(
+        M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
+        Twine("__asan_binder_") + G->getName());
+    Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+    LivenessGlobals[i] = Liveness;
+  }
+
+  // Update llvm.compiler.used, adding the new liveness globals. This is
+  // needed so that during LTO these variables stay alive. The alternative
+  // would be to have the linker handling the LTO symbols, but libLTO
+  // current API does not expose access to the section for each symbol.
+  if (!LivenessGlobals.empty())
+    appendToCompilerUsed(M, LivenessGlobals);
+
+  // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+  // to look up the loaded image that contains it. Second, we can store in it
+  // whether registration has already occurred, to prevent duplicate
+  // registration.
+  //
+  // common linkage ensures that there is only one global per shared library.
+  GlobalVariable *RegisteredFlag = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::CommonLinkage,
+      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+  IRB.CreateCall(AsanRegisterImageGlobals,
+                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+  IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
+                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+}
+
+void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+  unsigned N = ExtendedGlobals.size();
+  assert(N > 0);
+
+  // On platforms that don't have a custom metadata section, we emit an array
+  // of global metadata structures.
+  ArrayType *ArrayOfGlobalStructTy =
+      ArrayType::get(MetadataInitializers[0]->getType(), N);
+  auto AllGlobals = new GlobalVariable(
+      M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
+      ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), "");
+
+  IRB.CreateCall(AsanRegisterGlobals,
+                 {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                  ConstantInt::get(IntptrTy, N)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+  IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+                      {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                       ConstantInt::get(IntptrTy, N)});
+}
+
 // This function replaces all global variables with new variables that have
 // trailing redzones. It also creates a function that poisons
 // redzones and inserts this function into llvm.global_ctors.
@@ -1580,9 +1761,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
   if (n == 0) return false;
 
   auto &DL = M.getDataLayout();
-  bool UseComdatMetadata = TargetTriple.isOSBinFormatCOFF();
-  bool UseMachOGlobalsSection = ShouldUseMachOGlobalsSection();
-  bool UseMetadataArray = !(UseComdatMetadata || UseMachOGlobalsSection);
 
   // A global is described by a structure
   //   size_t beg;
@@ -1597,19 +1775,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
   StructType *GlobalStructTy =
       StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
                       IntptrTy, IntptrTy, IntptrTy, nullptr);
-  unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(GlobalStructTy);
-  assert(isPowerOf2_32(SizeOfGlobalStruct) &&
-         "global metadata will not be padded appropriately");
-  SmallVector<Constant *, 16> Initializers(UseMetadataArray ? n : 0);
-
-  // On recent Mach-O platforms, use a structure which binds the liveness of
-  // the global variable to the metadata struct. Keep the list of "Liveness" GV
-  // created to be added to llvm.compiler.used
-  StructType *LivenessTy = nullptr;
-  if (UseMachOGlobalsSection)
-    LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
-  SmallVector<GlobalValue *, 16> LivenessGlobals(
-      UseMachOGlobalsSection ? n : 0);
+  SmallVector<GlobalVariable *, 16> NewGlobals(n);
+  SmallVector<Constant *, 16> Initializers(n);
 
   bool HasDynamicallyInitializedGlobals = false;
@@ -1681,25 +1848,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
         ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
     NewGlobal->takeName(G);
     G->eraseFromParent();
-    G = NewGlobal;
-
-    if (UseComdatMetadata) {
-      // Get or create a COMDAT for G so that we can use it with our metadata.
-      Comdat *C = G->getComdat();
-      if (!C) {
-        if (!G->hasName()) {
-          // If G is unnamed, it must be internal. Give it an artificial name
-          // so we can put it in a comdat.
-          assert(G->hasLocalLinkage());
-          G->setName(Twine(kAsanGenPrefix) + "_anon_global");
-        }
-        C = M.getOrInsertComdat(G->getName());
-        // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
-        if (TargetTriple.isOSBinFormatCOFF())
-          C->setSelectionKind(Comdat::NoDuplicates);
-        G->setComdat(C);
-      }
-    }
+    NewGlobals[i] = NewGlobal;
 
     Constant *SourceLoc;
     if (!MD.SourceLoc.empty()) {
@@ -1750,117 +1899,21 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
 
     DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
 
-    // If we aren't using separate metadata globals, add it to the initializer
-    // list and continue.
-    if (UseMetadataArray) {
-      Initializers[i] = Initializer;
-      continue;
-    }
-
-    // Create a separate metadata global and put it in the appropriate ASan
-    // global registration section.
-    GlobalVariable *Metadata = new GlobalVariable(
-        M, GlobalStructTy, false, GlobalVariable::InternalLinkage,
-        Initializer, Twine("__asan_global_") +
-                         GlobalValue::getRealLinkageName(G->getName()));
-    Metadata->setSection(getGlobalMetadataSection());
-
-    // We don't want any padding, but we also need a reasonable alignment.
-    // The MSVC linker always inserts padding when linking incrementally. We
-    // cope with that by aligning each struct to its size, which must be a power
-    // of two.
-    Metadata->setAlignment(SizeOfGlobalStruct);
-
-    // On platforms that support comdats, put the metadata and the
-    // instrumented global in the same group. This ensures that the metadata
-    // is discarded if the instrumented global is discarded.
-    if (UseComdatMetadata) {
-      assert(G->hasComdat());
-      Metadata->setComdat(G->getComdat());
-      continue;
-    }
-    assert(UseMachOGlobalsSection);
+    Initializers[i] = Initializer;
+  }
 
-    // On recent Mach-O platforms, we emit the global metadata in a way that
-    // allows the linker to properly strip dead globals.
-    auto LivenessBinder = ConstantStruct::get(
-        LivenessTy, Initializer->getAggregateElement(0u),
-        ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr);
-    GlobalVariable *Liveness = new GlobalVariable(
-        M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
-        Twine("__asan_binder_") + G->getName());
-    Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
-    LivenessGlobals[i] = Liveness;
+  if (TargetTriple.isOSBinFormatCOFF()) {
+    InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
+  } else if (ShouldUseMachOGlobalsSection()) {
+    InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+  } else {
+    InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
   }
 
   // Create calls for poisoning before initializers run and unpoisoning after.
   if (HasDynamicallyInitializedGlobals)
     createInitializerPoisonCalls(M, ModuleName);
 
-  // Platforms with a dedicated metadata section don't need to emit any more
-  // code.
-  if (UseComdatMetadata)
-    return true;
-
-  GlobalVariable *AllGlobals = nullptr;
-  GlobalVariable *RegisteredFlag = nullptr;
-
-  if (UseMachOGlobalsSection) {
-    // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
-    // to look up the loaded image that contains it. Second, we can store in it
-    // whether registration has already occurred, to prevent duplicate
-    // registration.
-    //
-    // common linkage ensures that there is only one global per shared library.
-    RegisteredFlag = new GlobalVariable(
-        M, IntptrTy, false, GlobalVariable::CommonLinkage,
-        ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
-    RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
-
-    // Update llvm.compiler.used, adding the new liveness globals. This is
-    // needed so that during LTO these variables stay alive. The alternative
-    // would be to have the linker handling the LTO symbols, but libLTO
-    // current API does not expose access to the section for each symbol.
-    if (!LivenessGlobals.empty())
-      appendToCompilerUsed(M, LivenessGlobals);
-  } else if (UseMetadataArray) {
-    // On platforms that don't have a custom metadata section, we emit an array
-    // of global metadata structures.
-    ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
-    AllGlobals = new GlobalVariable(
-        M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
-        ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
-  }
-
-  // Create a call to register the globals with the runtime.
-  if (UseMachOGlobalsSection) {
-    IRB.CreateCall(AsanRegisterImageGlobals,
-                   {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
-  } else {
-    IRB.CreateCall(AsanRegisterGlobals,
-                   {IRB.CreatePointerCast(AllGlobals, IntptrTy),
-                    ConstantInt::get(IntptrTy, n)});
-  }
-
-  // We also need to unregister globals at the end, e.g., when a shared library
-  // gets closed.
-  Function *AsanDtorFunction =
-      Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
-                       GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
-  BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
-  IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
-
-  if (UseMachOGlobalsSection) {
-    IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
-                        {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
-  } else {
-    IRB_Dtor.CreateCall(AsanUnregisterGlobals,
-                        {IRB.CreatePointerCast(AllGlobals, IntptrTy),
-                         ConstantInt::get(IntptrTy, n)});
-  }
-
-  appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
-
   DEBUG(dbgs() << M);
   return true;
 }
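The refactoring above routes COFF, Mach-O and everything else through dedicated helpers, but all three emit the same per-global record built from GlobalStructTy (eight IntptrTy fields). A rough C view of one record as the runtime might see it; only "beg" is named in the source comment above, the remaining field names are my guesses from the ASan runtime and should not be read as this commit's definitions:

    #include <stdint.h>
    struct asan_global {           /* hypothetical mirror of GlobalStructTy */
      uintptr_t beg;               /* address of the instrumented global    */
      uintptr_t size;              /* the remaining names are assumptions   */
      uintptr_t size_with_redzone;
      uintptr_t name, module_name;
      uintptr_t has_dynamic_init, source_location, odr_indicator;
    };

On COFF each record lands in a comdat tied to its global; on Mach-O a liveness binder keeps it strippable; elsewhere the records form one array passed to the register/unregister callbacks.
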
diff --git a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d9659694da4..52035c79a4a 100644
--- a/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/gnu/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -488,6 +488,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
   Value *Addr = IsWrite
       ? cast<StoreInst>(I)->getPointerOperand()
       : cast<LoadInst>(I)->getPointerOperand();
+
+  // swifterror memory addresses are mem2reg promoted by instruction selection.
+  // As such they cannot have regular uses like an instrumentation function and
+  // it makes no sense to track them as memory.
+  if (Addr->isSwiftError())
+    return false;
+
   int Idx = getMemoryAccessFuncIndex(Addr, DL);
   if (Idx < 0)
     return false;
diff --git a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 141e99b92cd..84f9373ae91 100644
--- a/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -41,6 +41,8 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
 STATISTIC(NumAShrs, "Number of ashr converted to lshr");
 STATISTIC(NumSRems, "Number of srem converted to urem");
 
+static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
+
 namespace {
   class CorrelatedValuePropagation : public FunctionPass {
   public:
@@ -405,6 +407,9 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
 static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
   typedef OverflowingBinaryOperator OBO;
 
+  if (DontProcessAdds)
+    return false;
+
   if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp))
     return false;
diff --git a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 90c26e13db7..f8e1d2e1a08 100644
--- a/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
 class GVNHoist {
 public:
   GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
-           MemorySSA *MSSA, bool OptForMinSize)
-      : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
-        HoistingGeps(OptForMinSize), HoistedCtr(0) {
-    // Hoist as far as possible when optimizing for code-size.
-    if (OptForMinSize)
-      MaxNumberOfBBSInPath = -1;
-  }
+           MemorySSA *MSSA)
+      : DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+        HoistingGeps(false),
+        HoistedCtr(0)
+  { }
 
   bool run(Function &F) {
     VN.setDomTree(DT);
@@ -251,7 +249,6 @@ private:
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
   MemorySSA *MSSA;
-  const bool OptForMinSize;
   const bool HoistingGeps;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
@@ -505,11 +502,6 @@ private:
   bool safeToHoistScalar(const BasicBlock *HoistBB,
                          SmallPtrSetImpl<const BasicBlock *> &WL,
                          int &NBBsOnAllPaths) {
-    // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
-    // where they are partially needed.
-    if (OptForMinSize)
-      return true;
-
     // Check that the hoisted expression is needed on all paths.
     if (!hoistingFromAllPaths(HoistBB, WL))
       return false;
@@ -923,13 +915,8 @@ private:
             Intr->getIntrinsicID() == Intrinsic::assume)
           continue;
       }
-      if (Call->mayHaveSideEffects()) {
-        if (!OptForMinSize)
-          break;
-        // We may continue hoisting across calls which write to memory.
-        if (Call->mayThrow())
-          break;
-      }
+      if (Call->mayHaveSideEffects())
+        break;
 
       if (Call->isConvergent())
         break;
@@ -971,7 +958,7 @@ public:
     auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
 
-    GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+    GVNHoist G(&DT, &AA, &MD, &MSSA);
     return G.run(F);
   }
 
@@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-  GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
+  GVNHoist G(&DT, &AA, &MD, &MSSA);
   if (!G.run(F))
     return PreservedAnalyses::all();
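With cl::init(false) in the PassManagerBuilder hunk and the OptForMinSize special-casing removed here, GVN hoisting is off by default and no longer varies with -Oz (the hidden -enable-gvn-hoist flag defined above turns it back on). The transform it gates, in a C sketch of mine:

    int f(int c, int x, int y) {
      if (c)
        return x * y + 1; /* both arms compute x * y; hoisting would  */
      return x * y - 1;   /* emit the multiply once, above the branch */
    }
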
diff --git a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
index 4c15c8a32be..f51d11c04cb 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1196,10 +1196,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
   auto mergeLoop = [&](Loop *L) {
     // Loop over the body of this loop, looking for calls, invokes, and stores.
-    // Because subloops have already been incorporated into AST, we skip blocks
-    // in subloops.
     for (BasicBlock *BB : L->blocks())
-      if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
       CurAST->add(*BB); // Incorporate the specified basic block
   };
diff --git a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 01728ae680d..194587a85e7 100644
--- a/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -158,8 +158,9 @@ struct MemAccessTy {
 
   bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
 
-  static MemAccessTy getUnknown(LLVMContext &Ctx) {
-    return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
+  static MemAccessTy getUnknown(LLVMContext &Ctx,
+                                unsigned AS = UnknownAddressSpace) {
+    return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
 };
 
@@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
   // TODO: Be less conservative when the type is similar and can use the same
   // addressing modes.
   if (Kind == LSRUse::Address) {
-    if (AccessTy != LU.AccessTy)
-      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
+    if (AccessTy.MemTy != LU.AccessTy.MemTy) {
+      NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
+                                            AccessTy.AddrSpace);
+    }
   }
 
   // Conservatively assume HasBaseReg is true for now.
diff --git a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 6043e04bb8c..57e6e3ddad9 100644
--- a/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -85,6 +85,8 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
 STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
+STATISTIC(NumGVNNotMostDominatingLeader,
+          "Number of times a member dominated it's new classes' leader");
 
 //===----------------------------------------------------------------------===//
 //                                GVN Pass
@@ -1073,17 +1075,20 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
   if (I == OldClass->NextLeader.first)
     OldClass->NextLeader = {nullptr, ~0U};
 
-  // The new instruction and new class leader may either be siblings in the
-  // dominator tree, or the new class leader should dominate the new member
-  // instruction. We simply check that the member instruction does not properly
-  // dominate the new class leader.
-  assert(
-      !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
-      I == NewClass->RepLeader ||
-      !DT->properlyDominates(
+  // It's possible, though unlikely, for us to discover equivalences such
+  // that the current leader does not dominate the old one.
+  // This statistic tracks how often this happens.
+  // We assert on phi nodes when this happens, currently, for debugging, because
+  // we want to make sure we name phi node cycles properly.
+  if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader &&
+      I != NewClass->RepLeader &&
+      DT->properlyDominates(
           I->getParent(),
-          cast<Instruction>(NewClass->RepLeader)->getParent()) &&
-      "New class for instruction should not be dominated by instruction");
+          cast<Instruction>(NewClass->RepLeader)->getParent())) {
+    ++NumGVNNotMostDominatingLeader;
+    assert(!isa<PHINode>(I) &&
+           "New class for instruction should not be dominated by instruction");
+  }
 
   if (NewClass->RepLeader != I) {
     auto DFSNum = InstrDFS.lookup(I);
diff --git a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 181a324861e..65c814d7a63 100644
--- a/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -1521,8 +1521,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
       if (CI->isNegative() && !CI->isMinValue(true)) {
         Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
-        assert(!Duplicates.count(Factor) &&
-               "Shouldn't have two constant factors, missed a canonicalize");
+        if (!Duplicates.insert(Factor).second)
+          continue;
         unsigned Occ = ++FactorOccurrences[Factor];
         if (Occ > MaxOcc) {
           MaxOcc = Occ;
@@ -1534,8 +1534,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
         APFloat F(CF->getValueAPF());
         F.changeSign();
         Factor = ConstantFP::get(CF->getContext(), F);
-        assert(!Duplicates.count(Factor) &&
-               "Shouldn't have two constant factors, missed a canonicalize");
+        if (!Duplicates.insert(Factor).second)
+          continue;
         unsigned Occ = ++FactorOccurrences[Factor];
         if (Occ > MaxOcc) {
           MaxOcc = Occ;
diff --git a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
index 34be9069248..ede381c4c24 100644
--- a/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/gnu/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -1705,7 +1705,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
 
     // If this is an exact definition of this function, then we can propagate
     // information about its result into callsites of it.
-    if (F.hasExactDefinition())
+    // Don't touch naked functions. They may contain asm returning a
+    // value we don't see, so we may end up interprocedurally propagating
+    // the return value incorrectly.
+    if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked))
       Solver.AddTrackedFunction(&F);
 
     // If this function only has direct calls that we can see, we can track its
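A sketch of the naked-function hazard the SCCP hunk guards against (mine, using GCC/Clang attribute syntax, not from this commit): the IR body below returns nothing IPSCCP can see, yet callers do receive a value, produced behind the compiler's back in %eax.

    __attribute__((naked)) int answer(void) {
      __asm__ volatile("movl $42, %eax\n\t" /* the real return value is   */
                       "ret");              /* set here, invisibly to IR  */
    }
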
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f9a602bc268..e346ebd6a00 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -189,11 +189,14 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
     assert(OriginalBB == OldLoop->getHeader() &&
            "Header should be first in RPO");
 
+    NewLoop = new Loop();
     Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
-    assert(NewLoopParent &&
-           "Expected parent loop before sub-loop in RPO");
-    NewLoop = new Loop;
-    NewLoopParent->addChildLoop(NewLoop);
+
+    if (NewLoopParent)
+      NewLoopParent->addChildLoop(NewLoop);
+    else
+      LI->addTopLevelLoop(NewLoop);
+
     NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
     return OldLoop;
   } else {
diff --git a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 85da3ba899a..d3ea1564115 100644
--- a/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -302,17 +302,22 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
   }
 
   NewLoopsMap NewLoops;
-  NewLoops[L] = NewLoop;
+  if (NewLoop)
+    NewLoops[L] = NewLoop;
+  else if (ParentLoop)
+    NewLoops[L] = ParentLoop;
+
   // For each block in the original loop, create a new copy,
   // and update the value map with the newly created values.
   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
     NewBlocks.push_back(NewBB);
-
-    if (NewLoop) {
+
+    // If we're unrolling the outermost loop, there's no remainder loop,
+    // and this block isn't in a nested loop, then the new block is not
+    // in any loop. Otherwise, add it to loopinfo.
+    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
       addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
-    } else if (ParentLoop)
-      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
 
     VMap[*BB] = NewBB;
     if (Header == *BB) {
diff --git a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6e30919246c..7b0bddbbb83 100644
--- a/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/gnu/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
         I->getType()->isTokenTy())
       return false;
+
+    // Conservatively return false if I is an inline-asm instruction. Sinking
+    // and merging inline-asm instructions can potentially create arguments
+    // that cannot satisfy the inline-asm constraints.
+    if (const auto *C = dyn_cast<CallInst>(I))
+      if (C->isInlineAsm())
+        return false;
+
     // Everything must have only one use too, apart from stores which
     // have no uses.
     if (!isa<StoreInst>(I) && !I->hasOneUse())
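The inline-asm restriction in canSinkInstructions guards against merges like the following C sketch (mine): sinking both asm calls into one block would require a PHI of the two immediates, and a PHI result can no longer satisfy the immediate-only "i" constraint.

    void f(int c) {
      if (c)
        __asm__ volatile("# uses %0" :: "i"(1)); /* "i" requires a     */
      else
        __asm__ volatile("# uses %0" :: "i"(2)); /* compile-time value */
    }
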
diff --git a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b1f86f8efd..dac7032fa08 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() {
       // is consecutive-like, the pointer operand should remain uniform.
       else if (hasConsecutiveLikePtrOperand(&I))
         ConsecutiveLikePtrs.insert(Ptr);
+
+      // Otherwise, if the memory instruction will be vectorized and its
+      // pointer operand is non-consecutive-like, the memory instruction should
+      // be a gather or scatter operation. Its pointer operand will be
+      // non-uniform.
+      else
+        PossibleNonUniformPtrs.insert(Ptr);
     }
 
   // Add to the Worklist all consecutive and consecutive-like pointers that
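A loop whose load falls into the new else branch, as a C sketch of mine: the per-lane addresses of b[2*i] are not consecutive, so the pointer is recorded as possibly non-uniform and the access has to become a gather (or be scalarized).

    void f(float *a, const float *b, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = b[2 * i]; /* stride 2: not consecutive-like */
    }
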
diff --git a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1c7cbc7edf9..328f2700296 100644
--- a/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/gnu/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4026,40 +4026,36 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
   if (!V)
     return false;
 
-  Value *P = V->getParent();
-
-  // Vectorize in current basic block only.
-  auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
-  auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
-  if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
-    return false;
-
   // Try to vectorize V.
-  if (tryToVectorizePair(Op0, Op1, R))
+  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
     return true;
 
-  auto *A = dyn_cast<BinaryOperator>(Op0);
-  auto *B = dyn_cast<BinaryOperator>(Op1);
+  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
   // Try to skip B.
   if (B && B->hasOneUse()) {
-    auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
-    auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
-    if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
+    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+    if (tryToVectorizePair(A, B0, R)) {
       return true;
-    if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
+    }
+    if (tryToVectorizePair(A, B1, R)) {
       return true;
+    }
   }
 
   // Try to skip A.
   if (A && A->hasOneUse()) {
-    auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
-    auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
-    if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
+    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+    if (tryToVectorizePair(A0, B, R)) {
      return true;
-    if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
+    }
+    if (tryToVectorizePair(A1, B, R)) {
      return true;
+    }
   }
-  return false;
+
+  return 0;
 }
@@ -4511,143 +4507,29 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
   return nullptr;
 }
 
-namespace {
-/// Tracks instructons and its children.
-class WeakVHWithLevel final : public CallbackVH {
-  /// Operand index of the instruction currently beeing analized.
-  unsigned Level = 0;
-  /// Is this the instruction that should be vectorized, or are we now
-  /// processing children (i.e. operands of this instruction) for potential
-  /// vectorization?
-  bool IsInitial = true;
-
-public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
-  /// Restart children analysis each time it is repaced by the new instruction.
-  void allUsesReplacedWith(Value *New) override {
-    setValPtr(New);
-    Level = 0;
-    IsInitial = true;
-  }
-  /// Check if the instruction was not deleted during vectorization.
-  bool isValid() const { return !getValPtr(); }
-  /// Is the istruction itself must be vectorized?
-  bool isInitial() const { return IsInitial; }
-  /// Try to vectorize children.
-  void clearInitial() { IsInitial = false; }
-  /// Are all children processed already?
-  bool isFinal() const {
-    assert(getValPtr() &&
-           (isa<Instruction>(getValPtr()) &&
-            cast<Instruction>(getValPtr())->getNumOperands() >= Level));
-    return getValPtr() &&
-           cast<Instruction>(getValPtr())->getNumOperands() == Level;
-  }
-  /// Get next child operation.
-  Value *nextOperand() {
-    assert(getValPtr() && isa<Instruction>(getValPtr()) &&
-           cast<Instruction>(getValPtr())->getNumOperands() > Level);
-    return cast<Instruction>(getValPtr())->getOperand(Level++);
-  }
-  virtual ~WeakVHWithLevel() = default;
-};
-} // namespace
-
 /// \brief Attempt to reduce a horizontal reduction.
 /// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
 /// \returns true if a horizontal reduction was matched and reduced.
 /// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
-    PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
-    TargetTransformInfo *TTI,
-    const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+                                        BoUpSLP &R, TargetTransformInfo *TTI,
+                                        unsigned MinRegSize) {
   if (!ShouldVectorizeHor)
     return false;
 
-  if (!Root)
-    return false;
-
-  if (Root->getParent() != BB)
+  HorizontalReduction HorRdx(MinRegSize);
+  if (!HorRdx.matchAssociativeReduction(P, BI))
     return false;
 
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
-  SmallSet<Value *, 8> VisitedInstrs;
-  bool Res = false;
-  while (!Stack.empty()) {
-    Value *V = Stack.back();
-    if (!V) {
-      Stack.pop_back();
-      continue;
-    }
-    auto *Inst = dyn_cast<Instruction>(V);
-    if (!Inst || isa<PHINode>(Inst)) {
-      Stack.pop_back();
-      continue;
-    }
-    if (Stack.back().isInitial()) {
-      Stack.back().clearInitial();
-      if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
-        HorizontalReduction HorRdx(R.getMinVecRegSize());
-        if (HorRdx.matchAssociativeReduction(P, BI)) {
-          // If there is a sufficient number of reduction values, reduce
-          // to a nearby power-of-2. Can safely generate oversized
-          // vectors and rely on the backend to split them to legal sizes.
-          HorRdx.ReduxWidth =
-              std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
-
-          if (HorRdx.tryToReduce(R, TTI)) {
-            Res = true;
-            P = nullptr;
-            continue;
-          }
-        }
-        if (P) {
-          Inst = dyn_cast<Instruction>(BI->getOperand(0));
-          if (Inst == P)
-            Inst = dyn_cast<Instruction>(BI->getOperand(1));
-          if (!Inst) {
-            P = nullptr;
-            continue;
-          }
-        }
-      }
-      P = nullptr;
-      if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
-        Res = true;
-        continue;
-      }
-    }
-    if (Stack.back().isFinal()) {
-      Stack.pop_back();
-      continue;
-    }
-    if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
-      if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
-          Stack.size() < RecursionMaxDepth)
-        Stack.push_back(NextV);
-  }
-  return Res;
-}
-
-bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
-                                                 BasicBlock *BB, BoUpSLP &R,
-                                                 TargetTransformInfo *TTI) {
-  if (!V)
-    return false;
-  auto *I = dyn_cast<Instruction>(V);
-  if (!I)
-    return false;
+  // If there is a sufficient number of reduction values, reduce
+  // to a nearby power-of-2. Can safely generate oversized
+  // vectors and rely on the backend to split them to legal sizes.
+  HorRdx.ReduxWidth =
+      std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
 
-  if (!isa<BinaryOperator>(I))
-    P = nullptr;
-  // Try to match and vectorize a horizontal reduction.
-  return canBeVectorized(P, I, BB, R, TTI,
-                         [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
-                           return tryToVectorize(BI, R);
-                         });
+  return HorRdx.tryToReduce(R, TTI);
 }
 
 bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
@@ -4717,42 +4599,67 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (P->getNumIncomingValues() != 2)
         return Changed;
 
+      Value *Rdx = getReductionValue(DT, P, BB, LI);
+
+      // Check if this is a Binary Operator.
+      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+      if (!BI)
+        continue;
+
       // Try to match and vectorize a horizontal reduction.
-      if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
-                                   TTI)) {
+      if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
         continue;
       }
+
+      Value *Inst = BI->getOperand(0);
+      if (Inst == P)
+        Inst = BI->getOperand(1);
+
+      if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+        // We would like to start over since some instructions are deleted
+        // and the iterator may become invalid value.
+        Changed = true;
+        it = BB->begin();
+        e = BB->end();
+        continue;
+      }
+
       continue;
     }
 
-    if (ShouldStartVectorizeHorAtStore) {
-      if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
-        // Try to match and vectorize a horizontal reduction.
-        if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
-                                     TTI)) {
-          Changed = true;
-          it = BB->begin();
-          e = BB->end();
-          continue;
+    if (ShouldStartVectorizeHorAtStore)
+      if (StoreInst *SI = dyn_cast<StoreInst>(it))
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+                                          R.getMinVecRegSize()) ||
+              tryToVectorize(BinOp, R)) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
         }
-      }
-    }
 
     // Try to vectorize horizontal reductions feeding into a return.
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
-      if (RI->getNumOperands() != 0) {
-        // Try to match and vectorize a horizontal reduction.
-        if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
-          Changed = true;
-          it = BB->begin();
-          e = BB->end();
-          continue;
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+      if (RI->getNumOperands() != 0)
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
+                                          R.getMinVecRegSize()) ||
+              tryToVectorizePair(BinOp->getOperand(0), BinOp->getOperand(1),
+                                 R)) {
+            Changed = true;
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
         }
-      }
-    }
 
     // Try to vectorize trees that start at compare instructions.
    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
@@ -4765,14 +4672,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         continue;
       }
 
-      for (int I = 0; I < 2; ++I) {
-        if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
-          Changed = true;
-          // We would like to start over since some instructions are deleted
-          // and the iterator may become invalid value.
-          it = BB->begin();
-          e = BB->end();
-          break;
+      for (int i = 0; i < 2; ++i) {
+        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+          if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+            Changed = true;
+            // We would like to start over since some instructions are deleted
+            // and the iterator may become invalid value.
+            it = BB->begin();
+            e = BB->end();
+            break;
+          }
         }
       }
       continue;
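A reduction shaped like the ones the rewritten PHI/store/return paths look for, as a C sketch of mine:

    int hadd4(const int *a) {
      /* an associative add tree rooted at a BinaryOperator feeding the
         return; canMatchHorizontalReduction can turn it into one vector
         load plus a shuffle-based reduction */
      return (a[0] + a[1]) + (a[2] + a[3]);
    }
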
