diff options
Diffstat (limited to 'gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp')
| -rw-r--r-- | gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp | 1208 |
1 files changed, 935 insertions, 273 deletions
diff --git a/gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index cf430f860fd..26817ac330b 100644 --- a/gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/gnu/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -65,6 +65,8 @@ public: for (auto &C : CS->captures()) { if (C.capturesVariable() || C.capturesVariableByCopy()) { auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); DeclRefExpr DRE(const_cast<VarDecl *>(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -119,6 +121,14 @@ public: /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + CodeGenFunction::OMPPrivateScope PreCondScope(CGF); + for (auto *E : S.counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + (void)PreCondScope.addPrivate(VD, [&CGF, VD]() { + return CGF.CreateMemTemp(VD->getType().getNonReferenceType()); + }); + } + (void)PreCondScope.Privatize(); if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { for (const auto *I : PreInits->decls()) @@ -134,8 +144,93 @@ public: } }; +class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && + cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); + } + +public: + OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + for (const auto *C : S.clauses()) { + if (auto *CPI = OMPClauseWithPreInit::get(C)) { + if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = cast<DeclRefExpr>(E)->getDecl(); + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) + CGF.EmitVarDecl(*OED); + } + } + } + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPPrivateClause(S, InlinedShareds); + if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { + if (const Expr *E = TG->getReductionRef()) + CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); + } + const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); + while (CS) { + for (auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || + (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + } + CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); + } + (void)InlinedShareds.Privatize(); + } +}; + } // namespace +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen); + +LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { + if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { + if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { + OrigVD = OrigVD->getCanonicalDecl(); + bool IsCaptured = + LambdaCaptureFields.lookup(OrigVD) || + (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || + (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, + OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); + return EmitLValue(&DRE); + } + } + return EmitLValue(E); +} + llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { auto &C = getContext(); llvm::Value *Size = nullptr; @@ -236,6 +331,12 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) { } if (T->isPointerType()) return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + if (auto *A = T->getAsArrayTypeUnsafe()) { + if (auto *VLA = dyn_cast<VariableArrayType>(A)) + return getCanonicalParamType(C, VLA->getElementType()); + else if (!A->isVariablyModifiedType()) + return C.getCanonicalType(T); + } return C.getCanonicalParamType(T); } @@ -246,12 +347,12 @@ namespace { const CapturedStmt *S = nullptr; /// true if cast to/from UIntPtr is required for variables captured by /// value. - bool UIntPtrCastRequired = true; - /// true if only casted argumefnts must be registered as local args or VLA + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA /// sizes. - bool RegisterCastedArgsOnly = false; + const bool RegisterCastedArgsOnly = false; /// Name of the generated function. - StringRef FunctionName; + const StringRef FunctionName; explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, bool RegisterCastedArgsOnly, StringRef FunctionName) @@ -261,9 +362,9 @@ namespace { }; } -static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( +static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> &LocalAddrs, llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> &VLASizes, @@ -276,10 +377,23 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( // Build the argument list. CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); - bool HasUIntPtrArgs = false; + FunctionArgList TargetArgs; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); auto I = FO.S->captures().begin(); + FunctionDecl *DebugFunctionDecl = nullptr; + if (!FO.UIntPtrCastRequired) { + FunctionProtoType::ExtProtoInfo EPI; + DebugFunctionDecl = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(), + SourceLocation(), DeclarationName(), Ctx.VoidTy, + Ctx.getTrivialTypeSourceInfo( + Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), + SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + } for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; @@ -292,7 +406,6 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || I->capturesVariableArrayType()) { - HasUIntPtrArgs = true; if (FO.UIntPtrCastRequired) ArgType = Ctx.getUIntPtrType(); } @@ -307,20 +420,36 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( II = &Ctx.Idents.get("vla"); } if (ArgType->isVariablyModifiedType()) - ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); - Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, - FD->getLocation(), II, ArgType, - ImplicitParamDecl::Other)); + ArgType = getCanonicalParamType(Ctx, ArgType); + VarDecl *Arg; + if (DebugFunctionDecl && (CapVar || I->capturesThis())) { + Arg = ParmVarDecl::Create( + Ctx, DebugFunctionDecl, + CapVar ? CapVar->getLocStart() : FD->getLocStart(), + CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, + /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); + } else { + Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), + II, ArgType, ImplicitParamDecl::Other); + } + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); // Create the function declaration. - FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = @@ -328,19 +457,26 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) - F->addFnAttr(llvm::Attribute::NoUnwind); + F->setDoesNotThrow(); // Generate the function. - CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), - CD->getBody()->getLocStart()); + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, + FO.S->getLocStart(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. + Address LocalAddr(Address::invalid()); + if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { + LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], + TargetArgs[Cnt]); + } else { + LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); + } // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. if (CurVD->getType()->isReferenceType()) { Address RefAddr = CGF.CreateMemTemp( @@ -356,15 +492,14 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( continue; } - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), - Args[Cnt]->getType(), BaseInfo); + LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), + AlignmentSource::Decl); if (FD->hasCapturedVLAType()) { if (FO.UIntPtrCastRequired) { ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), ArgLVal), - FD->getType(), BaseInfo); + FD->getType(), AlignmentSource::Decl); } auto *ExprArg = CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); @@ -376,8 +511,7 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = CGF.EmitLoadOfReference( - ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); ArgAddr = CGF.EmitLoadOfPointer( @@ -412,7 +546,7 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( ++I; } - return {F, HasUIntPtrArgs}; + return F; } llvm::Function * @@ -426,14 +560,17 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { getDebugInfo() && CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; FunctionArgList Args; - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << CapturedStmtInfo->getHelperName(); + if (NeedWrapperFunction) + Out << "_debug__"; FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - CapturedStmtInfo->getHelperName()); - llvm::Function *F; - bool HasUIntPtrArgs; - std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( - *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + Out.str()); + llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, + VLASizes, CXXThisValue, FO); for (const auto &LocalAddrPair : LocalAddrs) { if (LocalAddrPair.second.first) { setAddrOfLocalVar(LocalAddrPair.second.first, @@ -445,28 +582,28 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - if (!NeedWrapperFunction || !HasUIntPtrArgs) + if (!NeedWrapperFunction) return F; FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, - ".nondebug_wrapper."); + CapturedStmtInfo->getHelperName()); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); - WrapperCGF.disableDebugInfo(); Args.clear(); LocalAddrs.clear(); VLASizes.clear(); llvm::Function *WrapperF = emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, - WrapperCGF.CXXThisValue, WrapperFO).first; - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + WrapperCGF.CXXThisValue, WrapperFO); llvm::SmallVector<llvm::Value *, 4> CallArgs; for (const auto *Arg : Args) { llvm::Value *CallArg; auto I = LocalAddrs.find(Arg); if (I != LocalAddrs.end()) { - LValue LV = - WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); + LValue LV = WrapperCGF.MakeAddrLValue( + I->second.second, + I->second.first ? I->second.first->getType() : Arg->getType(), + AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); } else { auto EI = VLASizes.find(Arg); @@ -474,13 +611,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { CallArg = EI->second.second; else { LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), - Arg->getType(), BaseInfo); + Arg->getType(), + AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); } } - CallArgs.emplace_back(CallArg); + CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); } - WrapperCGF.Builder.CreateCall(F, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), + F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; } @@ -789,7 +928,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; - if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + !getLangOpts().OpenMPSimd) break; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); @@ -956,7 +1096,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - if (isa<OMPArraySectionExpr>(IRef)) { + QualType Type = PrivateVD->getType(); + bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); + if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { @@ -965,7 +1107,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { return GetAddrOfLocalVar(PrivateVD); }); - } else if (isa<ArraySubscriptExpr>(IRef)) { + } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || + isa<ArraySubscriptExpr>(IRef)) { // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { @@ -1023,8 +1166,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( if (HasAtLeastOneReduction) { bool WithNowait = D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || - D.getDirectiveKind() == OMPD_simd; - bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; + ReductionKind == OMPD_simd; + bool SimpleReduction = ReductionKind == OMPD_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( @@ -1146,9 +1289,13 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, EmitIgnoredExpr(I); } // Update the linear variables. - for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto *U : C->updates()) - EmitIgnoredExpr(U); + // In distribute directives only loop counters may be marked as linear, no + // need to generate the code for them. + if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + for (auto *U : C->updates()) + EmitIgnoredExpr(U); + } } // On a continue in the body, jump to the end. @@ -1488,83 +1635,104 @@ static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, CGF.EmitStopPoint(&S); } -void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - // if (PreCond) { - // for (IV in 0..LastIteration) BODY; - // <Final counter/linear vars updates>; - // } - // +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} - // Emit: if (PreCond) - begin. - // If the condition constant folds and can be elided, avoid emitting the - // whole loop. - bool CondConstant; - llvm::BasicBlock *ContBlock = nullptr; - if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { - if (!CondConstant) - return; - } else { - auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); - ContBlock = CGF.createBasicBlock("simd.if.end"); - emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, - CGF.getProfileCount(&S)); - CGF.EmitBlock(ThenBlock); - CGF.incrementProfileCounter(&S); - } +static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + assert(isOpenMPSimdDirective(S.getDirectiveKind()) && + "Expected simd directive"); + OMPLoopScope PreInitScope(CGF, S); + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + if (isOpenMPDistributeDirective(S.getDirectiveKind()) || + isOpenMPWorksharingDirective(S.getDirectiveKind()) || + isOpenMPTaskLoopDirective(S.getDirectiveKind())) { + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); + } - // Emit the loop iteration variable. - const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); - CGF.EmitVarDecl(*IVDecl); - CGF.EmitIgnoredExpr(S.getInit()); + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); + ContBlock = CGF.createBasicBlock("simd.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } - // Emit the iterations count variable. - // If it is not a variable, Sema decided to calculate iterations count on - // each iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { - CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); - // Emit calculation of the iterations count. - CGF.EmitIgnoredExpr(S.getCalcLastIteration()); - } + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); - CGF.EmitOMPSimdInit(S); + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } - emitAlignedClause(CGF, S); - (void)CGF.EmitOMPLinearClauseInit(S); - { - OMPPrivateScope LoopScope(CGF); - CGF.EmitOMPPrivateLoopCounters(S, LoopScope); - CGF.EmitOMPLinearClause(S, LoopScope); - CGF.EmitOMPPrivateClause(S, LoopScope); - CGF.EmitOMPReductionClauseInit(S, LoopScope); - bool HasLastprivateClause = - CGF.EmitOMPLastprivateClauseInit(S, LoopScope); - (void)LoopScope.Privatize(); - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, JumpDest()); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); - CGF.EmitOMPSimdFinal( - S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) - CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); - CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); - emitPostUpdateForReductionClause( - CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - } - CGF.EmitOMPLinearClauseFinal( + CGF.EmitOMPSimdInit(S); + + emitAlignedClause(CGF, S); + (void)CGF.EmitOMPLinearClauseInit(S); + { + CodeGenFunction::OMPPrivateScope LoopScope(CGF); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + CGF.EmitOMPSimdFinal( S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - // Emit: if (PreCond) - end. - if (ContBlock) { - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); - } + // Emit final copy of the lastprivate variables at the end of loops. + if (HasLastprivateClause) + CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + } + CGF.EmitOMPLinearClauseFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } +} + +void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); @@ -1669,7 +1837,8 @@ void CodeGenFunction::EmitOMPOuterLoop( // Tell the runtime we are done. auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { if (!DynamicOrOrdered) - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } @@ -1753,9 +1922,11 @@ void CodeGenFunction::EmitOMPForOuterLoop( RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, DipatchRTInputValues); } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, - LoopArgs.ST, LoopArgs.Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); } auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, @@ -1797,10 +1968,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, /* Ordered = */ false, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, + LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); // for combined 'distribute' and 'for' the increment expression of distribute // is store in DistInc. For 'distribute' alone, it is in Inc. @@ -1841,14 +2012,6 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( emitEmptyOrdered); } -/// Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - static std::pair<LValue, LValue> emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -1929,13 +2092,27 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF, CodeGenFunction::JumpDest LoopExit) { auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + bool HasCancel = false; + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { + if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = + dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + } + CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), + HasCancel); CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), emitDistributeParallelForInnerBounds, emitDistributeParallelForDispatchBounds); }; emitCommonOMPParallelDirective( - CGF, S, OMPD_for, CGInlinedWorksharingLoop, + CGF, S, + isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for, + CGInlinedWorksharingLoop, emitDistributeParallelForDistributeInnerBoundParams); } @@ -1946,119 +2123,53 @@ void CodeGenFunction::EmitOMPDistributeParallelForDirective( S.getDistInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( const OMPDistributeParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } void CodeGenFunction::EmitOMPDistributeSimdDirective( const OMPDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( - const OMPTargetParallelForSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); +void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); } void CodeGenFunction::EmitOMPTargetSimdDirective( const OMPTargetSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeDirective( - const OMPTeamsDistributeDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( - const OMPTeamsDistributeSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( - const OMPTeamsDistributeParallelForSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( - const OMPTeamsDistributeParallelForDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( - const OMPTargetTeamsDistributeDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( const OMPTargetTeamsDistributeParallelForDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2069,6 +2180,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2077,16 +2189,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( - const OMPTargetTeamsDistributeSimdDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2209,10 +2311,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); @@ -2230,7 +2333,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { @@ -2444,10 +2548,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // Emit static non-chunked loop. OpenMPScheduleTy ScheduleKind; ScheduleKind.Schedule = OMPC_SCHEDULE_static; + CGOpenMPRuntime::StaticRTInput StaticInit( + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, - /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); auto *MinUBGlobalUB = CGF.Builder.CreateSelect( @@ -2460,7 +2565,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { [](CodeGenFunction &) {}); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); @@ -2731,6 +2837,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2762,7 +2869,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.EmitRuntimeCall(CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); for (auto &&Pair : LastprivateDstsOrigs) { auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE( @@ -2808,7 +2916,57 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, RedCG, Cnt); } } + // Privatize all private variables except for in_reduction items. (void)Scope.Privatize(); + SmallVector<const Expr *, 4> InRedVars; + SmallVector<const Expr *, 4> InRedPrivs; + SmallVector<const Expr *, 4> InRedOps; + SmallVector<const Expr *, 4> TaskgroupDescriptors; + for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ITD = C->taskgroup_descriptors().begin(); + for (const auto *Ref : C->varlists()) { + InRedVars.emplace_back(Ref); + InRedPrivs.emplace_back(*IPriv); + InRedOps.emplace_back(*IRed); + TaskgroupDescriptors.emplace_back(*ITD); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ITD, 1); + } + } + // Privatize in_reduction items here, because taskgroup descriptors must be + // privatized earlier. + OMPPrivateScope InRedScope(CGF); + if (!InRedVars.empty()) { + ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + // The taskgroup descriptor variable is always implicit firstprivate and + // privatized already during procoessing of the firstprivates. + llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( + CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = Address( + CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } + (void)InRedScope.Privatize(); Action.Enter(CGF); BodyGen(CGF); @@ -2820,6 +2978,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, TaskGen(*this, OutlinedFn, Data); } +static ImplicitParamDecl * +createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, + QualType Ty, CapturedDecl *CD) { + auto *OrigVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *OrigRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), Ty, VK_LValue); + auto *PrivateVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *PrivateRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, + /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, + VK_LValue); + QualType ElemType = C.getBaseElementType(Ty); + auto *InitVD = + ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, + ElemType, ImplicitParamDecl::Other); + auto *InitRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), ElemType, VK_LValue); + PrivateVD->setInitStyle(VarDecl::CInit); + PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, + InitRef, /*BasePath=*/nullptr, + VK_RValue)); + Data.FirstprivateVars.emplace_back(OrigRef); + Data.FirstprivateCopies.emplace_back(PrivateRef); + Data.FirstprivateInits.emplace_back(InitRef); + return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( + const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo) { + // Emit outlined function for task construct. + auto CS = S.getCapturedStmt(OMPD_task); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto *I = CS->getCapturedDecl()->param_begin(); + auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + OMPTaskDataTy Data; + // The task is not final. + Data.Final.setInt(/*IntVal=*/false); + // Get list of firstprivate variables. + for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (auto *IInit : C->private_copies()) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + ++IRef; + ++IElemInitRef; + } + } + OMPPrivateScope TargetScope(*this); + VarDecl *BPVD = nullptr; + VarDecl *PVD = nullptr; + VarDecl *SVD = nullptr; + if (InputInfo.NumberOfTargetItems > 0) { + auto *CD = CapturedDecl::Create( + getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); + llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); + QualType BaseAndPointersType = getContext().getConstantArrayType( + getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + BPVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + PVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + QualType SizesType = getContext().getConstantArrayType( + getContext().getSizeType(), ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); + TargetScope.addPrivate( + BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); + TargetScope.addPrivate(PVD, + [&InputInfo]() { return InputInfo.PointersArray; }); + TargetScope.addPrivate(SVD, + [&InputInfo]() { return InputInfo.SizesArray; }); + } + (void)TargetScope.Privatize(); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, + &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.FirstprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; + llvm::SmallVector<llvm::Value *, 16> CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : Data.FirstprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + // Privatize all private variables except for in_reduction items. + (void)Scope.Privatize(); + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + + Action.Enter(CGF); + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, + /*EmitPreInitStmt=*/false); + BodyGen(CGF); + }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, + Data.NumberOfParts); + llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); + IntegerLiteral IfCond(getContext(), TrueOrFalse, + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, &IfCond, Data); +} + void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -2867,6 +3170,35 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + if (const Expr *E = S.getReductionRef()) { + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + llvm::Value *ReductionDesc = + CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), + LHSs, RHSs, Data); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + CGF.EmitVarDecl(*VD); + CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), + /*Volatile=*/false, E->getType()); + } CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2923,6 +3255,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, incrementProfileCounter(&S); } + emitAlignedClause(*this, S); // Emit 'then' code. { // Emit helper vars inits. @@ -2944,14 +3277,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { - // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables and post-update of + // Emit implicit barrier to synchronize threads and avoid data races + // on initialization of firstprivate variables and post-update of // lastprivate variables. CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); + if (isOpenMPSimdDirective(S.getDirectiveKind()) && + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPTeamsDirective(S.getDirectiveKind())) + EmitOMPReductionClauseInit(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); (void)LoopScope.Privatize(); @@ -2964,8 +3301,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (const auto *Ch = C->getChunkSize()) { Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getLocStart()); + S.getIterationVariable()->getType(), + S.getLocStart()); } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); @@ -2981,10 +3318,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // league. The size of the chunks is unspecified in this case. if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr)) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + EmitOMPSimdInit(S, /*IsMonotonic=*/true); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); @@ -3011,7 +3351,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getLocStart()); + RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. @@ -3021,13 +3361,38 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, CodeGenLoop); } - + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); + } + OpenMPDirectiveKind ReductionKind = OMPD_unknown; + if (isOpenMPParallelDirective(S.getDirectiveKind()) && + isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for_simd; + } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for; + } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_simd; + } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && + S.hasClausesOfKind<OMPReductionClause>()) { + llvm_unreachable( + "No reduction clauses is allowed in distribute directive."); + } + EmitOMPReductionClauseFinal(S, ReductionKind); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. - if (HasLastprivateClause) + if (HasLastprivateClause) { EmitOMPLastprivateClauseFinal( S, /*NoFinals=*/false, - Builder.CreateIsNotNull( - EmitLoadOfScalar(IL, S.getLocStart()))); + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); + } } // We're now done with the loop, so jump to the continuation block. @@ -3045,8 +3410,7 @@ void CodeGenFunction::EmitOMPDistributeDirective( CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, @@ -3073,7 +3437,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); - CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + OutlinedFn, CapturedVars); } else { Action.Enter(CGF); CGF.EmitStmt( @@ -3259,6 +3624,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, case BO_GE: case BO_EQ: case BO_NE: + case BO_Cmp: case BO_AddAssign: case BO_SubAssign: case BO_AndAssign: @@ -3470,6 +3836,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_reduction: case OMPC_task_reduction: + case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -3552,7 +3919,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) { assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); CodeGenModule &CGM = CGF.CGM; - const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); + const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -3675,7 +4042,7 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; - emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -3684,11 +4051,20 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsDirective &S) { auto *CS = S.getCapturedStmt(OMPD_teams); Action.Enter(CGF); - auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { - // TODO: Add support for clauses. + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + Action.Enter(CGF); CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( @@ -3713,6 +4089,183 @@ void CodeGenFunction::EmitOMPTargetTeamsDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } +static void +emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetTeamsDistributeSimdRegion( + CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeSimdDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTeamsDistributeDirective( + const OMPTeamsDistributeDirective &S) { + + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( + const OMPTeamsDistributeSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( + const OMPTeamsDistributeParallelForDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( + const OMPTeamsDistributeParallelForSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), @@ -3740,7 +4293,9 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || Kind == OMPD_distribute_parallel_for || - Kind == OMPD_target_parallel_for); + Kind == OMPD_target_parallel_for || + Kind == OMPD_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for); return OMPCancelStack.getExitBlock(); } @@ -3913,6 +4468,7 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } @@ -3933,6 +4489,7 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } @@ -3980,9 +4537,81 @@ void CodeGenFunction::EmitOMPTargetParallelDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } +static void emitTargetParallelForRegion(CodeGenFunction &CGF, + const OMPTargetParallelForDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_for, S.hasCancel()); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + void CodeGenFunction::EmitOMPTargetParallelForDirective( const OMPTargetParallelForDirective &S) { - // TODO: codegen for target parallel for. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void +emitTargetParallelForSimdRegion(CodeGenFunction &CGF, + const OMPTargetParallelForSimdDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForSimdDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } /// Emit a helper variable and return corresponding lvalue. @@ -4160,5 +4789,38 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } + +void CodeGenFunction::EmitSimpleOMPExecutableDirective( + const OMPExecutableDirective &D) { + if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) + return; + auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); + } else { + if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { + for (const auto *E : LD->counters()) { + if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( + cast<DeclRefExpr>(E)->getDecl())) { + // Emit only those that were not explicitly referenced in clauses. + if (!CGF.LocalDeclMap.count(VD)) + CGF.EmitVarDecl(*VD); + } + } + } + const auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + while (const auto *CCS = dyn_cast<CapturedStmt>(CS->getCapturedStmt())) + CS = CCS; + CGF.EmitStmt(CS->getCapturedStmt()); + } + }; + OMPSimdLexicalScope Scope(*this, D); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, + isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd + : D.getDirectiveKind(), + CodeGen); +} |
