author |  | 2020-08-03 14:31:31 +0000
---|---|---
committer |  | 2020-08-03 14:31:31 +0000
commit | e5dd70708596ae51455a0ffa086a00c5b29f8583 (patch) |
tree | 5d676f27b570bacf71e786c3b5cff3e6f6679b59 /gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp |
parent | Import LLVM 10.0.0 release including clang, lld and lldb. (diff) |
download | wireguard-openbsd-e5dd70708596ae51455a0ffa086a00c5b29f8583.tar.xz, wireguard-openbsd-e5dd70708596ae51455a0ffa086a00c5b29f8583.zip |
Import LLVM 10.0.0 release including clang, lld and lldb.
ok hackroom
tested by plenty
Diffstat (limited to 'gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp | 5510 |
1 file changed, 5510 insertions(+), 0 deletions(-)
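The imported file implements Clang's statement-level OpenMP code generation. As a rough, editorial illustration of the kind of source it lowers (not part of the import itself), a minimal parallel region with a data-sharing clause looks like this:

```cpp
// Hypothetical user code, shown only to illustrate what CGStmtOpenMP.cpp
// handles; compile with 'clang++ -fopenmp example.cpp'.
#include <cstdio>

int main() {
  int n = 0;
  // EmitOMPParallelDirective (below) outlines this region into a helper
  // function and emits a call into the OpenMP runtime (__kmpc_fork_call).
  #pragma omp parallel firstprivate(n) num_threads(2)
  {
    n += 1;                     // each thread updates its private copy
    std::printf("n = %d\n", n); // prints "n = 1" in every thread
  }
  return n; // still 0: firstprivate copies are never written back
}
```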
diff --git a/gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp b/gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp new file mode 100644 index 00000000000..dc3899f0e4e --- /dev/null +++ b/gnu/llvm/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -0,0 +1,5510 @@ +//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit OpenMP nodes as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CGCleanup.h" +#include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "TargetInfo.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclOpenMP.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +using namespace clang; +using namespace CodeGen; +using namespace llvm::omp; + +namespace { +/// Lexical scope for OpenMP executable constructs, that handles correct codegen +/// for captured expressions. +class OMPLexicalScope : public CodeGenFunction::LexicalScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + for (const auto *C : S.clauses()) { + if (const auto *CPI = OMPClauseWithPreInit::get(C)) { + if (const auto *PreInit = + cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } + } + } + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); + } + +public: + OMPLexicalScope( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, + const bool EmitPreInitStmt = true) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + if (EmitPreInitStmt) + emitPreInitStmt(CGF, S); + if (!CapturedRegion.hasValue()) + return; + assert(S.hasAssociatedStmt() && + "Expected associated statement for inlined directive."); + const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); + for (const auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); + DeclRefExpr DRE( + CGF.getContext(), const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(CGF); + }); + } + } + (void)InlinedShareds.Privatize(); + } +}; + +/// Lexical scope for OpenMP parallel construct, that handles correct codegen +/// for captured expressions. 
+class OMPParallelScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !(isOpenMPTargetExecutionDirective(Kind) || + isOpenMPLoopBoundSharingDirective(Kind)) && + isOpenMPParallelDirective(Kind); + } + +public: + OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + EmitPreInitStmt(S)) {} +}; + +/// Lexical scope for OpenMP teams construct, that handles correct codegen +/// for captured expressions. +class OMPTeamsScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPTeamsDirective(Kind); + } + +public: + OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, + EmitPreInitStmt(S)) {} +}; + +/// Private scope for OpenMP loop-based directives, that supports capturing +/// of used expression from loop statement. +class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + CodeGenFunction::OMPMapVars PreCondVars; + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; + for (const auto *E : S.counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + EmittedAsPrivate.insert(VD->getCanonicalDecl()); + (void)PreCondVars.setVarAddr( + CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); + } + // Mark private vars as undefs. + for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { + for (const Expr *IRef : C->varlists()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + (void)PreCondVars.setVarAddr( + CGF, OrigVD, + Address(llvm::UndefValue::get( + CGF.ConvertTypeForMem(CGF.getContext().getPointerType( + OrigVD->getType().getNonReferenceType()))), + CGF.getContext().getDeclAlign(OrigVD))); + } + } + } + (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. 
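  // (Editorial illustration, not part of the imported source: given a
  //  collapsed nest such as
  //    #pragma omp for collapse(2)
  //    for (int i = 0; i < n; ++i)
  //      for (int x : vec) use(i, x);
  //  the inner loop's implicit __range and __end variables must be emitted
  //  up front so the combined iteration count can be precomputed.)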
+ const Stmt *Body = + S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { + Body = OMPLoopDirective::tryToFindNextInnerLoop( + Body, /*TryImperfectlyNestedLoops=*/true); + if (auto *For = dyn_cast<ForStmt>(Body)) { + Body = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(Body) && + "Expected canonical for loop or range-based for loop."); + auto *CXXFor = cast<CXXForRangeStmt>(Body); + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + Body = CXXFor->getBody(); + } + } + if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { + for (const auto *I : PreInits->decls()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } + PreCondVars.restore(CGF); + } + +public: + OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) + : CodeGenFunction::RunCleanupsScope(CGF) { + emitPreInitStmt(CGF, S); + } +}; + +class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && + cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); + } + +public: + OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + for (const auto *C : S.clauses()) { + if (const auto *CPI = OMPClauseWithPreInit::get(C)) { + if (const auto *PreInit = + cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = cast<DeclRefExpr>(E)->getDecl(); + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) + CGF.EmitVarDecl(*OED); + } + } + } + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPPrivateClause(S, InlinedShareds); + if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { + if (const Expr *E = TG->getReductionRef()) + CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); + } + const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); + while (CS) { + for (auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || + (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, + C.getLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(CGF); + }); + } + } + CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); + } + (void)InlinedShareds.Privatize(); + } +}; + +} // namespace + +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen); + +LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { + if 
(const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { + if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { + OrigVD = OrigVD->getCanonicalDecl(); + bool IsCaptured = + LambdaCaptureFields.lookup(OrigVD) || + (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || + (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, + OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); + return EmitLValue(&DRE); + } + } + return EmitLValue(E); +} + +llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { + ASTContext &C = getContext(); + llvm::Value *Size = nullptr; + auto SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) { + // getTypeSizeInChars() returns 0 for a VLA. + while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { + VlaSizePair VlaSize = getVLASize(VAT); + Ty = VlaSize.Type; + Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) + : VlaSize.NumElts; + } + SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) + return llvm::ConstantInt::get(SizeTy, /*V=*/0); + return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); + } + return CGM.getSize(SizeInChars); +} + +void CodeGenFunction::GenerateOpenMPCapturedVars( + const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType()) { + const VariableArrayType *VAT = CurField->getCapturedVLAType(); + llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; + CapturedVars.push_back(Val); + } else if (CurCap->capturesThis()) { + CapturedVars.push_back(CXXThisValue); + } else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); + + // If the field is not a pointer, we need to save the actual value + // and load it as a void pointer. + if (!CurField->getType()->isAnyPointerType()) { + ASTContext &Ctx = getContext(); + Address DstAddr = CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CurCap->getCapturedVar()->getName(), ".casted")); + LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + llvm::Value *SrcAddrVal = EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); + LValue SrcLV = + MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); + + // Store the value using the source type pointer. + EmitStoreThroughLValue(RValue::get(CV), SrcLV); + + // Load the value using the destination type pointer. 
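      // (Editorial note, not from the import: e.g. for
      //    int n = 4;
      //    #pragma omp parallel firstprivate(n)
      //  the 4-byte value of 'n' is stored into a fresh uintptr_t-sized
      //  temporary and reloaded from it, because the runtime can only ferry
      //  pointer-sized arguments to the outlined function.)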
+ CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); + } + CapturedVars.push_back(CV); + } else { + assert(CurCap->capturesVariable() && "Expected capture by reference."); + CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer()); + } + } +} + +static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, + QualType DstType, StringRef Name, + LValue AddrLV) { + ASTContext &Ctx = CGF.getContext(); + + llvm::Value *CastedPtr = CGF.EmitScalarConversion( + AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), + Ctx.getPointerType(DstType), Loc); + Address TmpAddr = + CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) + .getAddress(CGF); + return TmpAddr; +} + +static QualType getCanonicalParamType(ASTContext &C, QualType T) { + if (T->isLValueReferenceType()) + return C.getLValueReferenceType( + getCanonicalParamType(C, T.getNonReferenceType()), + /*SpelledAsLValue=*/false); + if (T->isPointerType()) + return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { + if (const auto *VLA = dyn_cast<VariableArrayType>(A)) + return getCanonicalParamType(C, VLA->getElementType()); + if (!A->isVariablyModifiedType()) + return C.getCanonicalType(T); + } + return C.getCanonicalParamType(T); +} + +namespace { + /// Contains required data for proper outlined function codegen. + struct FunctionOptions { + /// Captured statement for which the function is generated. + const CapturedStmt *S = nullptr; + /// true if cast to/from UIntPtr is required for variables captured by + /// value. + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA + /// sizes. + const bool RegisterCastedArgsOnly = false; + /// Name of the generated function. + const StringRef FunctionName; + explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, + bool RegisterCastedArgsOnly, + StringRef FunctionName) + : S(S), UIntPtrCastRequired(UIntPtrCastRequired), + RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), + FunctionName(FunctionName) {} + }; +} + +static llvm::Function *emitOutlinedFunctionPrologue( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> + &LocalAddrs, + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> + &VLASizes, + llvm::Value *&CXXThisValue, const FunctionOptions &FO) { + const CapturedDecl *CD = FO.S->getCapturedDecl(); + const RecordDecl *RD = FO.S->getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + CXXThisValue = nullptr; + // Build the argument list. 
+ CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGM.getContext(); + FunctionArgList TargetArgs; + Args.append(CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + auto I = FO.S->captures().begin(); + FunctionDecl *DebugFunctionDecl = nullptr; + if (!FO.UIntPtrCastRequired) { + FunctionProtoType::ExtProtoInfo EPI; + QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); + DebugFunctionDecl = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), + SourceLocation(), DeclarationName(), FunctionTy, + Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, + /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + } + for (const FieldDecl *FD : RD->fields()) { + QualType ArgType = FD->getType(); + IdentifierInfo *II = nullptr; + VarDecl *CapVar = nullptr; + + // If this is a capture by copy and the type is not a pointer, the outlined + // function argument type should be uintptr and the value properly casted to + // uintptr. This is necessary given that the runtime library is only able to + // deal with pointers. We can pass in the same way the VLA type sizes to the + // outlined function. + if (FO.UIntPtrCastRequired && + ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || + I->capturesVariableArrayType())) + ArgType = Ctx.getUIntPtrType(); + + if (I->capturesVariable() || I->capturesVariableByCopy()) { + CapVar = I->getCapturedVar(); + II = CapVar->getIdentifier(); + } else if (I->capturesThis()) { + II = &Ctx.Idents.get("this"); + } else { + assert(I->capturesVariableArrayType()); + II = &Ctx.Idents.get("vla"); + } + if (ArgType->isVariablyModifiedType()) + ArgType = getCanonicalParamType(Ctx, ArgType); + VarDecl *Arg; + if (DebugFunctionDecl && (CapVar || I->capturesThis())) { + Arg = ParmVarDecl::Create( + Ctx, DebugFunctionDecl, + CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), + CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, + /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); + } else { + Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), + II, ArgType, ImplicitParamDecl::Other); + } + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); + ++I; + } + Args.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); + + // Create the function declaration. + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + auto *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FO.FunctionName, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->setDoesNotThrow(); + F->setDoesNotRecurse(); + + // Generate the function. + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, + FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); + unsigned Cnt = CD->getContextParamPosition(); + I = FO.S->captures().begin(); + for (const FieldDecl *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. 
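    // (Illustrative aside, added in editing: for a variably-modified capture
    //  such as
    //    void f(int n) { int a[n]; /* ... */ }  // with an omp region inside
    //  the size of 'a' is passed as an extra pointer-sized argument named
    //  "vla", matching the 'vla' identifier chosen in the prologue above.)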
+ Address LocalAddr(Address::invalid()); + if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { + LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], + TargetArgs[Cnt]); + } else { + LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); + } + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + const VarDecl *CurVD = I->getCapturedVar(); + if (!FO.RegisterCastedArgsOnly) + LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); + ++Cnt; + ++I; + continue; + } + + LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), + AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + if (FO.UIntPtrCastRequired) { + ArgLVal = CGF.MakeAddrLValue( + castValueFromUintptr(CGF, I->getLocation(), FD->getType(), + Args[Cnt]->getName(), ArgLVal), + FD->getType(), AlignmentSource::Decl); + } + llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + const VariableArrayType *VAT = FD->getCapturedVLAType(); + VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); + } else if (I->capturesVariable()) { + const VarDecl *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(CGF); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = CGF.EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs<PointerType>()); + } + if (!FO.RegisterCastedArgsOnly) { + LocalAddrs.insert( + {Args[Cnt], + {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); + } + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + const VarDecl *Var = I->getCapturedVar(); + LocalAddrs.insert({Args[Cnt], + {Var, FO.UIntPtrCastRequired + ? castValueFromUintptr( + CGF, I->getLocation(), FD->getType(), + Args[Cnt]->getName(), ArgLVal) + : ArgLVal.getAddress(CGF)}}); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); + } + ++Cnt; + ++I; + } + + return F; +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. 
+ bool NeedWrapperFunction = + getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo(); + FunctionArgList Args; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << CapturedStmtInfo->getHelperName(); + if (NeedWrapperFunction) + Out << "_debug__"; + FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + Out.str()); + llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, + VLASizes, CXXThisValue, FO); + CodeGenFunction::OMPPrivateScope LocalScope(*this); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() { + return LocalAddrPair.second.second; + }); + } + } + (void)LocalScope.Privatize(); + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; + PGO.assignRegionCounters(GlobalDecl(CD), F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + (void)LocalScope.ForceCleanup(); + FinishFunction(CD->getBodyRBrace()); + if (!NeedWrapperFunction) + return F; + + FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, + /*RegisterCastedArgsOnly=*/true, + CapturedStmtInfo->getHelperName()); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; + Args.clear(); + LocalAddrs.clear(); + VLASizes.clear(); + llvm::Function *WrapperF = + emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, + WrapperCGF.CXXThisValue, WrapperFO); + llvm::SmallVector<llvm::Value *, 4> CallArgs; + for (const auto *Arg : Args) { + llvm::Value *CallArg; + auto I = LocalAddrs.find(Arg); + if (I != LocalAddrs.end()) { + LValue LV = WrapperCGF.MakeAddrLValue( + I->second.second, + I->second.first ? I->second.first->getType() : Arg->getType(), + AlignmentSource::Decl); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); + } else { + auto EI = VLASizes.find(Arg); + if (EI != VLASizes.end()) { + CallArg = EI->second.second; + } else { + LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), + AlignmentSource::Decl); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); + } + } + CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); + } + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), + F, CallArgs); + WrapperCGF.FinishFunction(); + return WrapperF; +} + +//===----------------------------------------------------------------------===// +// OpenMP Directive Emission +//===----------------------------------------------------------------------===// +void CodeGenFunction::EmitOMPAggregateAssign( + Address DestAddr, Address SrcAddr, QualType OriginalType, + const llvm::function_ref<void(Address, Address)> CopyGen) { + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); + SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = SrcAddr.getPointer(); + llvm::Value *DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. 
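  // (Editorial sketch of the emitted control flow, not imported code:
  //    dest.end = &dest[NumElements];
  //    if (dest.begin == dest.end) goto done;            // isempty check
  //    body: copy(*dest, *src); ++src; ++dest;           // PHI-driven
  //    if (dest == dest.end) goto done; else goto body;  // back edge
  //  i.e. a classic pointer-bumping while-do loop over the elements.)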
+ llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); + llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); + llvm::Value *IsEmpty = + Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); + Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. + llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); + EmitBlock(BodyBB); + + CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = + Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + Address SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + llvm::PHINode *DestElementPHI = + Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + CopyGen(DestElementCurrent, SrcElementCurrent); + + // Shift the address forward by one element. + llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); + // Check whether we've reached the end. + llvm::Value *Done = + Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); + SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); + + // Done. + EmitBlock(DoneBB, /*IsFinished=*/true); +} + +void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, + Address SrcAddr, const VarDecl *DestVD, + const VarDecl *SrcVD, const Expr *Copy) { + if (OriginalType->isArrayType()) { + const auto *BO = dyn_cast<BinaryOperator>(Copy); + if (BO && BO->getOpcode() == BO_Assign) { + // Perform simple memcpy for simple copying. + LValue Dest = MakeAddrLValue(DestAddr, OriginalType); + LValue Src = MakeAddrLValue(SrcAddr, OriginalType); + EmitAggregateAssign(Dest, Src, OriginalType); + } else { + // For arrays with complex element types perform element by element + // copying. + EmitOMPAggregateAssign( + DestAddr, SrcAddr, OriginalType, + [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { + // Working with the single array element, so have to remap + // destination and source variables to corresponding array + // elements. + CodeGenFunction::OMPPrivateScope Remap(*this); + Remap.addPrivate(DestVD, [DestElement]() { return DestElement; }); + Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; }); + (void)Remap.Privatize(); + EmitIgnoredExpr(Copy); + }); + } + } else { + // Remap pseudo source variable to private copy. + CodeGenFunction::OMPPrivateScope Remap(*this); + Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; }); + Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; }); + (void)Remap.Privatize(); + // Emit copying of the whole variable. 
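  // (Editorial recap of the branch structure above: for 'int a[8]' the
  //  assignment is a plain BO_Assign, so it lowers to a single aggregate
  //  copy, effectively a memcpy; for something like
  //    struct S { S &operator=(const S &); } s[8];
  //  the element-by-element path is taken so the user-defined assignment
  //  operator runs once per element.)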
+ EmitIgnoredExpr(Copy); + } +} + +bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return false; + bool DeviceConstTarget = + getLangOpts().OpenMPIsDevice && + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + bool FirstprivateIsLastprivate = false; + llvm::DenseSet<const VarDecl *> Lastprivates; + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { + for (const auto *D : C->varlists()) + Lastprivates.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } + llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; + llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); + // Force emission of the firstprivate copy if the directive does not emit + // outlined function, like omp for, omp simd, omp distribute etc. + bool MustEmitFirstprivateCopy = + CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; + for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { + auto IRef = C->varlist_begin(); + auto InitsRef = C->inits().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + bool ThisFirstprivateIsLastprivate = + Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; + const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && + !FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); + ++IRef; + ++InitsRef; + continue; + } + // Do not emit copy for firstprivate constant variables in target regions, + // captured by reference. + if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && + FD && FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, + OrigVD); + ++IRef; + ++InitsRef; + continue; + } + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { + const auto *VDInit = + cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); + bool IsRegistered; + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + LValue OriginalLVal; + if (!FD) { + // Check if the firstprivate variable is just a constant value. + ConstantEmission CE = tryEmitAsConstant(&DRE); + if (CE && !CE.isReference()) { + // Constant value, no need to create a copy. + ++IRef; + ++InitsRef; + continue; + } + if (CE && CE.isReference()) { + OriginalLVal = CE.getReferenceLValue(*this, &DRE); + } else { + assert(!CE && "Expected non-constant firstprivate."); + OriginalLVal = EmitLValue(&DRE); + } + } else { + OriginalLVal = EmitLValue(&DRE); + } + QualType Type = VD->getType(); + if (Type->isArrayType()) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. 
+ IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, VD, Type, OriginalLVal, VDInit]() { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + const Expr *Init = VD->getInit(); + if (!isa<CXXConstructExpr>(Init) || + isTrivialInitializer(Init)) { + // Perform simple memcpy. + LValue Dest = + MakeAddrLValue(Emission.getAllocatedAddress(), Type); + EmitAggregateAssign(Dest, OriginalLVal, Type); + } else { + EmitOMPAggregateAssign( + Emission.getAllocatedAddress(), + OriginalLVal.getAddress(*this), Type, + [this, VDInit, Init](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the + // initialization. + RunCleanupsScope InitScope(*this); + // Emit initialization for single element. + setAddrOfLocalVar(VDInit, SrcElement); + EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer*/ false); + LocalDeclMap.erase(VDInit); + }); + } + EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + } else { + Address OriginalAddr = OriginalLVal.getAddress(*this); + IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, VDInit, OriginalAddr, VD]() { + // Emit private VarDecl with copy init. + // Remap temp VDInit variable to the address of the original + // variable (for proper handling of captured global variables). + setAddrOfLocalVar(VDInit, OriginalAddr); + EmitDecl(*VD); + LocalDeclMap.erase(VDInit); + return GetAddrOfLocalVar(VD); + }); + } + assert(IsRegistered && + "firstprivate var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + ++IRef; + ++InitsRef; + } + } + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); +} + +void CodeGenFunction::EmitOMPPrivateClause( + const OMPExecutableDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return; + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; + for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { + auto IRef = C->varlist_begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } + ++IRef; + } + } +} + +bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { + if (!HaveInsertPoint()) + return false; + // threadprivate_var1 = master_threadprivate_var1; + // operator=(threadprivate_var2, master_threadprivate_var2); + // ... + // __kmpc_barrier(&loc, global_tid); + llvm::DenseSet<const VarDecl *> CopiedVars; + llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; + for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + QualType Type = VD->getType(); + if (CopiedVars.insert(VD->getCanonicalDecl()).second) { + // Get the address of the master variable. 
If we are emitting code with + // TLS support, the address is passed from the master as field in the + // captured declaration. + Address MasterAddr = Address::invalid(); + if (getLangOpts().OpenMPUseTLS && + getContext().getTargetInfo().isTLSSupported()) { + assert(CapturedStmtInfo->lookup(VD) && + "Copyin threadprivates should have been captured!"); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + MasterAddr = EmitLValue(&DRE).getAddress(*this); + LocalDeclMap.erase(VD); + } else { + MasterAddr = + Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) + : CGM.GetAddrOfGlobal(VD), + getContext().getDeclAlign(VD)); + } + // Get the address of the threadprivate variable. + Address PrivateAddr = EmitLValue(*IRef).getAddress(*this); + if (CopiedVars.size() == 1) { + // At first check if current thread is a master thread. If it is, no + // need to copy data. + CopyBegin = createBasicBlock("copyin.not.master"); + CopyEnd = createBasicBlock("copyin.not.master.end"); + Builder.CreateCondBr( + Builder.CreateICmpNE( + Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), + Builder.CreatePtrToInt(PrivateAddr.getPointer(), + CGM.IntPtrTy)), + CopyBegin, CopyEnd); + EmitBlock(CopyBegin); + } + const auto *SrcVD = + cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + } + if (CopyEnd) { + // Exit out of copying procedure for non-master thread. + EmitBlock(CopyEnd, /*IsFinished=*/true); + return true; + } + return false; +} + +bool CodeGenFunction::EmitOMPLastprivateClauseInit( + const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return false; + bool HasAtLeastOneLastprivate = false; + llvm::DenseSet<const VarDecl *> SIMDLCVs; + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + const auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (const Expr *C : LoopDirective->counters()) { + SIMDLCVs.insert( + cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); + } + } + llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { + HasAtLeastOneLastprivate = true; + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + !getLangOpts().OpenMPSimd) + break; + auto IRef = C->varlist_begin(); + auto IDestRef = C->destination_exprs().begin(); + for (const Expr *IInit : C->private_copies()) { + // Keep the address of the original variable for future update at the end + // of the loop. + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + // Taskloops do not require additional initialization, it is done in + // runtime support library. + if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/ + CapturedStmtInfo->lookup(OrigVD) != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + return EmitLValue(&DRE).getAddress(*this); + }); + // Check if the variable is also a firstprivate: in this case IInit is + // not generated. Initialization of this variable will happen in codegen + // for 'firstprivate' clause. 
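        // (Illustration, added in editing: in
        //    #pragma omp for firstprivate(x) lastprivate(x)
        //  the private copy of 'x' is created and initialized by the
        //  firstprivate codegen, so the lastprivate init below is skipped
        //  and only the final copy-back after the loop remains.)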
+ if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); + assert(IsRegistered && + "lastprivate var already registered as private"); + (void)IsRegistered; + } + } + ++IRef; + ++IDestRef; + } + } + return HasAtLeastOneLastprivate; +} + +void CodeGenFunction::EmitOMPLastprivateClauseFinal( + const OMPExecutableDirective &D, bool NoFinals, + llvm::Value *IsLastIterCond) { + if (!HaveInsertPoint()) + return; + // Emit following code: + // if (<IsLastIterCond>) { + // orig_var1 = private_orig_var1; + // ... + // orig_varn = private_orig_varn; + // } + llvm::BasicBlock *ThenBB = nullptr; + llvm::BasicBlock *DoneBB = nullptr; + if (IsLastIterCond) { + // Emit implicit barrier if at least one lastprivate conditional is found + // and this is not a simd mode. + if (!getLangOpts().OpenMPSimd && + llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), + OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + ThenBB = createBasicBlock(".omp.lastprivate.then"); + DoneBB = createBasicBlock(".omp.lastprivate.done"); + Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; + llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; + if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { + auto IC = LoopDirective->counters().begin(); + for (const Expr *F : LoopDirective->finals()) { + const auto *D = + cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); + if (NoFinals) + AlreadyEmittedVars.insert(D); + else + LoopCountersAndUpdates[D] = F; + ++IC; + } + } + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { + auto IRef = C->varlist_begin(); + auto ISrcRef = C->source_exprs().begin(); + auto IDestRef = C->destination_exprs().begin(); + for (const Expr *AssignOp : C->assignment_ops()) { + const auto *PrivateVD = + cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + QualType Type = PrivateVD->getType(); + const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); + if (AlreadyEmittedVars.insert(CanonicalVD).second) { + // If lastprivate variable is a loop control variable for loop-based + // directive, update its value before copyin back to original + // variable. + if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) + EmitIgnoredExpr(FinalExpr); + const auto *SrcVD = + cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); + const auto *DestVD = + cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); + // Get the address of the private variable. + Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); + if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) + PrivateAddr = + Address(Builder.CreateLoad(PrivateAddr), + getNaturalTypeAlignment(RefTy->getPointeeType())); + // Store the last value to the private copy in the last iteration. + if (C->getKind() == OMPC_LASTPRIVATE_conditional) + CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( + *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, + (*IRef)->getExprLoc()); + // Get the address of the original variable. 
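        // (Editorial example: for
        //    #pragma omp for lastprivate(last)
        //    for (i = 0; i < n; ++i) last = a[i];
        //  this block runs guarded by the "is last iteration" flag and
        //  copies the private 'last' back into the original variable, so
        //  after the loop 'last' holds the value from iteration n-1.)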
+ Address OriginalAddr = GetAddrOfLocalVar(DestVD); + EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); + } + ++IRef; + ++ISrcRef; + ++IDestRef; + } + if (const Expr *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); + } + if (IsLastIterCond) + EmitBlock(DoneBB, /*IsFinished=*/true); +} + +void CodeGenFunction::EmitOMPReductionClauseInit( + const OMPExecutableDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) + return; + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const Expr *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const Expr *IRef : Shareds) { + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + QualType Type = PrivateVD->getType(); + bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); + if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); + }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); + } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || + isa<ArraySubscriptExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. 
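    // (Editorial note: e.g. under
    //    #pragma omp parallel for reduction(+:sum)
    //  each thread gets a private 'sum' initialized here with the reduction
    //  identity, 0 for '+'; the LHS/RHS helper variables set up below feed
    //  the runtime combiner that later folds the partial sums back into the
    //  original variable.)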
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count, this]() { + return RedCG.getSharedLValue(Count).getAddress(*this); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + } + PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() { + return IsArray + ? Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); + } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; + } +} + +void CodeGenFunction::EmitOMPReductionClauseFinal( + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { + if (!HaveInsertPoint()) + return; + llvm::SmallVector<const Expr *, 8> Privates; + llvm::SmallVector<const Expr *, 8> LHSExprs; + llvm::SmallVector<const Expr *, 8> RHSExprs; + llvm::SmallVector<const Expr *, 8> ReductionOps; + bool HasAtLeastOneReduction = false; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + HasAtLeastOneReduction = true; + Privates.append(C->privates().begin(), C->privates().end()); + LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + } + if (HasAtLeastOneReduction) { + bool WithNowait = D.getSingleClause<OMPNowaitClause>() || + isOpenMPParallelDirective(D.getDirectiveKind()) || + ReductionKind == OMPD_simd; + bool SimpleReduction = ReductionKind == OMPD_simd; + // Emit nowait reduction if nowait clause is present or directive is a + // parallel directive (it always has implicit barrier). + CGM.getOpenMPRuntime().emitReduction( + *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, + {WithNowait, SimpleReduction, ReductionKind}); + } +} + +static void emitPostUpdateForReductionClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { + if (!CGF.HaveInsertPoint()) + return; + llvm::BasicBlock *DoneBB = nullptr; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + if (const Expr *PostUpdate = C->getPostUpdateExpr()) { + if (!DoneBB) { + if (llvm::Value *Cond = CondGen(CGF)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); + DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); + CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); + CGF.EmitBlock(ThenBB); + } + } + CGF.EmitIgnoredExpr(PostUpdate); + } + } + if (DoneBB) + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +namespace { +/// Codegen lambda for appending distribute lower and upper bounds to outlined +/// parallel function. 
This is necessary for combined constructs such as
+/// 'distribute parallel for'.
+typedef llvm::function_ref<void(CodeGenFunction &,
+                                const OMPExecutableDirective &,
+                                llvm::SmallVectorImpl<llvm::Value *> &)>
+    CodeGenBoundParametersTy;
+} // anonymous namespace
+
+static void emitCommonOMPParallelDirective(
+    CodeGenFunction &CGF, const OMPExecutableDirective &S,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
+  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
+  llvm::Function *OutlinedFn =
+      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
+    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+    llvm::Value *NumThreads =
+        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                           /*IgnoreResultAssign=*/true);
+    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
+        CGF, NumThreads, NumThreadsClause->getBeginLoc());
+  }
+  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
+    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
+    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
+        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
+  }
+  const Expr *IfCond = nullptr;
+  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+    if (C->getNameModifier() == OMPD_unknown ||
+        C->getNameModifier() == OMPD_parallel) {
+      IfCond = C->getCondition();
+      break;
+    }
+  }
+
+  OMPParallelScope Scope(CGF, S);
+  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
+  // lower and upper bounds with the pragma 'for' chunking mechanism.
+  // The following lambda takes care of appending the lower and upper bound
+  // parameters when necessary.
+  CodeGenBoundParameters(CGF, S, CapturedVars);
+  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
+  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
+                                              CapturedVars, IfCond);
+}
+
+static void emitEmptyBoundParameters(CodeGenFunction &,
+                                     const OMPExecutableDirective &,
+                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
+
+void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+
+  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+    // Check if we have any if clause associated with the directive.
+    llvm::Value *IfCond = nullptr;
+    if (const auto *C = S.getSingleClause<OMPIfClause>())
+      IfCond = EmitScalarExpr(C->getCondition(),
+                              /*IgnoreResultAssign=*/true);
+
+    llvm::Value *NumThreads = nullptr;
+    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
+      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                                  /*IgnoreResultAssign=*/true);
+
+    ProcBindKind ProcBind = OMP_PROC_BIND_default;
+    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
+      ProcBind = ProcBindClause->getProcBindKind();
+
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    // The cleanup callback that finalizes all variables at the given location,
+    // thus calls destructors etc.
+ auto FiniCB = [this](InsertPointTy IP) { + CGBuilderTy::InsertPointGuard IPG(Builder); + assert(IP.getBlock()->end() != IP.getPoint() && + "OpenMP IR Builder should cause terminated block!"); + llvm::BasicBlock *IPBB = IP.getBlock(); + llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); + IPBB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(IPBB); + CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); + EmitBranchThroughCleanup(Dest); + }; + + // Privatization callback that performs appropriate action for + // shared/private/firstprivate/lastprivate/copyin/... variables. + // + // TODO: This defaults to shared right now. + auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::Value &Val, llvm::Value *&ReplVal) { + // The next line is appropriate only for variables (Val) with the + // data-sharing attribute "shared". + ReplVal = &Val; + + return CodeGenIP; + }; + + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); + + auto BodyGenCB = [ParallelRegionBodyStmt, + this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::BasicBlock &ContinuationBB) { + auto OldAllocaIP = AllocaInsertPt; + AllocaInsertPt = &*AllocaIP.getPoint(); + + auto OldReturnBlock = ReturnBlock; + ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); + + llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); + CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); + llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); + CodeGenIPBBTI->removeFromParent(); + + Builder.SetInsertPoint(CodeGenIPBB); + + EmitStmt(ParallelRegionBodyStmt); + + Builder.Insert(CodeGenIPBBTI); + + AllocaInsertPt = OldAllocaIP; + ReturnBlock = OldReturnBlock; + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel())); + return; + } + + // Emit parallel region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + bool Copyins = CGF.EmitOMPCopyinClause(S); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins) { + // Emit implicit barrier to synchronize threads and avoid data races on + // propagation master's thread values of threadprivate variables to local + // instances of that variables of all other implicit threads. 
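      // (Editorial example: with
      //    int tp; #pragma omp threadprivate(tp)
      //    #pragma omp parallel copyin(tp)
      //  the master's 'tp' is copied into each worker's threadprivate
      //  instance, and this barrier keeps any thread from entering the
      //  region body before its own copy has completed.)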
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, + int MaxLevel, int Level = 0) { + assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); + const Stmt *SimplifiedS = S->IgnoreContainers(); + if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { + PrettyStackTraceLoc CrashInfo( + CGF.getContext().getSourceManager(), CS->getLBracLoc(), + "LLVM IR generation of compound statement ('{}')"); + + // Keep track of the current cleanup stack depth, including debug scopes. + CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); + for (const Stmt *CurStmt : CS->body()) + emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); + return; + } + if (SimplifiedS == NextLoop) { + if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { + S = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(SimplifiedS) && + "Expected canonical for loop or range-based for loop."); + const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS); + CGF.EmitStmt(CXXFor->getLoopVarStmt()); + S = CXXFor->getBody(); + } + if (Level + 1 < MaxLevel) { + NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( + S, /*TryImperfectlyNestedLoops=*/true); + emitBody(CGF, S, NextLoop, MaxLevel, Level + 1); + return; + } + } + CGF.EmitStmt(S); +} + +void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, + JumpDest LoopExit) { + RunCleanupsScope BodyScope(*this); + // Update counters values on current iteration. + for (const Expr *UE : D.updates()) + EmitIgnoredExpr(UE); + // Update the linear variables. + // In distribute directives only loop counters may be marked as linear, no + // need to generate the code for them. + if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *UE : C->updates()) + EmitIgnoredExpr(UE); + } + } + + // On a continue in the body, jump to the end. + JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + for (const Expr *E : D.finals_conditions()) { + if (!E) + continue; + // Check that loop counter in non-rectangular nest fits into the iteration + // space. + llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); + EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), + getProfileCount(D.getBody())); + EmitBlock(NextBB); + } + // Emit loop variables for C++ range loops. + const Stmt *Body = + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + // Emit loop body. + emitBody(*this, Body, + OMPLoopDirective::tryToFindNextInnerLoop( + Body, /*TryImperfectlyNestedLoops=*/true), + D.getCollapsedNumber()); + + // The end (updates/cleanups). 
+ EmitBlock(Continue.getBlock()); + BreakContinueStack.pop_back(); +} + +void CodeGenFunction::EmitOMPInnerLoop( + const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const Expr *IncExpr, + const llvm::function_ref<void(CodeGenFunction &)> BodyGen, + const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { + auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); + + // Start the loop with a block that tests the condition. + auto CondBlock = createBasicBlock("omp.inner.for.cond"); + EmitBlock(CondBlock); + const SourceRange R = S.getSourceRange(); + LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + + // If there are any cleanups between here and the loop-exit scope, + // create a block to stage a loop exit along. + llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); + if (RequiresCleanup) + ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); + + llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); + + // Emit condition. + EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); + if (ExitBlock != LoopExit.getBlock()) { + EmitBlock(ExitBlock); + EmitBranchThroughCleanup(LoopExit); + } + + EmitBlock(LoopBody); + incrementProfileCounter(&S); + + // Create a block for the increment. + JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + + BodyGen(*this); + + // Emit "IV = IV + 1" and a back-edge to the condition block. + EmitBlock(Continue.getBlock()); + EmitIgnoredExpr(IncExpr); + PostIncGen(*this); + BreakContinueStack.pop_back(); + EmitBranch(CondBlock); + LoopStack.pop(); + // Emit the fall-through block. + EmitBlock(LoopExit.getBlock()); +} + +bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { + if (!HaveInsertPoint()) + return false; + // Emit inits for the linear variables. + bool HasLinears = false; + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *Init : C->inits()) { + HasLinears = true; + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); + if (const auto *Ref = + dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + VD->getInit()->getType(), VK_LValue, + VD->getInit()->getExprLoc()); + EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), + VD->getType()), + /*capturedByInit=*/false); + EmitAutoVarCleanups(Emission); + } else { + EmitVarDecl(*VD); + } + } + // Emit the linear steps for the linear clauses. + // If a step is not constant, it is pre-calculated before the loop. + if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) + if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { + EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); + // Emit calculation of the linear step. + EmitIgnoredExpr(CS); + } + } + return HasLinears; +} + +void CodeGenFunction::EmitOMPLinearClauseFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { + if (!HaveInsertPoint()) + return; + llvm::BasicBlock *DoneBB = nullptr; + // Emit the final values of the linear variables. 
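+ // Illustrative example (assumed, not from the source): for
+ // '#pragma omp for linear(x:2)', each final expression F below is roughly
+ // 'x = start + niters * 2'; CondGen lets callers guard these stores, e.g.
+ // on the is-last-iteration flag.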
+ for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + auto IC = C->varlist_begin(); + for (const Expr *F : C->finals()) { + if (!DoneBB) { + if (llvm::Value *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); + DoneBB = createBasicBlock(".omp.linear.pu.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); + Address OrigAddr = EmitLValue(&DRE).getAddress(*this); + CodeGenFunction::OMPPrivateScope VarScope(*this); + VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); + (void)VarScope.Privatize(); + EmitIgnoredExpr(F); + ++IC; + } + if (const Expr *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); + } + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); +} + +static void emitAlignedClause(CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { + llvm::APInt ClauseAlignment(64, 0); + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = + cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); + ClauseAlignment = AlignmentCI->getValue(); + } + for (const Expr *E : Clause->varlists()) { + llvm::APInt Alignment(ClauseAlignment); + if (Alignment == 0) { + // OpenMP [2.8.1, Description] + // If no optional parameter is specified, implementation-defined default + // alignments for SIMD instructions on the target platforms are assumed. + Alignment = + CGF.getContext() + .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( + E->getType()->getPointeeType())) + .getQuantity(); + } + assert((Alignment == 0 || Alignment.isPowerOf2()) && + "alignment is not power of 2"); + if (Alignment != 0) { + llvm::Value *PtrValue = CGF.EmitScalarExpr(E); + CGF.EmitAlignmentAssumption( + PtrValue, E, /*No second loc needed*/ SourceLocation(), + llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); + } + } + } +} + +void CodeGenFunction::EmitOMPPrivateLoopCounters( + const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { + if (!HaveInsertPoint()) + return; + auto I = S.private_counters().begin(); + for (const Expr *E : S.counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); + // Emit var without initialization. + AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); + EmitAutoVarCleanups(VarEmission); + LocalDeclMap.erase(PrivateVD); + (void)LoopScope.addPrivate(VD, [&VarEmission]() { + return VarEmission.getAllocatedAddress(); + }); + if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || + VD->hasGlobalStorage()) { + (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), + LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), + E->getType(), VK_LValue, E->getExprLoc()); + return EmitLValue(&DRE).getAddress(*this); + }); + } else { + (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { + return VarEmission.getAllocatedAddress(); + }); + } + ++I; + } + // Privatize extra loop counters used in loops for ordered(n) clauses. 
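+ // Example (illustrative): for '#pragma omp for ordered(2)' over a two-deep
+ // nest, the second loop's counter is not a collapsed worksharing counter,
+ // but the doacross machinery still needs a privatized copy of it here.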
+ for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
+ if (!C->getNumForLoops())
+ continue;
+ for (unsigned I = S.getCollapsedNumber(),
+ E = C->getLoopNumIterations().size();
+ I < E; ++I) {
+ const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
+ const auto *VD = cast<VarDecl>(DRE->getDecl());
+ // Override only those variables that can be captured to avoid re-emission
+ // of the variables declared within the loops.
+ if (DRE->refersToEnclosingVariableOrCapture()) {
+ (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
+ return CreateMemTemp(DRE->getType(), VD->getName());
+ });
+ }
+ }
+ }
+}
+
+static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
+ const Expr *Cond, llvm::BasicBlock *TrueBlock,
+ llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ {
+ CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+ CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
+ (void)PreCondScope.Privatize();
+ // Get initial values of real counters.
+ for (const Expr *I : S.inits()) {
+ CGF.EmitIgnoredExpr(I);
+ }
+ }
+ // Create temp loop control variables with their init values to support
+ // non-rectangular loops.
+ CodeGenFunction::OMPMapVars PreCondVars;
+ for (const Expr *E : S.dependent_counters()) {
+ if (!E)
+ continue;
+ assert(!E->getType().getNonReferenceType()->isRecordType() &&
+ "dependent counter must not be an iterator.");
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Address CounterAddr =
+ CGF.CreateMemTemp(VD->getType().getNonReferenceType());
+ (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
+ }
+ (void)PreCondVars.apply(CGF);
+ for (const Expr *E : S.dependent_inits()) {
+ if (!E)
+ continue;
+ CGF.EmitIgnoredExpr(E);
+ }
+ // Check that the loop is executed at least once.
+ CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
+ PreCondVars.restore(CGF);
+}
+
+void CodeGenFunction::EmitOMPLinearClause(
+ const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
+ if (!HaveInsertPoint())
+ return;
+ llvm::DenseSet<const VarDecl *> SIMDLCVs;
+ if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ const auto *LoopDirective = cast<OMPLoopDirective>(&D);
+ for (const Expr *C : LoopDirective->counters()) {
+ SIMDLCVs.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
+ }
+ }
+ for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
+ auto CurPrivate = C->privates().begin();
+ for (const Expr *E : C->varlists()) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ const auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
+ if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
+ bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
+ // Emit private VarDecl with copy init.
+ EmitVarDecl(*PrivateVD);
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ assert(IsRegistered && "linear var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ } else {
+ EmitVarDecl(*PrivateVD);
+ }
+ ++CurPrivate;
+ }
+ }
+}
+
+static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ bool IsMonotonic) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
+ RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
+ /*ignoreResult=*/true);
+ auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
+ // In presence of finite 'safelen', it may be unsafe to mark all
+ // the memory instructions parallel, because loop-carried
+ // dependences of 'safelen' iterations are possible.
+ if (!IsMonotonic)
+ CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
+ } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
+ RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
+ /*ignoreResult=*/true);
+ auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+ CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
+ // In presence of finite 'safelen', it may be unsafe to mark all
+ // the memory instructions parallel, because loop-carried
+ // dependences of 'safelen' iterations are possible.
+ CGF.LoopStack.setParallel(/*Enable=*/false);
+ }
+}
+
+void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
+ bool IsMonotonic) {
+ // Walk the clauses and process simdlen/safelen.
+ LoopStack.setParallel(!IsMonotonic);
+ LoopStack.setVectorizeEnable();
+ emitSimdlenSafelenClause(*this, D, IsMonotonic);
+}
+
+void CodeGenFunction::EmitOMPSimdFinal(
+ const OMPLoopDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
+ if (!HaveInsertPoint())
+ return;
+ llvm::BasicBlock *DoneBB = nullptr;
+ auto IC = D.counters().begin();
+ auto IPC = D.private_counters().begin();
+ for (const Expr *F : D.finals()) {
+ const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
+ const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
+ if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
+ OrigVD->hasGlobalStorage() || CED) {
+ if (!DoneBB) {
+ if (llvm::Value *Cond = CondGen(*this)) {
+ // If the first post-update expression is found, emit conditional
+ // block if it was requested.
+ llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
+ DoneBB = createBasicBlock(".omp.final.done");
+ Builder.CreateCondBr(Cond, ThenBB, DoneBB);
+ EmitBlock(ThenBB);
+ }
+ }
+ Address OrigAddr = Address::invalid();
+ if (CED) {
+ OrigAddr =
+ EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
+ } else {
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
+ /*RefersToEnclosingVariableOrCapture=*/false,
+ (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
+ OrigAddr = EmitLValue(&DRE).getAddress(*this);
+ }
+ OMPPrivateScope VarScope(*this);
+ VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
+ (void)VarScope.Privatize();
+ EmitIgnoredExpr(F);
+ }
+ ++IC;
+ ++IPC;
+ }
+ if (DoneBB)
+ EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
+static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+}
+
+/// Emit a helper variable and return the corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, + const RegionCodeGenTy &SimdInitGen, + const RegionCodeGenTy &BodyCodeGen) { + auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, + PrePostActionTy &) { + CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + SimdInitGen(CGF); + + BodyCodeGen(CGF); + }; + auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); + + BodyCodeGen(CGF); + }; + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (CGF.getLangOpts().OpenMP >= 50 && + (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_simd)) { + IfCond = C->getCondition(); + break; + } + } + if (IfCond) { + CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); + } else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); + } +} + +static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + assert(isOpenMPSimdDirective(S.getDirectiveKind()) && + "Expected simd directive"); + OMPLoopScope PreInitScope(CGF, S); + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + if (isOpenMPDistributeDirective(S.getDirectiveKind()) || + isOpenMPWorksharingDirective(S.getDirectiveKind()) || + isOpenMPTaskLoopDirective(S.getDirectiveKind())) { + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); + (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); + } + + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); + ContBlock = CGF.createBasicBlock("simd.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. 
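+ // (Illustrative) For a canonical loop 'for (i = 0; i < N; ++i)' this count
+ // is roughly '(N - 0) / 1'; it only becomes a variable, and is emitted
+ // here, when it does not fold to a compile-time constant.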
+ CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + emitAlignedClause(CGF, S); + (void)CGF.EmitOMPLinearClauseInit(S); + { + CodeGenFunction::OMPPrivateScope LoopScope(CGF); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + CGF, S, CGF.EmitLValue(S.getIterationVariable())); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); + + emitCommonSimdLoop( + CGF, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPSimdInit(S); + }, + [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( + CGF, S); + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); + CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); + // Emit final copy of the lastprivate variables at the end of loops. + if (HasLastprivateClause) + CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); + } + CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } +} + +void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); +} + +void CodeGenFunction::EmitOMPOuterLoop( + bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, + CodeGenFunction::OMPPrivateScope &LoopScope, + const CodeGenFunction::OMPLoopArguments &LoopArgs, + const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, + const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); + + // Start the loop with a block that tests the condition. + llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); + EmitBlock(CondBlock); + const SourceRange R = S.getSourceRange(); + LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + + llvm::Value *BoolCondVal = nullptr; + if (!DynamicOrOrdered) { + // UB = min(UB, GlobalUB) or + // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 
+ // 'distribute parallel for') + EmitIgnoredExpr(LoopArgs.EUB); + // IV = LB + EmitIgnoredExpr(LoopArgs.Init); + // IV < UB + BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); + } else { + BoolCondVal = + RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); + } + + // If there are any cleanups between here and the loop-exit scope, + // create a block to stage a loop exit along. + llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); + if (LoopScope.requiresCleanups()) + ExitBlock = createBasicBlock("omp.dispatch.cleanup"); + + llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); + if (ExitBlock != LoopExit.getBlock()) { + EmitBlock(ExitBlock); + EmitBranchThroughCleanup(LoopExit); + } + EmitBlock(LoopBody); + + // Emit "IV = LB" (in case of static schedule, we have already calculated new + // LB for loop condition and emitted it above). + if (DynamicOrOrdered) + EmitIgnoredExpr(LoopArgs.Init); + + // Create a block for the increment. + JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); + BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + + emitCommonSimdLoop( + *this, S, + [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + // Generate !llvm.loop.parallel metadata for loads and stores for loops + // with dynamic/guided scheduling and without ordered clause. + if (!isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.LoopStack.setParallel(!IsMonotonic); + else + CGF.EmitOMPSimdInit(S, IsMonotonic); + }, + [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, + &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + SourceLocation Loc = S.getBeginLoc(); + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); + }); + + EmitBlock(Continue.getBlock()); + BreakContinueStack.pop_back(); + if (!DynamicOrOrdered) { + // Emit "LB = LB + Stride", "UB = UB + Stride". + EmitIgnoredExpr(LoopArgs.NextLB); + EmitIgnoredExpr(LoopArgs.NextUB); + } + + EmitBranch(CondBlock); + LoopStack.pop(); + // Emit the fall-through block. + EmitBlock(LoopExit.getBlock()); + + // Tell the runtime we are done. + auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { + if (!DynamicOrOrdered) + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), + S.getDirectiveKind()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); +} + +void CodeGenFunction::EmitOMPForOuterLoop( + const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); + + // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 
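+ // For instance (illustrative): 'schedule(dynamic, 4)' or an ordered loop
+ // takes the dispatch path below, while an unchunked 'schedule(static)'
+ // never reaches this function (see the assert that follows).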
+ const bool DynamicOrOrdered = + Ordered || RT.isDynamic(ScheduleKind.Schedule); + + assert((Ordered || + !RT.isStaticNonchunked(ScheduleKind.Schedule, + LoopArgs.Chunk != nullptr)) && + "static non-chunked schedule does not need outer loop"); + + // Emit outer loop. + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(dynamic,chunk_size) is specified, the iterations are + // distributed to threads in the team in chunks as the threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be distributed. Each chunk contains chunk_size + // iterations, except for the last chunk to be distributed, which may have + // fewer iterations. When no chunk_size is specified, it defaults to 1. + // + // When schedule(guided,chunk_size) is specified, the iterations are assigned + // to threads in the team in chunks as the executing threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be assigned. For a chunk_size of 1, the size of + // each chunk is proportional to the number of unassigned iterations divided + // by the number of threads in the team, decreasing to 1. For a chunk_size + // with value k (greater than 1), the size of each chunk is determined in the + // same way, with the restriction that the chunks do not contain fewer than k + // iterations (except for the last chunk to be assigned, which may have fewer + // than k iterations). + // + // When schedule(auto) is specified, the decision regarding scheduling is + // delegated to the compiler and/or runtime system. The programmer gives the + // implementation the freedom to choose any possible mapping of iterations to + // threads in the team. + // + // When schedule(runtime) is specified, the decision regarding scheduling is + // deferred until run time, and the schedule and chunk size are taken from the + // run-sched-var ICV. If the ICV is set to auto, the schedule is + // implementation defined + // + // while(__kmpc_dispatch_next(&LB, &UB)) { + // idx = LB; + // while (idx <= UB) { BODY; ++idx; + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. + // } // inner loop + // } + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(static, chunk_size) is specified, iterations are divided into + // chunks of size chunk_size, and the chunks are assigned to the threads in + // the team in a round-robin fashion in the order of the thread number. 
+ //
+ // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
+ // while (idx <= UB) { BODY; ++idx; } // inner loop
+ // LB = LB + ST;
+ // UB = UB + ST;
+ // }
+ //
+
+ const Expr *IVExpr = S.getIterationVariable();
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+ if (DynamicOrOrdered) {
+ const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
+ CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
+ llvm::Value *LBVal = DispatchBounds.first;
+ llvm::Value *UBVal = DispatchBounds.second;
+ CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
+ LoopArgs.Chunk};
+ RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
+ IVSigned, Ordered, DispatchRTInputValues);
+ } else {
+ CGOpenMPRuntime::StaticRTInput StaticInit(
+ IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
+ LoopArgs.ST, LoopArgs.Chunk);
+ RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
+ ScheduleKind, StaticInit);
+ }
+
+ auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
+ const unsigned IVSize,
+ const bool IVSigned) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
+ IVSigned);
+ }
+ };
+
+ OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
+ OuterLoopArgs.IncExpr = S.getInc();
+ OuterLoopArgs.Init = S.getInit();
+ OuterLoopArgs.Cond = S.getCond();
+ OuterLoopArgs.NextLB = S.getNextLowerBound();
+ OuterLoopArgs.NextUB = S.getNextUpperBound();
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
+ emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
+}
+
+static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
+ const unsigned IVSize, const bool IVSigned) {}
+
+void CodeGenFunction::EmitOMPDistributeOuterLoop(
+ OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent) {
+
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
+
+ // Emit outer loop.
+ // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
+ // dynamic.
+ //
+
+ const Expr *IVExpr = S.getIterationVariable();
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+ CGOpenMPRuntime::StaticRTInput StaticInit(
+ IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
+ LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
+ RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
+
+ // For combined 'distribute' and 'for' directives, the increment expression
+ // of 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
+ Expr *IncExpr;
+ if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
+ IncExpr = S.getDistInc();
+ else
+ IncExpr = S.getInc();
+
+ // This routine is shared by 'omp distribute parallel for' and
+ // 'omp distribute': select the right EUB expression depending on the
+ // directive.
+ OMPLoopArguments OuterLoopArgs;
+ OuterLoopArgs.LB = LoopArgs.LB;
+ OuterLoopArgs.UB = LoopArgs.UB;
+ OuterLoopArgs.ST = LoopArgs.ST;
+ OuterLoopArgs.IL = LoopArgs.IL;
+ OuterLoopArgs.Chunk = LoopArgs.Chunk;
+ OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound();
+ OuterLoopArgs.IncExpr = IncExpr;
+ OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit();
+ OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+ OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextLowerBound()
+ : S.getNextLowerBound();
+ OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextUpperBound()
+ : S.getNextUpperBound();
+
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
+ LoopScope, OuterLoopArgs, CodeGenLoopContent,
+ emitEmptyOrdered);
+}
+
+static std::pair<LValue, LValue>
+emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+
+ // When composing 'distribute' with 'for' (e.g. as in 'distribute
+ // parallel for') we need to use the 'distribute'
+ // chunk lower and upper bounds rather than the whole loop iteration
+ // space. These are parameters to the outlined function for 'parallel'
+ // and we copy the bounds of the previous schedule into the current ones.
+ LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
+ LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
+ llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
+ PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
+ PrevLBVal = CGF.EmitScalarConversion(
+ PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(),
+ LS.getPrevLowerBoundVariable()->getExprLoc());
+ llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
+ PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
+ PrevUBVal = CGF.EmitScalarConversion(
+ PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(),
+ LS.getPrevUpperBoundVariable()->getExprLoc());
+
+ CGF.EmitStoreOfScalar(PrevLBVal, LB);
+ CGF.EmitStoreOfScalar(PrevUBVal, UB);
+
+ return {LB, UB};
+}
+
+/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then we
+/// need to use the LB and UB expressions generated by the worksharing code
+/// generation support, whereas in non-combined situations we would just emit
+/// 0 and the LastIteration expression.
+/// This function is necessary due to the difference of the LB and UB types
+/// for the RT emission routines for 'for_static_init' and
+/// 'for_dispatch_init'.
+static std::pair<llvm::Value *, llvm::Value *>
+emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ // When implementing a dynamic schedule for a 'for' combined with a
+ // 'distribute' (e.g. 
'distribute parallel for'), the 'for' loop + // is not normalized as each team only executes its own assigned + // distribute chunk + QualType IteratorTy = IVExpr->getType(); + llvm::Value *LBVal = + CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); + llvm::Value *UBVal = + CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); + return {LBVal, UBVal}; +} + +static void emitDistributeParallelForDistributeInnerBoundParams( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { + const auto &Dir = cast<OMPLoopDirective>(S); + LValue LB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); + llvm::Value *LBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(LBCast); + LValue UB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); + + llvm::Value *UBCast = + CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), + CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(UBCast); +} + +static void +emitInnerParallelForWhenCombined(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + bool HasCancel = false; + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { + if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = + dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + } + CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), + HasCancel); + CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), + emitDistributeParallelForInnerBounds, + emitDistributeParallelForDispatchBounds); + }; + + emitCommonOMPParallelDirective( + CGF, S, + isOpenMPSimdDirective(S.getDirectiveKind()) ? 
OMPD_for_simd : OMPD_for,
+ CGInlinedWorksharingLoop,
+ emitDistributeParallelForDistributeInnerBoundParams);
+}
+
+void CodeGenFunction::EmitOMPDistributeParallelForDirective(
+ const OMPDistributeParallelForDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
+ OMPLexicalScope Scope(*this, S, OMPD_parallel);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
+ const OMPDistributeParallelForSimdDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
+ OMPLexicalScope Scope(*this, S, OMPD_parallel);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPDistributeSimdDirective(
+ const OMPDistributeSimdDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
+ };
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
+ // Emit the SPMD target simd region as a standalone region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitOMPSimdRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetSimdDirective(
+ const OMPTargetSimdDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitOMPSimdRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
+namespace {
+ struct ScheduleKindModifiersTy {
+ OpenMPScheduleClauseKind Kind;
+ OpenMPScheduleClauseModifier M1;
+ OpenMPScheduleClauseModifier M2;
+ ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
+ OpenMPScheduleClauseModifier M1,
+ OpenMPScheduleClauseModifier M2)
+ : Kind(Kind), M1(M1), M2(M2) {}
+ };
+} // namespace
+
+bool CodeGenFunction::EmitOMPWorksharingLoop(
+ const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
+ // Emit the loop iteration variable.
+ const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ EmitVarDecl(*IVDecl);
+
+ // Emit the iterations count variable.
+ // If it is not a variable, Sema decided to calculate iterations count on each
+ // iteration (e.g., it is foldable into a constant).
+ if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
+
+ bool HasLastprivateClause;
+ // Check pre-condition.
+ {
+ OMPLoopScope PreInitScope(*this, S);
+ // Skip the entire loop if we don't meet the precondition.
+ // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return false; + } else { + llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } + + RunCleanupsScope DoacrossCleanupScope(*this); + bool Ordered = false; + if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { + if (OrderedClause->getNumForLoops()) + RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); + else + Ordered = true; + } + + llvm::DenseSet<const Expr *> EmittedFinals; + emitAlignedClause(*this, S); + bool HasLinears = EmitOMPLinearClauseInit(S); + // Emit helper vars inits. + + std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); + LValue LB = Bounds.first; + LValue UB = Bounds.second; + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + + // Emit 'then' code. + { + OMPPrivateScope LoopScope(*this); + if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables and post-update of + // lastprivate variables. + CGM.getOpenMPRuntime().emitBarrierCall( + *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + *this, S, EmitLValue(S.getIterationVariable())); + HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); + EmitOMPReductionClauseInit(S, LoopScope); + EmitOMPPrivateLoopCounters(S, LoopScope); + EmitOMPLinearClause(S, LoopScope); + (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); + + // Detect the loop schedule kind and chunk. + const Expr *ChunkExpr = nullptr; + OpenMPScheduleTy ScheduleKind; + if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { + ScheduleKind.Schedule = C->getScheduleKind(); + ScheduleKind.M1 = C->getFirstScheduleModifier(); + ScheduleKind.M2 = C->getSecondScheduleModifier(); + ChunkExpr = C->getChunkSize(); + } else { + // Default behaviour for schedule clause. + CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( + *this, S, ScheduleKind.Schedule, ChunkExpr); + } + bool HasChunkSizeOne = false; + llvm::Value *Chunk = nullptr; + if (ChunkExpr) { + Chunk = EmitScalarExpr(ChunkExpr); + Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), + S.getIterationVariable()->getType(), + S.getBeginLoc()); + Expr::EvalResult Result; + if (ChunkExpr->EvaluateAsInt(Result, getContext())) { + llvm::APSInt EvaluatedChunk = Result.Val.getInt(); + HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); + } + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + // OpenMP 4.5, 2.7.1 Loop Construct, Description. 
+ // If the static schedule kind is specified or if the ordered clause is + // specified, and if no monotonic modifier is specified, the effect will + // be as if the monotonic modifier was specified. + bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + if ((RT.isStaticNonchunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) || + StaticChunkedOne) && + !Ordered) { + JumpDest LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + emitCommonSimdLoop( + *this, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + }, + [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, + &S, ScheduleKind, LoopExit, + &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When no chunk_size is specified, the iteration space is divided + // into chunks that are approximately equal in size, and at most + // one chunk is distributed to each thread. Note that the size of + // the chunks is unspecified in this case. + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), + StaticChunkedOne ? Chunk : nullptr); + CGF.CGM.getOpenMPRuntime().emitForStaticInit( + CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, + StaticInit); + // UB = min(UB, GlobalUB); + if (!StaticChunkedOne) + CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + CGF.EmitIgnoredExpr(S.getInit()); + // For unchunked static schedule generate: + // + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // For static schedule with chunk one: + // + // while (IV <= PrevUB) { + // BODY; + // IV += ST; + // } + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), + StaticChunkedOne ? S.getCombinedParForInDistCond() + : S.getCond(), + StaticChunkedOne ? S.getDistInc() : S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime() + .initLastprivateConditionalCounter(CGF, S); + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); + EmitBlock(LoopExit.getBlock()); + // Tell the runtime we are done. + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), + S.getDirectiveKind()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); + } else { + const bool IsMonotonic = + Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || + ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. 
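+ // (Illustrative) For 'schedule(dynamic)' the outer loop wraps the
+ // runtime's dispatch-next call (__kmpc_dispatch_next_* in libomp), each
+ // returned [LB, UB] chunk driving one pass of the inner loop; for chunked
+ // static schedules the bounds are instead advanced by the stride.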
+ const OMPLoopArguments LoopArguments( + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk, EUB); + EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, + LoopArguments, CGDispatchBounds); + } + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + } + EmitOMPReductionClauseFinal( + S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) + ? /*Parallel and Simd*/ OMPD_parallel_for_simd + : /*Parallel only*/ OMPD_parallel); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + // Emit final copy of the lastprivate variables if IsLastIter != 0. + if (HasLastprivateClause) + EmitOMPLastprivateClauseFinal( + S, isOpenMPSimdDirective(S.getDirectiveKind()), + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); + } + EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + DoacrossCleanupScope.ForceCleanup(); + // We're now done with the loop, so jump to the continuation block. + if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, /*IsFinished=*/true); + } + } + return HasLastprivateClause; +} + +/// The following two functions generate expressions for the loop lower +/// and upper bounds in case of static and dynamic (dispatch) schedule +/// of the associated 'for' or 'distribute' loop. +static std::pair<LValue, LValue> +emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + const auto &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + return {LB, UB}; +} + +/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not +/// consider the lower and upper bound expressions generated by the +/// worksharing loop support, but we use 0 and the iteration space size as +/// constants +static std::pair<llvm::Value *, llvm::Value *> +emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, + Address LB, Address UB) { + const auto &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); + llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); + llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); + return {LBVal, UBVal}; +} + +void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { + bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + }; + { + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, + S.hasCancel()); + } + + // Emit an implicit barrier at the end. 
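+ // That is, the barrier is skipped only when 'nowait' is present and there
+ // are no lastprivate results other threads might still need to observe.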
+ if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); +} + +void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { + bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + }; + { + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + } + + // Emit an implicit barrier at the end. + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); +} + +static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, + const Twine &Name, + llvm::Value *Init = nullptr) { + LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); + if (Init) + CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); + return LVal; +} + +void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { + const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); + const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); + bool HasLastprivates = false; + auto &&CodeGen = [&S, CapturedStmt, CS, + &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { + ASTContext &C = CGF.getContext(); + QualType KmpInt32Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + // Emit helper vars inits. + LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", + CGF.Builder.getInt32(0)); + llvm::ConstantInt *GlobalUBVal = CS != nullptr + ? CGF.Builder.getInt32(CS->size() - 1) + : CGF.Builder.getInt32(0); + LValue UB = + createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); + LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", + CGF.Builder.getInt32(1)); + LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", + CGF.Builder.getInt32(0)); + // Loop counter. + LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); + OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); + CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); + OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); + CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); + // Generate condition for loop. + BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, + OK_Ordinary, S.getBeginLoc(), FPOptions()); + // Increment for loop counter. + UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, + S.getBeginLoc(), true); + auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { + // Iterate through all sections and emit a switch construct: + // switch (IV) { + // case 0: + // <SectionStmt[0]>; + // break; + // ... + // case <NumSection> - 1: + // <SectionStmt[<NumSection> - 1]>; + // break; + // } + // .omp.sections.exit: + CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); + llvm::SwitchInst *SwitchStmt = + CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), + ExitBB, CS == nullptr ? 
1 : CS->size()); + if (CS) { + unsigned CaseNumber = 0; + for (const Stmt *SubStmt : CS->children()) { + auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); + CGF.EmitBlock(CaseBB); + SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); + CGF.EmitStmt(SubStmt); + CGF.EmitBranch(ExitBB); + ++CaseNumber; + } + } else { + llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); + CGF.EmitBlock(CaseBB); + SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); + CGF.EmitStmt(CapturedStmt); + CGF.EmitBranch(ExitBB); + } + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + + CodeGenFunction::OMPPrivateScope LoopScope(CGF); + if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables and post-update of lastprivate + // variables. + CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + CGF.EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); + HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); + + // Emit static non-chunked loop. + OpenMPScheduleTy ScheduleKind; + ScheduleKind.Schedule = OMPC_SCHEDULE_static; + CGOpenMPRuntime::StaticRTInput StaticInit( + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), + LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); + CGF.CGM.getOpenMPRuntime().emitForStaticInit( + CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); + // UB = min(UB, GlobalUB); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); + llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); + CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); + // IV = LB; + CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); + // while (idx <= UB) { BODY; ++idx; } + CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, + [](CodeGenFunction &) {}); + // Tell the runtime we are done. + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), + S.getDirectiveKind()); + }; + CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + + // Emit final copy of the lastprivate variables if IsLastIter != 0. 
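+ // IL ('.omp.sections.il.') is the is-last flag the runtime's static init
+ // call fills in for the thread that was assigned the final section.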
+ if (HasLastprivates) + CGF.EmitOMPLastprivateClauseFinal( + S, /*NoFinals=*/false, + CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); + }; + + bool HasCancel = false; + if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) + HasCancel = OSD->hasCancel(); + else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) + HasCancel = OPSD->hasCancel(); + OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, + HasCancel); + // Emit barrier for lastprivates only if 'sections' directive has 'nowait' + // clause. Otherwise the barrier will be generated by the codegen for the + // directive. + if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { + // Emit implicit barrier to synchronize threads and avoid data races on + // initialization of firstprivate variables. + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), + OMPD_unknown); + } +} + +void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { + { + OMPLexicalScope Scope(*this, S, OMPD_unknown); + EmitSections(S); + } + // Emit an implicit barrier at the end. + if (!S.getSingleClause<OMPNowaitClause>()) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), + OMPD_sections); + } +} + +void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, + S.hasCancel()); +} + +void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { + llvm::SmallVector<const Expr *, 8> CopyprivateVars; + llvm::SmallVector<const Expr *, 8> DestExprs; + llvm::SmallVector<const Expr *, 8> SrcExprs; + llvm::SmallVector<const Expr *, 8> AssignmentOps; + // Check if there are any 'copyprivate' clauses associated with this + // 'single' construct. + // Build a list of copyprivate variables along with helper expressions + // (<source>, <destination>, <destination>=<source> expressions) + for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { + CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); + DestExprs.append(C->destination_exprs().begin(), + C->destination_exprs().end()); + SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); + AssignmentOps.append(C->assignment_ops().begin(), + C->assignment_ops().end()); + } + // Emit code for 'single' region along with 'copyprivate' clauses + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope SingleScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); + CGF.EmitOMPPrivateClause(S, SingleScope); + (void)SingleScope.Privatize(); + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }; + { + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), + CopyprivateVars, DestExprs, + SrcExprs, AssignmentOps); + } + // Emit an implicit barrier at the end (to avoid data race on firstprivate + // init or if no 'nowait' clause was specified and no 'copyprivate' clause). + if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { + CGM.getOpenMPRuntime().emitBarrierCall( + *this, S.getBeginLoc(), + S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single);
+  }
+}
+
+static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
+  };
+  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
+}
+
+void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
+  emitMaster(*this, S);
+}
+
+void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
+  };
+  const Expr *Hint = nullptr;
+  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
+    Hint = HintClause->getHint();
+  OMPLexicalScope Scope(*this, S, OMPD_unknown);
+  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
+                                            S.getDirectiveName().getAsString(),
+                                            CodeGen, S.getBeginLoc(), Hint);
+}
+
+void CodeGenFunction::EmitOMPParallelForDirective(
+    const OMPParallelForDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for' directive.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
+    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+                               emitDispatchForLoopBounds);
+  };
+  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
+void CodeGenFunction::EmitOMPParallelForSimdDirective(
+    const OMPParallelForSimdDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for simd' directive.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+                               emitDispatchForLoopBounds);
+  };
+  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
+void CodeGenFunction::EmitOMPParallelMasterDirective(
+    const OMPParallelMasterDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'master' directive.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    OMPPrivateScope PrivateScope(CGF);
+    bool Copyins = CGF.EmitOMPCopyinClause(S);
+    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+    if (Copyins) {
+      // Emit implicit barrier to synchronize threads and avoid data races on
+      // propagation of the master thread's values of threadprivate variables
+      // to the local instances of those variables in all other implicit
+      // threads.
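+      // Illustrative example (editor's note, not part of the imported
+      // source): given
+      //   int tp;
+      //   #pragma omp threadprivate(tp)
+      //   #pragma omp parallel master copyin(tp)
+      // the master thread's 'tp' is copied into every other thread's copy
+      // before the region body runs, which is why the barrier below is
+      // needed.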
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + emitMaster(CGF, S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPParallelSectionsDirective( + const OMPParallelSectionsDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'sections' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitSections(S); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTaskBasedDirective( + const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, + const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, + OMPTaskDataTy &Data) { + // Emit outlined function for task construct. + const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); + auto I = CS->getCapturedDecl()->param_begin(); + auto PartId = std::next(I); + auto TaskT = std::next(I, 4); + // Check if the task is final + if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + const Expr *Cond = Clause->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Data.Final.setInt(CondConstant); + else + Data.Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. + Data.Final.setInt(/*IntVal=*/false); + } + // Check if the task has 'priority' clause. + if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { + const Expr *Prio = Clause->getPriority(); + Data.Priority.setInt(/*IntVal=*/true); + Data.Priority.setPointer(EmitScalarConversion( + EmitScalarExpr(Prio), Prio->getType(), + getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + Prio->getExprLoc())); + } + // The first function argument for tasks is a thread id, the second one is a + // part id (0 for tied tasks, >=0 for untied task). + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; + // Get list of private variables. + for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { + auto IRef = C->varlist_begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.PrivateVars.push_back(*IRef); + Data.PrivateCopies.push_back(IInit); + } + ++IRef; + } + } + EmittedAsPrivate.clear(); + // Get list of firstprivate variables. 
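+  // For example (illustrative, not part of the imported source): for
+  //   #pragma omp task firstprivate(a)
+  // 'a' is recorded in Data.FirstprivateVars below so that the task object
+  // captures a snapshot of its value at task-creation time rather than
+  // reading it at task-execution time.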
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + } + ++IRef; + ++IElemInitRef; + } + } + // Get list of lastprivate variables (for taskloops). + llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + auto IRef = C->varlist_begin(); + auto ID = C->destination_exprs().begin(); + for (const Expr *IInit : C->private_copies()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.LastprivateVars.push_back(*IRef); + Data.LastprivateCopies.push_back(IInit); + } + LastprivateDstsOrigs.insert( + {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), + cast<DeclRefExpr>(*IRef)}); + ++IRef; + ++ID; + } + } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const Expr *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getBeginLoc(), LHSs, RHSs, Data); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (const Expr *IRef : C->varlists()) + Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, + CapturedRegion](CodeGenFunction &CGF, + PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || + !Data.LastprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); + enum { PrivatesParam = 2, CopyFnParam = 3 }; + llvm::Value *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); + llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( + CS->getCapturedDecl()->getParam(PrivatesParam))); + // Map privates. 
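+      // Shape of the call built below (illustrative; the names are
+      // hypothetical): the generated copy helper receives the privates block
+      // plus one out-pointer per private variable, roughly
+      //   copy_fn(privates, &a_priv, &b_priv, ...);
+      // each out-pointer is then loaded to obtain the private copy's address.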
+      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
+      llvm::SmallVector<llvm::Value *, 16> CallArgs;
+      CallArgs.push_back(PrivatesPtr);
+      for (const Expr *E : Data.PrivateVars) {
+        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr = CGF.CreateMemTemp(
+            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
+        PrivatePtrs.emplace_back(VD, PrivatePtr);
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
+      for (const Expr *E : Data.FirstprivateVars) {
+        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+                              ".firstpriv.ptr.addr");
+        PrivatePtrs.emplace_back(VD, PrivatePtr);
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
+      for (const Expr *E : Data.LastprivateVars) {
+        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+                              ".lastpriv.ptr.addr");
+        PrivatePtrs.emplace_back(VD, PrivatePtr);
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
+      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
+      for (const auto &Pair : LastprivateDstsOrigs) {
+        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
+        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
+                        /*RefersToEnclosingVariableOrCapture=*/
+                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                        Pair.second->getType(), VK_LValue,
+                        Pair.second->getExprLoc());
+        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
+          return CGF.EmitLValue(&DRE).getAddress(CGF);
+        });
+      }
+      for (const auto &Pair : PrivatePtrs) {
+        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+                            CGF.getContext().getDeclAlign(Pair.first));
+        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
+      }
+    }
+    if (Data.Reductions) {
+      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
+      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
+                             Data.ReductionOps);
+      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
+      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
+        RedCG.emitSharedLValue(CGF, Cnt);
+        RedCG.emitAggregateType(CGF, Cnt);
+        // FIXME: This must be removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initializer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
+                                                           RedCG, Cnt);
+        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+        Replacement =
+            Address(CGF.EmitScalarConversion(
+                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+                        CGF.getContext().getPointerType(
+                            Data.ReductionCopies[Cnt]->getType()),
+                        Data.ReductionCopies[Cnt]->getExprLoc()),
+                    Replacement.getAlignment());
+        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
+                         [Replacement]() { return Replacement; });
+      }
+    }
+    // Privatize all private variables except for in_reduction items.
+    (void)Scope.Privatize();
+    SmallVector<const Expr *, 4> InRedVars;
+    SmallVector<const Expr *, 4> InRedPrivs;
+    SmallVector<const Expr *, 4> InRedOps;
+    SmallVector<const Expr *, 4> TaskgroupDescriptors;
+    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
+      auto IPriv = C->privates().begin();
+      auto IRed = C->reduction_ops().begin();
+      auto ITD = C->taskgroup_descriptors().begin();
+      for (const Expr *Ref : C->varlists()) {
+        InRedVars.emplace_back(Ref);
+        InRedPrivs.emplace_back(*IPriv);
+        InRedOps.emplace_back(*IRed);
+        TaskgroupDescriptors.emplace_back(*ITD);
+        std::advance(IPriv, 1);
+        std::advance(IRed, 1);
+        std::advance(ITD, 1);
+      }
+    }
+    // Privatize in_reduction items here, because taskgroup descriptors must be
+    // privatized earlier.
+    OMPPrivateScope InRedScope(CGF);
+    if (!InRedVars.empty()) {
+      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
+      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
+        RedCG.emitSharedLValue(CGF, Cnt);
+        RedCG.emitAggregateType(CGF, Cnt);
+        // The taskgroup descriptor variable is always implicitly firstprivate
+        // and privatized already during processing of the firstprivates.
+        // FIXME: This must be removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initializer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
+                                                           RedCG, Cnt);
+        llvm::Value *ReductionsPtr =
+            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
+                                 TaskgroupDescriptors[Cnt]->getExprLoc());
+        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+        Replacement = Address(
+            CGF.EmitScalarConversion(
+                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
+                InRedPrivs[Cnt]->getExprLoc()),
+            Replacement.getAlignment());
+        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
+                              [Replacement]() { return Replacement; });
+      }
+    }
+    (void)InRedScope.Privatize();
+
+    Action.Enter(CGF);
+    BodyGen(CGF);
+  };
+  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
+      Data.NumberOfParts);
+  OMPLexicalScope Scope(*this, S, llvm::None,
+                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
+                            !isOpenMPSimdDirective(S.getDirectiveKind()));
+  TaskGen(*this, OutlinedFn, Data);
+}
+
+static ImplicitParamDecl *
+createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
+                                  QualType Ty, CapturedDecl *CD,
+                                  SourceLocation Loc) {
+  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+                                           ImplicitParamDecl::Other);
+  auto *OrigRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
+  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
+                                              ImplicitParamDecl::Other);
+  auto *PrivateRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
+  QualType ElemType = C.getBaseElementType(Ty);
+  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
+                                           ImplicitParamDecl::Other);
+  auto *InitRef = DeclRefExpr::Create(
+      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
+      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType,
VK_LValue); + PrivateVD->setInitStyle(VarDecl::CInit); + PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, + InitRef, /*BasePath=*/nullptr, + VK_RValue)); + Data.FirstprivateVars.emplace_back(OrigRef); + Data.FirstprivateCopies.emplace_back(PrivateRef); + Data.FirstprivateInits.emplace_back(InitRef); + return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( + const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo) { + // Emit outlined function for task construct. + const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto I = CS->getCapturedDecl()->param_begin(); + auto PartId = std::next(I); + auto TaskT = std::next(I, 4); + OMPTaskDataTy Data; + // The task is not final. + Data.Final.setInt(/*IntVal=*/false); + // Get list of firstprivate variables. + for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (auto *IInit : C->private_copies()) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + ++IRef; + ++IElemInitRef; + } + } + OMPPrivateScope TargetScope(*this); + VarDecl *BPVD = nullptr; + VarDecl *PVD = nullptr; + VarDecl *SVD = nullptr; + if (InputInfo.NumberOfTargetItems > 0) { + auto *CD = CapturedDecl::Create( + getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); + llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); + QualType BaseAndPointersType = getContext().getConstantArrayType( + getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); + BPVD = createImplicitFirstprivateForType( + getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); + PVD = createImplicitFirstprivateForType( + getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); + QualType SizesType = getContext().getConstantArrayType( + getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), + ArrSize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, + S.getBeginLoc()); + TargetScope.addPrivate( + BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); + TargetScope.addPrivate(PVD, + [&InputInfo]() { return InputInfo.PointersArray; }); + TargetScope.addPrivate(SVD, + [&InputInfo]() { return InputInfo.SizesArray; }); + } + (void)TargetScope.Privatize(); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (const Expr *IRef : C->varlists()) + Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, + &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.FirstprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); + enum { PrivatesParam = 2, CopyFnParam = 3 }; + llvm::Value *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); + llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( + CS->getCapturedDecl()->getParam(PrivatesParam))); + // Map privates. 
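+      // Note (illustrative): for target task-based directives the privates
+      // mapped here include the implicit firstprivate arrays created above
+      // (base pointers, pointers and sizes) that describe the data being
+      // offloaded.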
+ llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; + llvm::SmallVector<llvm::Value *, 16> CallArgs; + CallArgs.push_back(PrivatesPtr); + for (const Expr *E : Data.FirstprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.emplace_back(VD, PrivatePtr); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); + for (const auto &Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + // Privatize all private variables except for in_reduction items. + (void)Scope.Privatize(); + if (InputInfo.NumberOfTargetItems > 0) { + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); + } + + Action.Enter(CGF); + OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); + BodyGen(CGF); + }; + llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, + Data.NumberOfParts); + llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); + IntegerLiteral IfCond(getContext(), TrueOrFalse, + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, + SharedsTy, CapturedStruct, &IfCond, Data); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. + const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_task) { + IfCond = C->getCondition(); + break; + } + } + + OMPTaskDataTy Data; + // Check if we should emit tied or untied task. 
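+  // E.g. (illustrative, not part of the imported source):
+  //   #pragma omp task untied
+  // clears Data.Tied, so the generated task may resume on a different thread
+  // after a task scheduling point, and its part id can then be non-zero.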
+ Data.Tied = !S.getSingleClause<OMPUntiedClause>(); + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, + const OMPTaskDataTy &Data) { + CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, + SharedsTy, CapturedStruct, IfCond, + Data); + }; + EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); +} + +void CodeGenFunction::EmitOMPTaskyieldDirective( + const OMPTaskyieldDirective &S) { + CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); +} + +void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPTaskgroupDirective( + const OMPTaskgroupDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + if (const Expr *E = S.getReductionRef()) { + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const Expr *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + llvm::Value *ReductionDesc = + CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), + LHSs, RHSs, Data); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + CGF.EmitVarDecl(*VD); + CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), + /*Volatile=*/false, E->getType()); + } + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { + CGM.getOpenMPRuntime().emitFlush( + *this, + [&S]() -> ArrayRef<const Expr *> { + if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) + return llvm::makeArrayRef(FlushClause->varlist_begin(), + FlushClause->varlist_end()); + return llvm::None; + }(), + S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, + Expr *IncExpr) { + // Emit the loop iteration variable. + const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on each + // iteration (e.g., it is foldable into a constant). + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. 
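+    // E.g. (illustrative): for 'for (int i = 0; i < N; i += 2)' the
+    // precomputed count is roughly (N - 0 + 2 - 1) / 2, evaluated once here
+    // instead of being recomputed on every loop iteration.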
+ EmitIgnoredExpr(S.getCalcLastIteration()); + } + + CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); + + bool HasLastprivateClause = false; + // Check pre-condition. + { + OMPLoopScope PreInitScope(*this, S); + // Skip the entire loop if we don't meet the precondition. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } + + emitAlignedClause(*this, S); + // Emit 'then' code. + { + // Emit helper vars inits. + + LValue LB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedLowerBoundVariable() + : S.getLowerBoundVariable()))); + LValue UB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedUpperBoundVariable() + : S.getUpperBoundVariable()))); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + + OMPPrivateScope LoopScope(*this); + if (EmitOMPFirstprivateClause(S, LoopScope)) { + // Emit implicit barrier to synchronize threads and avoid data races + // on initialization of firstprivate variables and post-update of + // lastprivate variables. + CGM.getOpenMPRuntime().emitBarrierCall( + *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } + EmitOMPPrivateClause(S, LoopScope); + if (isOpenMPSimdDirective(S.getDirectiveKind()) && + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPTeamsDirective(S.getDirectiveKind())) + EmitOMPReductionClauseInit(S, LoopScope); + HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); + EmitOMPPrivateLoopCounters(S, LoopScope); + (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); + + // Detect the distribute schedule kind and chunk. + llvm::Value *Chunk = nullptr; + OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; + if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { + ScheduleKind = C->getDistScheduleKind(); + if (const Expr *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getBeginLoc()); + } + } else { + // Default behaviour for dist_schedule clause. + CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( + *this, S, ScheduleKind, Chunk); + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + // OpenMP [2.10.8, distribute Construct, Description] + // If dist_schedule is specified, kind must be static. If specified, + // iterations are divided into chunks of size chunk_size, chunks are + // assigned to the teams of the league in a round-robin fashion in the + // order of the team number. 
When no chunk_size is specified, the + // iteration space is divided into chunks that are approximately equal + // in size, and at most one chunk is distributed to each team of the + // league. The size of the chunks is unspecified in this case. + bool StaticChunked = RT.isStaticChunked( + ScheduleKind, /* Chunked */ Chunk != nullptr) && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + if (RT.isStaticNonchunked(ScheduleKind, + /* Chunked */ Chunk != nullptr) || + StaticChunked) { + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + StaticChunked ? Chunk : nullptr); + RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, + StaticInit); + JumpDest LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + // UB = min(UB, GlobalUB); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound()); + // IV = LB; + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit()); + + const Expr *Cond = + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + + if (StaticChunked) + Cond = S.getCombinedDistCond(); + + // For static unchunked schedules generate: + // + // 1. For distribute alone, codegen + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // 2. When combined with 'for' (e.g. as in 'distribute parallel for') + // while (idx <= UB) { + // <CodeGen rest of pragma>(LB, UB); + // idx += ST; + // } + // + // For static chunk one schedule generate: + // + // while (IV <= GlobalUB) { + // <CodeGen rest of pragma>(LB, UB); + // LB += ST; + // UB += ST; + // UB = min(UB, GlobalUB); + // IV = LB; + // } + // + emitCommonSimdLoop( + *this, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + }, + [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, + StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [&S, StaticChunked](CodeGenFunction &CGF) { + if (StaticChunked) { + CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); + CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedInit()); + } + }); + }); + EmitBlock(LoopExit.getBlock()); + // Tell the runtime we are done. + RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); + } else { + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. 
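+        // Roughly (illustrative pseudocode, not the emitted IR):
+        //   init(schedule, chunk);
+        //   while (next_chunk(&LB, &UB)) {
+        //     for (IV = LB; IV <= UB; ++IV)
+        //       BODY;
+        //   }
+        // where init/next_chunk stand in for the runtime entry points.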
+ const OMPLoopArguments LoopArguments = { + LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), + IL.getAddress(*this), Chunk}; + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, + CodeGenLoop); + } + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + } + if (isOpenMPSimdDirective(S.getDirectiveKind()) && + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPTeamsDirective(S.getDirectiveKind())) { + EmitOMPReductionClauseFinal(S, OMPD_simd); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); + } + // Emit final copy of the lastprivate variables if IsLastIter != 0. + if (HasLastprivateClause) { + EmitOMPLastprivateClauseFinal( + S, /*NoFinals=*/false, + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); + } + } + + // We're now done with the loop, so jump to the continuation block. + if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } + } +} + +void CodeGenFunction::EmitOMPDistributeDirective( + const OMPDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); +} + +static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, + const CapturedStmt *S) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; + CGF.CapturedStmtInfo = &CapStmtInfo; + llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + Fn->setDoesNotRecurse(); + return Fn; +} + +void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { + if (S.hasClausesOfKind<OMPDependClause>()) { + assert(!S.getAssociatedStmt() && + "No associated statement must be in ordered depend construct."); + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) + CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); + return; + } + const auto *C = S.getSingleClause<OMPSIMDClause>(); + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, + PrePostActionTy &Action) { + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + if (C) { + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), + OutlinedFn, CapturedVars); + } else { + Action.Enter(CGF); + CGF.EmitStmt(CS->getCapturedStmt()); + } + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); +} + +static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, + QualType SrcType, QualType DestType, + SourceLocation Loc) { + assert(CGF.hasScalarEvaluationKind(DestType) && + "DestType must have scalar evaluation kind."); + assert(!Val.isAggregate() && "Must be a scalar or complex."); + return Val.isScalar() ? 
CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, + DestType, Loc) + : CGF.EmitComplexToScalarConversion( + Val.getComplexVal(), SrcType, DestType, Loc); +} + +static CodeGenFunction::ComplexPairTy +convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, + QualType DestType, SourceLocation Loc) { + assert(CGF.getEvaluationKind(DestType) == TEK_Complex && + "DestType must have complex evaluation kind."); + CodeGenFunction::ComplexPairTy ComplexVal; + if (Val.isScalar()) { + // Convert the input element to the element type of the complex. + QualType DestElementType = + DestType->castAs<ComplexType>()->getElementType(); + llvm::Value *ScalarVal = CGF.EmitScalarConversion( + Val.getScalarVal(), SrcType, DestElementType, Loc); + ComplexVal = CodeGenFunction::ComplexPairTy( + ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); + } else { + assert(Val.isComplex() && "Must be a scalar or complex."); + QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); + QualType DestElementType = + DestType->castAs<ComplexType>()->getElementType(); + ComplexVal.first = CGF.EmitScalarConversion( + Val.getComplexVal().first, SrcElementType, DestElementType, Loc); + ComplexVal.second = CGF.EmitScalarConversion( + Val.getComplexVal().second, SrcElementType, DestElementType, Loc); + } + return ComplexVal; +} + +static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, + LValue LVal, RValue RVal) { + if (LVal.isGlobalReg()) { + CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); + } else { + CGF.EmitAtomicStore(RVal, LVal, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, + LVal.isVolatile(), /*isInit=*/false); + } +} + +void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, + QualType RValTy, SourceLocation Loc) { + switch (getEvaluationKind(LVal.getType())) { + case TEK_Scalar: + EmitStoreThroughLValue(RValue::get(convertToScalarValue( + *this, RVal, RValTy, LVal.getType(), Loc)), + LVal); + break; + case TEK_Complex: + EmitStoreOfComplex( + convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, + /*isInit=*/false); + break; + case TEK_Aggregate: + llvm_unreachable("Must be a scalar or complex."); + } +} + +static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, + const Expr *X, const Expr *V, + SourceLocation Loc) { + // v = x; + assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); + assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); + LValue XLValue = CGF.EmitLValue(X); + LValue VLValue = CGF.EmitLValue(V); + RValue Res = XLValue.isGlobalReg() + ? CGF.EmitLoadOfLValue(XLValue, Loc) + : CGF.EmitAtomicLoad( + XLValue, Loc, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, + XLValue.isVolatile()); + // OpenMP, 2.12.6, atomic Construct + // Any atomic construct with a seq_cst clause forces the atomically + // performed operation to include an implicit flush operation without a + // list. 
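+  // E.g. (illustrative): '#pragma omp atomic read seq_cst' produces the
+  // atomic load above followed by the runtime flush call below, i.e. an
+  // implicit '#pragma omp flush' with no list.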
+ if (IsSeqCst) + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); +} + +static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, + const Expr *X, const Expr *E, + SourceLocation Loc) { + // x = expr; + assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); + emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); + // OpenMP, 2.12.6, atomic Construct + // Any atomic construct with a seq_cst clause forces the atomically + // performed operation to include an implicit flush operation without a + // list. + if (IsSeqCst) + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +} + +static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, + RValue Update, + BinaryOperatorKind BO, + llvm::AtomicOrdering AO, + bool IsXLHSInRHSPart) { + ASTContext &Context = CGF.getContext(); + // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' + // expression is simple and atomic is allowed for the given type for the + // target platform. + if (BO == BO_Comma || !Update.isScalar() || + !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || + (!isa<llvm::ConstantInt>(Update.getScalarVal()) && + (Update.getScalarVal()->getType() != + X.getAddress(CGF).getElementType())) || + !X.getAddress(CGF).getElementType()->isIntegerTy() || + !Context.getTargetInfo().hasBuiltinAtomic( + Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) + return std::make_pair(false, RValue::get(nullptr)); + + llvm::AtomicRMWInst::BinOp RMWOp; + switch (BO) { + case BO_Add: + RMWOp = llvm::AtomicRMWInst::Add; + break; + case BO_Sub: + if (!IsXLHSInRHSPart) + return std::make_pair(false, RValue::get(nullptr)); + RMWOp = llvm::AtomicRMWInst::Sub; + break; + case BO_And: + RMWOp = llvm::AtomicRMWInst::And; + break; + case BO_Or: + RMWOp = llvm::AtomicRMWInst::Or; + break; + case BO_Xor: + RMWOp = llvm::AtomicRMWInst::Xor; + break; + case BO_LT: + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::Max) + : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin + : llvm::AtomicRMWInst::UMax); + break; + case BO_GT: + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::Min) + : (IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::UMax
+                                 : llvm::AtomicRMWInst::UMin);
+    break;
+  case BO_Assign:
+    RMWOp = llvm::AtomicRMWInst::Xchg;
+    break;
+  case BO_Mul:
+  case BO_Div:
+  case BO_Rem:
+  case BO_Shl:
+  case BO_Shr:
+  case BO_LAnd:
+  case BO_LOr:
+    return std::make_pair(false, RValue::get(nullptr));
+  case BO_PtrMemD:
+  case BO_PtrMemI:
+  case BO_LE:
+  case BO_GE:
+  case BO_EQ:
+  case BO_NE:
+  case BO_Cmp:
+  case BO_AddAssign:
+  case BO_SubAssign:
+  case BO_AndAssign:
+  case BO_OrAssign:
+  case BO_XorAssign:
+  case BO_MulAssign:
+  case BO_DivAssign:
+  case BO_RemAssign:
+  case BO_ShlAssign:
+  case BO_ShrAssign:
+  case BO_Comma:
+    llvm_unreachable("Unsupported atomic update operation");
+  }
+  llvm::Value *UpdateVal = Update.getScalarVal();
+  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
+    UpdateVal = CGF.Builder.CreateIntCast(
+        IC, X.getAddress(CGF).getElementType(),
+        X.getType()->hasSignedIntegerRepresentation());
+  }
+  llvm::Value *Res =
+      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
+  return std::make_pair(true, RValue::get(Res));
+}
+
+std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
+    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+    llvm::AtomicOrdering AO, SourceLocation Loc,
+    const llvm::function_ref<RValue(RValue)> CommonGen) {
+  // Update expressions are allowed to have the following forms:
+  //  x binop= expr; -> xrval binop expr;
+  //  x++, ++x -> xrval + 1;
+  //  x--, --x -> xrval - 1;
+  //  x = x binop expr; -> xrval binop expr;
+  //  x = expr Op x; -> expr binop xrval;
+  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
+  if (!Res.first) {
+    if (X.isGlobalReg()) {
+      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
+      // 'xrval'.
+      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
+    } else {
+      // Perform compare-and-swap procedure.
+      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
+    }
+  }
+  return Res;
+}
+
+static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                    const Expr *X, const Expr *E,
+                                    const Expr *UE, bool IsXLHSInRHSPart,
+                                    SourceLocation Loc) {
+  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+         "Update expr in 'atomic update' must be a binary operator.");
+  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+  // Update expressions are allowed to have the following forms:
+  //  x binop= expr; -> xrval binop expr;
+  //  x++, ++x -> xrval + 1;
+  //  x--, --x -> xrval - 1;
+  //  x = x binop expr; -> xrval binop expr;
+  //  x = expr Op x; -> expr binop xrval;
+  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  llvm::AtomicOrdering AO = IsSeqCst
+                                ? llvm::AtomicOrdering::SequentiallyConsistent
+                                : llvm::AtomicOrdering::Monotonic;
+  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
+    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+    return CGF.EmitAnyExpr(UE);
+  };
+  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
+      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
+static RValue convertToType(CodeGenFunction &CGF, RValue Value,
+                            QualType SourceType, QualType ResType,
+                            SourceLocation Loc) {
+  switch (CGF.getEvaluationKind(ResType)) {
+  case TEK_Scalar:
+    return RValue::get(
+        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
+  case TEK_Complex: {
+    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
+    return RValue::getComplex(Res.first, Res.second);
+  }
+  case TEK_Aggregate:
+    break;
+  }
+  llvm_unreachable("Must be a scalar or complex.");
+}
+
+static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                     bool IsPostfixUpdate, const Expr *V,
+                                     const Expr *X, const Expr *E,
+                                     const Expr *UE, bool IsXLHSInRHSPart,
+                                     SourceLocation Loc) {
+  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
+  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
+  RValue NewVVal;
+  LValue VLValue = CGF.EmitLValue(V);
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  llvm::AtomicOrdering AO = IsSeqCst
+                                ? llvm::AtomicOrdering::SequentiallyConsistent
+                                : llvm::AtomicOrdering::Monotonic;
+  QualType NewVValType;
+  if (UE) {
+    // 'x' is updated with some additional value.
+    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+           "Update expr in 'atomic capture' must be a binary operator.");
+    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+    // Update expressions are allowed to have the following forms:
+    //  x binop= expr; -> xrval binop expr;
+    //  x++, ++x -> xrval + 1;
+    //  x--, --x -> xrval - 1;
+    //  x = x binop expr; -> xrval binop expr;
+    //  x = expr Op x; -> expr binop xrval;
+    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+    NewVValType = XRValExpr->getType();
+    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
+                  IsPostfixUpdate](RValue XRValue) {
+      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+      RValue Res = CGF.EmitAnyExpr(UE);
+      NewVVal = IsPostfixUpdate ? XRValue : Res;
+      return Res;
+    };
+    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
+        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+    if (Res.first) {
+      // 'atomicrmw' instruction was generated.
+      if (IsPostfixUpdate) {
+        // Use old value from 'atomicrmw'.
+        NewVVal = Res.second;
+      } else {
+        // 'atomicrmw' does not provide the new value, so evaluate it using
+        // the old value of 'x'.
+ CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); + CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); + NewVVal = CGF.EmitAnyExpr(UE); + } + } + } else { + // 'x' is simply rewritten with some 'expr'. + NewVValType = X->getType().getNonReferenceType(); + ExprRValue = convertToType(CGF, ExprRValue, E->getType(), + X->getType().getNonReferenceType(), Loc); + auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { + NewVVal = XRValue; + return ExprRValue; + }; + // Try to perform atomicrmw xchg, otherwise simple exchange. + auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( + XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, + Loc, Gen); + if (Res.first) { + // 'atomicrmw' instruction was generated. + NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; + } + } + // Emit post-update store to 'v' of old/new 'x' value. + CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); + // OpenMP, 2.12.6, atomic Construct + // Any atomic construct with a seq_cst clause forces the atomically + // performed operation to include an implicit flush operation without a + // list. + if (IsSeqCst) + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +} + +static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, + bool IsSeqCst, bool IsPostfixUpdate, + const Expr *X, const Expr *V, const Expr *E, + const Expr *UE, bool IsXLHSInRHSPart, + SourceLocation Loc) { + switch (Kind) { + case OMPC_read: + emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); + break; + case OMPC_write: + emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); + break; + case OMPC_unknown: + case OMPC_update: + emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); + break; + case OMPC_capture: + emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, + IsXLHSInRHSPart, Loc); + break; + case OMPC_if: + case OMPC_final: + case OMPC_num_threads: + case OMPC_private: + case OMPC_firstprivate: + case OMPC_lastprivate: + case OMPC_reduction: + case OMPC_task_reduction: + case OMPC_in_reduction: + case OMPC_safelen: + case OMPC_simdlen: + case OMPC_allocator: + case OMPC_allocate: + case OMPC_collapse: + case OMPC_default: + case OMPC_seq_cst: + case OMPC_shared: + case OMPC_linear: + case OMPC_aligned: + case OMPC_copyin: + case OMPC_copyprivate: + case OMPC_flush: + case OMPC_proc_bind: + case OMPC_schedule: + case OMPC_ordered: + case OMPC_nowait: + case OMPC_untied: + case OMPC_threadprivate: + case OMPC_depend: + case OMPC_mergeable: + case OMPC_device: + case OMPC_threads: + case OMPC_simd: + case OMPC_map: + case OMPC_num_teams: + case OMPC_thread_limit: + case OMPC_priority: + case OMPC_grainsize: + case OMPC_nogroup: + case OMPC_num_tasks: + case OMPC_hint: + case OMPC_dist_schedule: + case OMPC_defaultmap: + case OMPC_uniform: + case OMPC_to: + case OMPC_from: + case OMPC_use_device_ptr: + case OMPC_is_device_ptr: + case OMPC_unified_address: + case OMPC_unified_shared_memory: + case OMPC_reverse_offload: + case OMPC_dynamic_allocators: + case OMPC_atomic_default_mem_order: + case OMPC_device_type: + case OMPC_match: + case OMPC_nontemporal: + llvm_unreachable("Clause is not allowed in 'omp atomic'."); + } +} + +void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { + bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); + OpenMPClauseKind Kind = OMPC_unknown; + for (const OMPClause *C : S.clauses()) { + // Find first clause (skip seq_cst clause, if it is first). 
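+  // E.g. (illustrative): '#pragma omp atomic capture seq_cst' yields
+  // IsSeqCst == true and Kind == OMPC_capture, while a bare
+  // '#pragma omp atomic' leaves Kind == OMPC_unknown, which is handled as an
+  // update below.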
+ if (C->getClauseKind() != OMPC_seq_cst) { + Kind = C->getClauseKind(); + break; + } + } + + const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); + if (const auto *FE = dyn_cast<FullExpr>(CS)) + enterFullExpression(FE); + // Processing for statements under 'atomic capture'. + if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { + for (const Stmt *C : Compound->body()) { + if (const auto *FE = dyn_cast<FullExpr>(C)) + enterFullExpression(FE); + } + } + + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitStopPoint(CS); + emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), + S.getV(), S.getExpr(), S.getUpdateExpr(), + S.isXLHSInRHSPart(), S.getBeginLoc()); + }; + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); +} + +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { + assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); + CodeGenModule &CGM = CGF.CGM; + + // On device emit this construct as inlined code. + if (CGM.getLangOpts().OpenMPIsDevice) { + OMPLexicalScope Scope(CGF, S, OMPD_target); + CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + }); + return; + } + + llvm::Function *Fn = nullptr; + llvm::Constant *FnID = nullptr; + + const Expr *IfCond = nullptr; + // Check for the at most one if clause associated with the target region. + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_target) { + IfCond = C->getCondition(); + break; + } + } + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + // Check if we have an if clause whose conditional always evaluates to false + // or if we do not have any targets specified. If so the target region is not + // an offload entry point. + bool IsOffloadEntry = true; + if (IfCond) { + bool Val; + if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) + IsOffloadEntry = false; + } + if (CGM.getLangOpts().OMPTargetTriples.empty()) + IsOffloadEntry = false; + + assert(CGF.CurFuncDecl && "No parent declaration for target region!"); + StringRef ParentName; + // In case we have Ctors/Dtors we use the complete type variant to produce + // the mangling of the device outlined kernel. + if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) + ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); + else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) + ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); + else + ParentName = + CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); + + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, + IsOffloadEntry, CodeGen); + OMPLexicalScope Scope(CGF, S, OMPD_task); + auto &&SizeEmitter = + [IsOffloadEntry](CodeGenFunction &CGF, + const OMPLoopDirective &D) -> llvm::Value * { + if (IsOffloadEntry) { + OMPLoopScope(CGF, D); + // Emit calculation of the iterations count. 
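+      // Note (illustrative): the trip count computed here is forwarded to the
+      // offloading runtime through the SizeEmitter callback passed to
+      // emitTargetCall below, e.g. so it can choose suitable launch bounds.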
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); + NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, + /*isSigned=*/false); + return NumIterations; + } + return nullptr; + }; + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, + SizeEmitter); +} + +static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); + + CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); +} + +void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { + const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); + llvm::Function *OutlinedFn = + CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + + const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); + const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); + if (NT || TL) { + const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; + const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; + + CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, + S.getBeginLoc()); + } + + OMPTeamsScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, + CapturedVars); +} + +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + // Emit teams region as a standalone region. 
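+  // E.g. (illustrative): for '#pragma omp teams num_teams(4)' the
+  // emitCommonOMPTeamsDirective helper below first emits the
+  // num_teams/thread_limit runtime call and then forks the outlined teams
+  // function via emitTeamsCall.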
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDirective &S) { + auto *CS = S.getCapturedStmt(OMPD_teams); + Action.Enter(CGF); + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); + CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDirective( + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void +emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. 
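+  // Note (illustrative): for combined constructs such as
+  // 'target teams distribute' the distribute loop is emitted as an inlined
+  // region inside the outlined teams function, mirroring the nesting of the
+  // original pragma.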
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetTeamsDistributeSimdRegion( + CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeSimdDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. 
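+  // With /*IsOffloadEntry=*/true the call below also registers the outlined
+  // function as an offload entry, for an illustrative construct such as
+  // (hypothetical user code):
+  //
+  //   #pragma omp target teams distribute simd
+  //   for (int i = 0; i < n; ++i)
+  //     a[i] += b[i];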
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTeamsDistributeDirective( + const OMPTeamsDistributeDirective &S) { + + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( + const OMPTeamsDistributeSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( + const OMPTeamsDistributeParallelForDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( + const OMPTeamsDistributeParallelForSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
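+  // Illustrative host-side source form for this combined directive
+  // (hypothetical user code):
+  //
+  //   #pragma omp teams distribute parallel for simd
+  //   for (int i = 0; i < n; ++i)
+  //     a[i] = b[i] * c[i];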
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, + CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +static void emitTargetTeamsDistributeParallelForRegion( + CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForDirective &S) { + // Emit SPMD target teams distribute parallel for region as a standalone + // region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( + const OMPTargetTeamsDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetTeamsDistributeParallelForSimdRegion( + CodeGenFunction &CGF, + const OMPTargetTeamsDistributeParallelForSimdDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
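+  // Illustrative source form for the SPMD-style combined directive handled
+  // here (hypothetical user code):
+  //
+  //   #pragma omp target teams distribute parallel for simd map(tofrom : a)
+  //   for (int i = 0; i < n; ++i)
+  //     a[i] *= 2;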
+ auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + // Emit SPMD target teams distribute parallel for simd region as a standalone + // region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( + const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPCancellationPointDirective( + const OMPCancellationPointDirective &S) { + CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), + S.getCancelRegion()); +} + +void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_cancel) { + IfCond = C->getCondition(); + break; + } + } + if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + // TODO: This check is necessary as we only generate `omp parallel` through + // the OpenMPIRBuilder for now. 
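+    // Illustrative user code that reaches this path (hypothetical names);
+    // the `if (cancel : ...)` form corresponds to the OMPD_cancel name
+    // modifier matched above:
+    //
+    //   #pragma omp parallel
+    //   {
+    //     if (something_failed()) {
+    //       #pragma omp cancel parallel if (cancel : should_cancel)
+    //     }
+    //     more_work();
+    //   }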
+    if (S.getCancelRegion() == OMPD_parallel) {
+      llvm::Value *IfCondition = nullptr;
+      if (IfCond)
+        IfCondition = EmitScalarExpr(IfCond,
+                                     /*IgnoreResultAssign=*/true);
+      return Builder.restoreIP(
+          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
+    }
+  }
+
+  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
+                                        S.getCancelRegion());
+}
+
+CodeGenFunction::JumpDest
+CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
+  if (Kind == OMPD_parallel || Kind == OMPD_task ||
+      Kind == OMPD_target_parallel)
+    return ReturnBlock;
+  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
+         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
+         Kind == OMPD_distribute_parallel_for ||
+         Kind == OMPD_target_parallel_for ||
+         Kind == OMPD_teams_distribute_parallel_for ||
+         Kind == OMPD_target_teams_distribute_parallel_for);
+  return OMPCancelStack.getExitBlock();
+}
+
+void CodeGenFunction::EmitOMPUseDevicePtrClause(
+    const OMPClause &NC, OMPPrivateScope &PrivateScope,
+    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
+  const auto &C = cast<OMPUseDevicePtrClause>(NC);
+  auto OrigVarIt = C.varlist_begin();
+  auto InitIt = C.inits().begin();
+  for (const Expr *PvtVarIt : C.private_copies()) {
+    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
+    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
+    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
+
+    // In order to identify the right initializer we need to match the
+    // declaration used by the mapping logic. In some cases we may get
+    // OMPCapturedExprDecl that refers to the original declaration.
+    const ValueDecl *MatchingVD = OrigVD;
+    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+      // OMPCapturedExprDecls are used to privatize fields of the current
+      // structure.
+      const auto *ME = cast<MemberExpr>(OED->getInit());
+      assert(isa<CXXThisExpr>(ME->getBase()) &&
+             "Base should be the current struct!");
+      MatchingVD = ME->getMemberDecl();
+    }
+
+    // If we don't have information about the current list item, move on to
+    // the next one.
+    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+    if (InitAddrIt == CaptureDeviceAddrMap.end())
+      continue;
+
+    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
+                                                         InitAddrIt, InitVD,
+                                                         PvtVD]() {
+      // Initialize the temporary initialization variable with the address we
+      // get from the runtime library. We have to cast the source address
+      // because it is always a void *. References are materialized in the
+      // privatization scope, so the initialization here disregards the fact
+      // the original variable is a reference.
+      QualType AddrQTy =
+          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
+      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
+      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
+      setAddrOfLocalVar(InitVD, InitAddr);
+
+      // Emit the private declaration; it will be initialized by the
+      // declaration we just added to the local declarations map.
+      EmitDecl(*PvtVD);
+
+      // The initialization variable has served its purpose in the emission
+      // of the previous declaration, so we don't need it anymore.
+      LocalDeclMap.erase(InitVD);
+
+      // Return the address of the private variable.
+      return GetAddrOfLocalVar(PvtVD);
+    });
+    assert(IsRegistered && "firstprivate var already registered as private");
+    // Silence the warning about unused variable.
+    (void)IsRegistered;
+
+    ++OrigVarIt;
+    ++InitIt;
+  }
+}
+
+// Generate the instructions for '#pragma omp target data' directive.
+void CodeGenFunction::EmitOMPTargetDataDirective(
+    const OMPTargetDataDirective &S) {
+  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
+
+  // Create a pre/post action to signal the privatization of the device
+  // pointer. This action can be replaced by the OpenMP runtime code
+  // generation to deactivate privatization.
+  bool PrivatizeDevicePointers = false;
+  class DevicePointerPrivActionTy : public PrePostActionTy {
+    bool &PrivatizeDevicePointers;
+
+  public:
+    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
+        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
+    void Enter(CodeGenFunction &CGF) override {
+      PrivatizeDevicePointers = true;
+    }
+  };
+  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
+
+  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
+                       CodeGenFunction &CGF, PrePostActionTy &Action) {
+    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
+    };
+
+    // Codegen that selects whether to generate the privatization code or not.
+    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
+                          &InnermostCodeGen](CodeGenFunction &CGF,
+                                             PrePostActionTy &Action) {
+      RegionCodeGenTy RCG(InnermostCodeGen);
+      PrivatizeDevicePointers = false;
+
+      // Call the pre-action to change the status of PrivatizeDevicePointers
+      // if needed.
+      Action.Enter(CGF);
+
+      if (PrivatizeDevicePointers) {
+        OMPPrivateScope PrivateScope(CGF);
+        // Emit all instances of the use_device_ptr clause.
+        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
+          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
+                                        Info.CaptureDeviceAddrMap);
+        (void)PrivateScope.Privatize();
+        RCG(CGF);
+      } else {
+        RCG(CGF);
+      }
+    };
+
+    // Forward the provided action to the privatization codegen.
+    RegionCodeGenTy PrivRCG(PrivCodeGen);
+    PrivRCG.setAction(Action);
+
+    // Although the body of the region is emitted as an inlined directive, we
+    // don't use an inline scope, as changes to the references inside the
+    // region are expected to be visible outside, so we do not privatize them.
+    OMPLexicalScope Scope(CGF, S);
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
+                                                    PrivRCG);
+  };
+
+  RegionCodeGenTy RCG(CodeGen);
+
+  // If we don't have target devices, don't bother emitting the data mapping
+  // code.
+  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
+    RCG(*this);
+    return;
+  }
+
+  // Check if we have any if clause associated with the directive.
+  const Expr *IfCond = nullptr;
+  if (const auto *C = S.getSingleClause<OMPIfClause>())
+    IfCond = C->getCondition();
+
+  // Check if we have any device clause associated with the directive.
+  const Expr *Device = nullptr;
+  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
+    Device = C->getDevice();
+
+  // Set the action to signal privatization of device pointers.
+  RCG.setAction(PrivAction);
+
+  // Emit region code.
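+  // The user-level form being lowered is, e.g. (illustrative, hypothetical
+  // names):
+  //
+  //   #pragma omp target data map(tofrom : a[0:n]) use_device_ptr(a)
+  //   launch_with_device_pointer(a, n);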
+ CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, + Info); +} + +void CodeGenFunction::EmitOMPTargetEnterDataDirective( + const OMPTargetEnterDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (const auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + OMPLexicalScope Scope(*this, S, OMPD_task); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +void CodeGenFunction::EmitOMPTargetExitDataDirective( + const OMPTargetExitDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (const auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (const auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + OMPLexicalScope Scope(*this, S, OMPD_task); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +static void emitTargetParallelRegion(CodeGenFunction &CGF, + const OMPTargetParallelDirective &S, + PrePostActionTy &Action) { + // Get the captured statement associated with the 'parallel' region. + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + Action.Enter(CGF); + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. 
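+  // Illustrative construct whose device function is produced here
+  // (hypothetical user code):
+  //
+  //   #pragma omp target parallel reduction(+ : sum)
+  //   sum += partial_work();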
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetParallelDirective( + const OMPTargetParallelDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetParallelForRegion(CodeGenFunction &CGF, + const OMPTargetParallelForDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_for, S.hasCancel()); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetParallelForDirective( + const OMPTargetParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void +emitTargetParallelForSimdRegion(CodeGenFunction &CGF, + const OMPTargetParallelForSimdDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForSimdDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. 
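+  // Illustrative construct emitted as an SPMD region here (hypothetical
+  // user code):
+  //
+  //   #pragma omp target parallel for simd
+  //   for (int i = 0; i < n; ++i)
+  //     a[i] = b[i] + c[i];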
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +/// Emit a helper variable and return corresponding lvalue. +static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, + const ImplicitParamDecl *PVD, + CodeGenFunction::OMPPrivateScope &Privates) { + const auto *VDecl = cast<VarDecl>(Helper->getDecl()); + Privates.addPrivate(VDecl, + [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); }); +} + +void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { + assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); + // Emit outlined function for task construct. + const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); + Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_taskloop) { + IfCond = C->getCondition(); + break; + } + } + + OMPTaskDataTy Data; + // Check if taskloop must be emitted without taskgroup. + Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); + // TODO: Check if we should emit tied or untied task. + Data.Tied = true; + // Set scheduling for taskloop + if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { + // grainsize clause + Data.Schedule.setInt(/*IntVal=*/false); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); + } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { + // num_tasks clause + Data.Schedule.setInt(/*IntVal=*/true); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); + } + + auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + OMPLoopScope PreInitScope(CGF, S); + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + ContBlock = CGF.createBasicBlock("taskloop.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + (void)CGF.EmitOMPLinearClauseInit(S); + + OMPPrivateScope LoopScope(CGF); + // Emit helper vars inits. 
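+    // The taskloop captured declaration passes the loop bounds as trailing
+    // parameters: positions 5..8 (see the enum below) hold the lower bound,
+    // upper bound, stride, and last-iteration flag that the taskloop runtime
+    // fills in for each generated task; mapParam binds the directive's helper
+    // variables to those parameters.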
+ enum { LowerBound = 5, UpperBound, Stride, LastIter }; + auto *I = CS->getCapturedDecl()->param_begin(); + auto *LBP = std::next(I, LowerBound); + auto *UBP = std::next(I, UpperBound); + auto *STP = std::next(I, Stride); + auto *LIP = std::next(I, LastIter); + mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, + LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + { + OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + emitCommonSimdLoop( + CGF, S, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S); + }, + [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + }); + } + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + // Emit final copy of the lastprivate variables if IsLastIter != 0. 
+ if (HasLastprivateClause) { + CGF.EmitOMPLastprivateClauseFinal( + S, isOpenMPSimdDirective(S.getDirectiveKind()), + CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getBeginLoc()))); + } + CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getBeginLoc())); + }); + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, + const OMPTaskDataTy &Data) { + auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, + &Data](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, + OutlinedFn, SharedsTy, + CapturedStruct, IfCond, Data); + }; + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, + CodeGen); + }; + if (Data.Nogroup) { + EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); + } else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, + Data); + }, + S.getBeginLoc()); + } +} + +void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { + EmitOMPTaskLoopBasedDirective(S); +} + +void CodeGenFunction::EmitOMPTaskLoopSimdDirective( + const OMPTaskLoopSimdDirective &S) { + OMPLexicalScope Scope(*this, S); + EmitOMPTaskLoopBasedDirective(S); +} + +void CodeGenFunction::EmitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( + const OMPMasterTaskLoopSimdDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, + S.getBeginLoc()); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( + const OMPParallelMasterTaskLoopSimdDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, + S.getBeginLoc()); + 
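+    // The call below then wraps this master region in a 'parallel' region,
+    // forming the combined construct, e.g. (illustrative user code):
+    //
+    //   #pragma omp parallel master taskloop simd
+    //   for (int i = 0; i < n; ++i)
+    //     process(i);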
+  };
+  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
+// Generate the instructions for '#pragma omp target update' directive.
+void CodeGenFunction::EmitOMPTargetUpdateDirective(
+    const OMPTargetUpdateDirective &S) {
+  // If we don't have target devices, don't bother emitting the data mapping
+  // code.
+  if (CGM.getLangOpts().OMPTargetTriples.empty())
+    return;
+
+  // Check if we have any if clause associated with the directive.
+  const Expr *IfCond = nullptr;
+  if (const auto *C = S.getSingleClause<OMPIfClause>())
+    IfCond = C->getCondition();
+
+  // Check if we have any device clause associated with the directive.
+  const Expr *Device = nullptr;
+  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
+    Device = C->getDevice();
+
+  OMPLexicalScope Scope(*this, S, OMPD_task);
+  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
+}
+
+void CodeGenFunction::EmitSimpleOMPExecutableDirective(
+    const OMPExecutableDirective &D) {
+  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
+    return;
+  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
+    } else {
+      OMPPrivateScope LoopGlobals(CGF);
+      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
+        for (const Expr *E : LD->counters()) {
+          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
+            LValue GlobLVal = CGF.EmitLValue(E);
+            LoopGlobals.addPrivate(
+                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
+          }
+          if (isa<OMPCapturedExprDecl>(VD)) {
+            // Emit only those that were not explicitly referenced in clauses.
+            if (!CGF.LocalDeclMap.count(VD))
+              CGF.EmitVarDecl(*VD);
+          }
+        }
+        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
+          if (!C->getNumForLoops())
+            continue;
+          for (unsigned I = LD->getCollapsedNumber(),
+                        E = C->getLoopNumIterations().size();
+               I < E; ++I) {
+            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
+                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
+              // Emit only those that were not explicitly referenced in
+              // clauses.
+              if (!CGF.LocalDeclMap.count(VD))
+                CGF.EmitVarDecl(*VD);
+            }
+          }
+        }
+      }
+      LoopGlobals.Privatize();
+      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
+    }
+  };
+  OMPSimdLexicalScope Scope(*this, D);
+  CGM.getOpenMPRuntime().emitInlinedDirective(
+      *this,
+      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
+                                                  : D.getDirectiveKind(),
+      CodeGen);
+}
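+// For reference, illustrative stand-alone forms handled by
+// EmitOMPTargetUpdateDirective above (hypothetical names):
+//
+//   #pragma omp target update to(a[0:n]) device(dev_id) if(use_device)
+//   #pragma omp target update from(a[0:n])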