//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}
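
// Example (illustrative CUDA source, not part of this file): for a kernel
// launch
//
//   kernel<<<Grid, Block>>>(Args);
//
// the expressions between <<< (LLLLoc) and >>> (GGGLoc) arrive here as
// ExecConfig, and the call built above behaves roughly as if
// cudaConfigureCall(Grid, Block) were evaluated before the kernel call.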

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}
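
// Example (illustrative CUDA source, not part of this file): the mapping
// from attributes to targets is
//
//   __global__ void kernel();        // CFT_Global
//   __device__ int devFn();          // CFT_Device
//   __host__ __device__ int hdFn();  // CFT_HostDevice
//   __host__ int hostFn();           // CFT_Host
//   int plainFn();                   // CFT_Host (unattributed, non-implicit)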

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
  CUDAFunctionTarget CallerTarget =
      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
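
// Example (illustrative CUDA source, not part of this file): given
//
//   __device__ int devFn();
//   __host__ __device__ int hdFn() { return devFn(); }
//
// the call to devFn() inside hdFn() is CFP_SameSide when compiling for the
// device and CFP_WrongSide when compiling for the host (case (d) above),
// while a call to hdFn() from any valid caller is CFP_HostDevice (case (b)).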

template <typename T>
static void EraseUnwantedCUDAMatchesImpl(
    Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
    std::function<const FunctionDecl *(const T &)> FetchDecl) {
  if (Matches.size() <= 1)
    return;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const T &Match) {
    return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
  };

  // Find the best call preference among the functions in Matches.
  Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  Matches.erase(
      llvm::remove_if(Matches,
                      [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
      Matches.end());
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<FunctionDecl *> &Matches) {
  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
}

void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
                                    SmallVectorImpl<DeclAccessPair> &Matches) {
  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
      *this, Caller, Matches, [](const DeclAccessPair &item) {
        return dyn_cast<FunctionDecl>(item.getDecl());
      });
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
      *this, Caller, Matches,
      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
        return dyn_cast<FunctionDecl>(item.second);
      });
}
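
// Example (illustrative CUDA source, not part of this file): if a __device__
// caller sees two otherwise-viable candidates
//
//   __device__ void f(int);            // CFP_Native for this caller
//   __host__ __device__ void f(long);  // CFP_HostDevice for this caller
//
// the host-device overload has a lower preference than the native one, so it
// is erased from Matches and only the __device__ overload survives.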

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
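
// For instance (the values follow directly from the branches above):
//
//   resolveCalleeCUDATargetConflict(CFT_HostDevice, CFT_Device, &R)
//     // returns false, R == CFT_Device: HD defers to the other target.
//   resolveCalleeCUDATargetConflict(CFT_Host, CFT_Device, &R)
//     // returns true: __host__ vs __device__ cannot be resolved.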

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}
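
// Example (illustrative CUDA source, not part of this file): for
//
//   struct A { __device__ A(); };
//   struct B { __host__ B(); };
//   struct C : A, B {};  // implicit C::C() must call a __device__ and a
//                        // __host__ constructor
//
// the implicit default constructor of C is given CUDAInvalidTargetAttr,
// whereas a class whose bases' and fields' special members all agree (or are
// __host__ __device__) gets the agreed-upon target inferred instead.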

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
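
// Example (illustrative CUDA source, not part of this file):
//
//   struct Empty { __device__ Empty() {} };  // empty: no parameters, empty
//                                            // body, no initializers
//   struct NonEmpty {
//     int X;
//     __device__ NonEmpty() : X(42) {}       // not empty: X(42) is not an
//   };                                       // empty-constructor initializer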

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}
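
// Example (illustrative CUDA source, not part of this file):
//
//   struct D1 { __device__ ~D1() {} };          // empty destructor
//   struct D2 { __device__ ~D2() { foo(); } };  // not empty: non-empty body
//   struct D3 : D1 { int Member; };             // empty: D1's destructor is
//                                               // empty and int needs no work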

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD->getName();
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
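
// Example (illustrative CUDA source, not part of this file): under
// -fcuda-host-device-constexpr,
//
//   constexpr int sq(int X) { return X * X; }        // becomes implicitly
//                                                    // __host__ __device__
//   __device__ int cube(int X);
//   constexpr int cube(int X) { return X * X * X; }
//   // error: unattributed constexpr cube() cannot overload the __device__
//   // declaration above (unless that declaration is in a system header).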