• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
2 // RUN:   -triple x86_64-linux-gnu \
3 // RUN:   | FileCheck -check-prefix=HOST %s
4 // RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
5 // RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
6 // RUN:   | FileCheck -check-prefix=DEV %s
7 
8 #include "Inputs/cuda.h"
9 
10 // Device side kernel name.
11 // HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00"
12 // HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00"
13 
14 // Check functions emitted for test_capture in host compilation.
15 // Check lambda is not emitted in host compilation.
16 // HOST-LABEL: define void @_Z12test_capturev
17 // HOST:  call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
18 // HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
19 // HOST:  call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_
20 // HOST-NOT: define{{.*}}@_ZZ12test_capturevENKUlvE_clEv
21 
22 // Check functions emitted for test_resolve in host compilation.
23 // Check host version of template function 'overloaded' is emitted and called
24 // by the lambda function.
25 // HOST-LABEL: define void @_Z12test_resolvev
26 // HOST:  call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_()
27 // HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_
28 // HOST:  call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_
29 // HOST:  call void @_ZZ12test_resolvevENKUlvE_clEv
30 // HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
31 // HOST:  call i32 @_Z10overloadedIiET_v
32 // HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
33 // HOST:  ret i32 2
34 
35 // Check kernel is registered with correct device side kernel name.
36 // HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]]
37 // HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]]
38 
39 // DEV: @a = addrspace(1) externally_initialized global i32 0
40 
41 // Check functions emitted for test_capture in device compilation.
42 // Check lambda is emitted in device compilation and accessing device variable.
43 // DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
44 // DEV:  call void @_ZZ12test_capturevENKUlvE_clEv
45 // DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
46 // DEV:  store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)
47 
48 // Check functions emitted for test_resolve in device compilation.
49 // Check device version of template function 'overloaded' is emitted and called
50 // by the lambda function.
51 // DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
52 // DEV:  call void @_ZZ12test_resolvevENKUlvE_clEv
53 // DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
54 // DEV:  call i32 @_Z10overloadedIiET_v
55 // DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
56 // DEV:  ret i32 1
57 
// Device-side global written by the lambda in test_capture. The DEV checks
// expect it to be emitted as an addrspace(1) externally_initialized i32
// global, and expect the lambda body to store 1 into it.
58 __device__ int a;
59 
// __device__ overload of 'overloaded'. It returns 1 so the DEV checks can
// tell it apart from the __host__ overload: device compilation is expected
// to emit _Z10overloadedIiET_v with 'ret i32 1'.
60 template<class T>
overloaded()61 __device__ T overloaded() { return 1; }
62 
// __host__ overload of 'overloaded', with the same signature as the
// __device__ one. It returns 2 so the HOST checks can confirm that host
// compilation picks this version: _Z10overloadedIiET_v with 'ret i32 2'.
63 template<class T>
overloaded()64 __host__ T overloaded() { return 2; }
65 
// Kernel template that simply invokes the callable it is given. Each test
// instantiates it with a lambda type; the checks expect an amdgpu_kernel
// definition on the device side and a call to the __device_stub__ variant
// on the host side.
66 template<class F>
g(F f)67 __global__ void g(F f) { f(); }
68 
// Host helper that launches kernel g (one block, one thread) with f.
// It deliberately does not call f() on the host, so f's call operator is
// only needed by the device-side kernel.
69 template<class F>
test_capture_helper(F f)70 void test_capture_helper(F f) { g<<<1,1>>>(f); }
71 
// Host helper that launches kernel g (one block, one thread) with f and
// then also calls f() directly on the host. The same lambda is therefore
// invoked from both a device context (inside g) and a host context (here),
// which is what the test_resolve HOST and DEV checks rely on.
72 template<class F>
test_resolve_helper(F f)73 void test_resolve_helper(F f) { g<<<1,1>>>(f); f(); }
74 
75 // Test capture of device variable in lambda function.
// The lambda assigns 1 to the __device__ variable 'a' and is handed to
// test_capture_helper, which only uses it in a kernel launch. The DEV checks
// expect the lambda's call operator to be emitted with a store of 1 to @a.
76 void test_capture(void) {
77   test_capture_helper([](){ a = 1;});
78 }
79 
80 // Test resolving host/device function in lambda function.
81 // Callee should resolve to correct host/device function based on where
82 // the lambda function is called, not where it is defined.
// The lambda calls overloaded<int>() and is passed to test_resolve_helper,
// which invokes it both through kernel g and directly on the host. The HOST
// checks expect the callee returning 2; the DEV checks expect the callee
// returning 1.
test_resolve(void)83 void test_resolve(void) {
84   test_resolve_helper([](){ overloaded<int>();});
85 }
86