• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // REQUIRES: powerpc-registered-target
2 // REQUIRES: nvptx-registered-target
3 
4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
6 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes=CLASS,FUN,CHECK
7 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
8 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=CLASS,CHECK
9 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefixes=FUN,CHECK
10 
11 // expected-no-diagnostics
12 #ifndef HEADER
13 #define HEADER
14 
15 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4]
16 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 288, i64 673, i64 673, i64 544, i64 33, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720]
17 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 4, i64 4, i64 4, i64 0, i64 4, i64 40, i64 4, i64 4, i64 4, i64 8, i64 4]
18 // HOST-DAG: = private unnamed_addr constant [11 x i64] [i64 673, i64 673, i64 673, i64 544, i64 673, i64 673, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720, i64 1688849860264720]
19 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
20 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096]
21 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
22 // HOST-DAG: = private unnamed_addr constant [3 x i64] [i64 547, i64 673, i64 562949953422096]
23 // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 8, i64 8]
24 // HOST-DAG: = private unnamed_addr constant [2 x i64] [i64 673, i64 281474976711440]
25 // CHECK-DAG: [[S:%.+]] = type { i32 }
26 // CHECK-DAG: [[CAP1:%.+]] = type { [[S]]* }
27 // CHECK-DAG: [[CAP2:%.+]] = type { i32*, i32*, i32*, i32**, i32* }
28 
29 // CLASS: define internal void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker()
30 // CLASS: define weak void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67([[S]]* {{%.+}}, [[CAP1]]* nonnull align 8 dereferenceable(8) {{%.+}})
31 // CLASS-NOT: getelementptr
32 // CLASS: br i1 %
33 // CLASS: call void @__omp_offloading_{{.*}}_{{.*}}foo{{.*}}_l67_worker()
34 // CLASS: br label %
35 // CLASS: br i1 %
36 // CLASS: call void @__kmpc_kernel_init(
37 // CLASS: call void @__kmpc_data_sharing_init_stack()
38 // CLASS: call void @llvm.memcpy.
39 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
40 // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
41 // CLASS: store [[S]]* [[S_:%.+]], [[S]]** [[THIS_REF]],
42 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
43 // CLASS: call i32 [[LAMBDA1:@.+foo.+]]([[CAP1]]* {{[^,]*}} [[L]])
44 // CLASS: ret void
45 
46 // CLASS: define weak void @__omp_offloading_{{.+}}foo{{.+}}_l69([[S]]* %{{.+}}, [[CAP1]]* nonnull align 8 dereferenceable(8) %{{.+}})
47 // CLASS-NOT: getelementptr
48 // CLASS: call void @llvm.memcpy.
49 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR:%.+]],
50 // CLASS: [[THIS_REF:%.+]] = getelementptr inbounds [[CAP1]], [[CAP1]]* [[L]], i32 0, i32 0
51 // CLASS: store [[S]]* %{{.+}}, [[S]]** [[THIS_REF]],
52 // CLASS: [[L:%.+]] = load [[CAP1]]*, [[CAP1]]** [[L_ADDR]],
53 // CLASS: call i32 [[LAMBDA1]]([[CAP1]]* {{[^,]*}} [[L]])
54 // CLASS: ret void
55 
56 template <typename T>
foo(const T & t)57 int foo(const T &t) {
58   #pragma omp target parallel
59   t();
60   return 0;
61 }
62 
63 struct S {
64   int a = 15;
fooS65   int foo() {
66     auto &&L = [&]() { return a; };
67 #pragma omp target
68     L();
69 #pragma omp target parallel
70     L();
71     return a + ::foo(L);
72   }
73 } s;
74 
75 // FUN: define internal void @__omp_offloading_{{.+}}_main_l124_worker()
76 // FUN: define weak void @__omp_offloading_{{.+}}_main_l124(i64 %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, [[CAP2]]* nonnull align 8 dereferenceable(40) %{{.+}})
77 // FUN-NOT: getelementptr
78 // FUN: br i1 %
79 // FUN: call void @__omp_offloading_{{.*}}_{{.*}}main{{.*}}_l124_worker()
80 // FUN: br label %
81 // FUN: br i1 %
82 // FUN: call void @__kmpc_kernel_init(
83 // FUN: call void @__kmpc_data_sharing_init_stack()
84 // FUN: call void @llvm.memcpy.
85 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
86 // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
87 // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
88 // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
89 // FUN: store i32* %{{.+}}, i32** [[B_CAP]],
90 // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
91 // FUN: store i32* %{{.+}}, i32** [[C_CAP]],
92 // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
93 // FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
94 // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
95 // FUN: store i32* %{{.+}}, i32** [[A_CAP]],
96 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
97 // FUN: call i64 [[LAMBDA2:@.+main.+]]([[CAP2]]* {{[^,]*}} [[L]])
98 // FUN: ret void
99 
100 // FUN: define weak void @__omp_offloading_{{.+}}_main_l126(i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}} i32* nonnull align 4 dereferenceable(4) %{{.+}}, i32* %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}}, [[CAP2]]* nonnull align 8 dereferenceable(40) %{{.+}})
101 // FUN-NOT: getelementptr
102 // FUN: call void @llvm.memcpy.
103 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR:%.+]],
104 // FUN: [[ARGC_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 0
105 // FUN: store i32* %{{.+}}, i32** [[ARGC_CAP]],
106 // FUN: [[B_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 1
107 // FUN: store i32* %{{.+}}, i32** [[B_CAP]],
108 // FUN: [[C_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 2
109 // FUN: store i32* %{{.+}}, i32** [[C_CAP]],
110 // FUN: [[D_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 3
111 // FUN: store i32** %{{.+}}, i32*** [[D_CAP]],
112 // FUN: [[A_CAP:%.+]] = getelementptr inbounds [[CAP2]], [[CAP2]]* [[L]], i32 0, i32 4
113 // FUN: store i32* %{{.+}}, i32** [[A_CAP]],
114 // FUN: [[L:%.+]] = load [[CAP2]]*, [[CAP2]]** [[L_ADDR]],
115 // FUN: call i64 [[LAMBDA2]]([[CAP2]]* {{[^,]*}} [[L]])
116 // FUN: ret void
117 
main(int argc,char ** argv)118 int main(int argc, char **argv) {
119   int &b = argc;
120   int &&c = 1;
121   int *d = &argc;
122   int a;
123   auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; };
124 #pragma omp target firstprivate(argc) map(to : a)
125   L();
126 #pragma omp target parallel
127   L();
128   return argc + s.foo();
129 }
130 
131 
132 // HOST-LABEL: @main
133 
134 // HOST-DAG: call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 11, i8** [[BASES:%.+]], i8** [[PTRS:%.+]],
135 // HOST-DAG: [[BASES:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR:%.+]], i32 0, i32 0
136 // HOST-DAG: [[PTRS:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR:%.+]], i32 0, i32 0
137 // HOST-DAG: [[BASE_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[BASE_PTR]], i32 0, i32 6
138 // HOST-DAG: [[BASE_REF_CAST:%.+]] = bitcast i8** [[BASE_REF]] to i32***
139 // HOST-DAG: store i32** [[BASE:%.+]], i32*** [[BASE_REF_CAST]],
140 // HOST-DAG: [[BASE]] = getelementptr inbounds [[LAMBDA:%.+]], [[LAMBDA]]* [[LAMBDA_ADDR:%.+]], i32 0, i32 0
141 // HOST-DAG: [[PTR_REF:%.+]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[PTR_PTR]], i32 0, i32 6
142 // HOST-DAG: [[PTR_REF_CAST:%.+]] = bitcast i8** [[PTR_REF]] to i32**
143 // HOST-DAG: store i32* [[PTR:%.+]], i32** [[PTR_REF_CAST]],
144 // HOST-DAG: [[PTR]] = load i32*, i32** [[PTR_REF:%.+]],
145 // HOST-DAG: [[PTR_REF]] = getelementptr inbounds [[LAMBDA]], [[LAMBDA]]* [[LAMBDA_ADDR]], i32 0, i32 0
146 #endif // HEADER
147