// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s

// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
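//
// The -fopenmp-simd runs above compile the same source with the OpenMP runtime
// disabled (only simd-related directives are honored), so the SIMD-ONLY0 lines
// verify that no __kmpc_* or __tgt_* runtime calls are emitted in that mode.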
#ifndef HEADER
#define HEADER

volatile int g __attribute__((aligned(128))) = 1212;

template <class T>
struct S {
  T f;
  S(T a) : f(a + g) {}
  S() : f(g) {}
  operator T() { return T(); }
  S &operator&(const S &) { return *this; }
  ~S() {}
};

struct SS {
  int a;
  int b : 4;
  int &c;
  SS(int &d) : a(0), b(0), c(d) {
#pragma omp parallel reduction(default, +: a, b, c)
#ifdef LAMBDA
    [&]() {
      ++this->a, --b, (this)->c /= 1;
#pragma omp parallel reduction(&: a, b, c)
      ++(this)->a, --b, this->c /= 1;
    }();
#elif defined(BLOCKS)
    ^{
      ++a;
      --this->b;
      (this)->c /= 1;
#pragma omp parallel reduction(-: a, b, c)
      ++(this)->a, --b, this->c /= 1;
    }();
#else
    ++this->a, --b, c /= 1;
#endif
  }
};

template<typename T>
struct SST {
  T a;
  SST() : a(T()) {
#pragma omp parallel reduction(*: a)
#ifdef LAMBDA
    [&]() {
      [&]() {
        ++this->a;
#pragma omp parallel reduction(&& :a)
        ++(this)->a;
      }();
    }();
#elif defined(BLOCKS)
    ^{
      ^{
        ++a;
#pragma omp parallel reduction(|: a)
        ++(this)->a;
      }();
    }();
#else
    ++(this)->a;
#endif
  }
};

// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8*
// CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer

// CHECK: foo_array_sect
// CHECK: call void {{.+}}@__kmpc_fork_call(
// CHECK: ret void
void foo_array_sect(short x[1]) {
#pragma omp parallel reduction(default, + : x[:])
  {}
}
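
// Note on the directive above: `reduction(default, + : x[:])` uses the OpenMP
// 5.0 reduction-modifier syntax (the modifier is one of `default`, `inscan`,
// or `task`); `default` behaves like an unmodified reduction, and `x[:]` is an
// array section covering the elements of x, so each element is reduced.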

template <typename T>
T tmain() {
  T t;
  S<T> test;
  SST<T> sst;
  T t_var __attribute__((aligned(128))) = T(), t_var1 __attribute__((aligned(128)));
  T vec[] = {1, 2};
  S<T> s_arr[] = {1, 2};
  S<T> var __attribute__((aligned(128))) (3), var1 __attribute__((aligned(128)));
#pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
  {
    vec[0] = t_var;
    s_arr[0] = var;
  }
  return T();
}

int sivar;
int main() {
  SS ss(sivar);
#ifdef LAMBDA
  // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
  // LAMBDA-LABEL: @main
  // LAMBDA: alloca [[SS_TY]],
  // LAMBDA: alloca [[CAP_TY:%.+]],
  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@[^(]+]]([[CAP_TY]]*
  [&]() {
  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]])
#pragma omp parallel reduction(+:g)
  {
    // LAMBDA: define {{.+}} @{{.+}}([[SS_TY]]*
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
    // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
    // LAMBDA: store i8
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
    // LAMBDA-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
    // LAMBDA: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
    // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void
    // LAMBDA: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
    // LAMBDA: store i8 %{{.+}}, i8* [[B_REF]],
    // LAMBDA: ret
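    // b is a bit-field, so its address cannot be passed to the outlined
    // function; the constructor materializes b in a local int, passes that by
    // pointer, and after the parallel region truncates the result and stores
    // it back into the bit-field (the i8 store into [[B_REF]] above).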

    // LAMBDA: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
    // LAMBDA-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %
    // LAMBDA: call{{.*}} void
    // LAMBDA: ret void

    // LAMBDA: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]*
    // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA: store i{{[0-9]+}} -1, i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
    // LAMBDA: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
    // LAMBDA-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
    // LAMBDA-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
    // LAMBDA-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
    // LAMBDA-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
    // LAMBDA-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
    // LAMBDA: call i32 @__kmpc_reduce_nowait(
    // LAMBDA: ret void

    // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}})
    // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},

    // Reduction list for runtime.
    // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x i8*],

    // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]]
    // LAMBDA: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    g = 1;
    // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
    // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
    // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* {{[^,]*}} [[ARG]])

    // LAMBDA: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0
    // LAMBDA: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8*
    // LAMBDA: store i8* [[BITCAST]], i8** [[G_PRIV_REF]],
    // LAMBDA: call i32 @__kmpc_reduce_nowait(
    // LAMBDA: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [
    // LAMBDA: i32 1, label %[[CASE1:.+]]
    // LAMBDA: i32 2, label %[[CASE2:.+]]
    // LAMBDA: [[CASE1]]
    // LAMBDA: [[G_VAL:%.+]] = load i32, i32* [[G_REF]]
    // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // LAMBDA: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]]
    // LAMBDA: store i32 [[ADD]], i32* [[G_REF]]
    // LAMBDA: call void @__kmpc_end_reduce_nowait(
    // LAMBDA: br label %[[REDUCTION_DONE]]
    // LAMBDA: [[CASE2]]
    // LAMBDA: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // LAMBDA: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
    // LAMBDA: br label %[[REDUCTION_DONE]]
    // LAMBDA: [[REDUCTION_DONE]]
    // LAMBDA: ret void
    [&]() {
      // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* {{[^,]*}} [[ARG_PTR:%.+]])
      // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
      g = 2;
      // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
      // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
      // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
      // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
    }();
  }
  }();
  return 0;
#elif defined(BLOCKS)
  // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
  // BLOCKS-LABEL: @main
  // BLOCKS: call
  // BLOCKS: call void {{%.+}}(i8
  ^{
  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]])
#pragma omp parallel reduction(-:g)
  {
    // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* nonnull align 4 dereferenceable(4) %{{.+}})
    // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},

    // Reduction list for runtime.
    // BLOCKS: [[RED_LIST:%.+]] = alloca [1 x i8*],

    // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR:%.+]]
    // BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    g = 1;
    // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], align 128
    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
    // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
    // BLOCKS: call void {{%.+}}(i8

    // BLOCKS: [[G_PRIV_REF:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[RED_LIST]], i64 0, i64 0
    // BLOCKS: [[BITCAST:%.+]] = bitcast i32* [[G_PRIVATE_ADDR]] to i8*
    // BLOCKS: store i8* [[BITCAST]], i8** [[G_PRIV_REF]],
    // BLOCKS: call i32 @__kmpc_reduce_nowait(
    // BLOCKS: switch i32 %{{.+}}, label %[[REDUCTION_DONE:.+]] [
    // BLOCKS: i32 1, label %[[CASE1:.+]]
    // BLOCKS: i32 2, label %[[CASE2:.+]]
    // BLOCKS: [[CASE1]]
    // BLOCKS: [[G_VAL:%.+]] = load i32, i32* [[G_REF]]
    // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // BLOCKS: [[ADD:%.+]] = add nsw i32 [[G_VAL]], [[G_PRIV_VAL]]
    // BLOCKS: store i32 [[ADD]], i32* [[G_REF]]
    // BLOCKS: call void @__kmpc_end_reduce_nowait(
    // BLOCKS: br label %[[REDUCTION_DONE]]
    // BLOCKS: [[CASE2]]
    // BLOCKS: [[G_PRIV_VAL:%.+]] = load i32, i32* [[G_PRIVATE_ADDR]]
    // BLOCKS: atomicrmw add i32* [[G_REF]], i32 [[G_PRIV_VAL]] monotonic
    // BLOCKS: br label %[[REDUCTION_DONE]]
    // BLOCKS: [[REDUCTION_DONE]]
    // BLOCKS: ret void
    ^{
      // BLOCKS: define {{.+}} void {{@.+}}(i8*
      g = 2;
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: store i{{[0-9]+}} 2, i{{[0-9]+}}*
      // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
      // BLOCKS: ret
    }();
  }
  }();
  return 0;
// BLOCKS: define {{.+}} @{{.+}}([[SS_TY]]*
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// BLOCKS: store i8
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// BLOCKS-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// BLOCKS: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// BLOCKS: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i32*, i32*, i32*)* [[SS_MICROTASK:@.+]] to void
// BLOCKS: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
// BLOCKS: store i8 %{{.+}}, i8* [[B_REF]],
// BLOCKS: ret

// BLOCKS: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
// BLOCKS-NOT: getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %
// BLOCKS: call{{.*}} void
// BLOCKS: ret void

// BLOCKS: define internal void @{{.+}}(i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, i32* {{.+}}, i32* {{.+}}, i32* {{.+}})
// BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
// BLOCKS: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
// BLOCKS-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
// BLOCKS-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
// BLOCKS-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
// BLOCKS-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
// BLOCKS-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
// BLOCKS-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
// BLOCKS-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
// BLOCKS: call i32 @__kmpc_reduce_nowait(
// BLOCKS: ret void
#else
  S<float> test;
  float t_var = 0, t_var1;
  int vec[] = {1, 2};
  S<float> s_arr[] = {1, 2};
  S<float> var(3), var1;
  float _Complex cf;
#pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
  {
    vec[0] = t_var;
    s_arr[0] = var;
  }
  if (var1)
#pragma omp parallel reduction(+ : t_var) reduction(& : var) reduction(&& : var1) reduction(min : t_var1)
    while (1) {
      vec[0] = t_var;
      s_arr[0] = var;
    }
#pragma omp parallel reduction(+ : cf)
    ;
  return tmain<int>();
#endif
}

// CHECK: define {{.*}}i{{[0-9]+}} @main()
// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{[^,]*}} [[TEST]])
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK:@.+]] to void
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK1:@.+]] to void
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, { float, float }*)* [[MAIN_MICROTASK2:@.+]] to void
// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
// CHECK: ret
//
// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca float,

// Reduction list for runtime.
// CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],

// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],

// CHECK: [[T_VAR_REF:%.+]] = load float*, float** %
// CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %

// For the + reduction operation, the initial value of the private variable is 0.
// CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]],

// For the & reduction operation, the initial value of the private variable is all-ones.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_PRIV]])

// For the && reduction operation, the initial value of the private variable is 1.0.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_PRIV]])

// For the min reduction operation, the initial value of the private variable is the largest representable value.
// CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]],
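// (0x47EFFFFFE0000000 is FLT_MAX, the largest finite float, written as an
// IEEE double hex constant, which is how LLVM prints float immediates.)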

// Skip checks for internal operations.

// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};

// CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0
// CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]],
// CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1
// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]],
// CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2
// CHECK: [[BITCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]],
// CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3
// CHECK: [[BITCAST:%.+]] = bitcast float* [[T_VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]],

// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);

// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])
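
// The return value of __kmpc_reduce_nowait selects the reduction strategy:
// 1 means this thread performs the whole reduction with ordinary (non-atomic)
// operations and then calls __kmpc_end_reduce_nowait, 2 means each thread
// folds its private copy into the original variable using atomic operations,
// and 0 means there is nothing left for this thread to do.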

// switch(res)
// CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [
// CHECK: i32 1, label %[[CASE1:.+]]
// CHECK: i32 2, label %[[CASE2:.+]]
// CHECK: ]

// case 1:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_VAL:%.+]] = load float, float* [[T_VAR_REF]],
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load float, float* [[T_VAR_PRIV]],
// CHECK: [[UP:%.+]] = fadd float [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]]
// CHECK: store float [[UP]], float* [[T_VAR_REF]],

// var = var.operator &(var_reduction);
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_REF]], [[S_FLOAT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// var1 = var1.operator &&(var1_reduction);
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_VAL:%.+]] = load float, float* [[T_VAR1_REF]],
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load float, float* [[T_VAR1_PRIV]],
// CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: store float [[UP]], float* [[T_VAR1_REF]],

// __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])

// break;
// CHECK: br label %[[RED_DONE]]

// case 2:
// t_var += t_var_reduction;
// CHECK: load float, float* [[T_VAR_PRIV]]
// CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR_REF_INT]] monotonic,
// CHECK: br label %[[CONT:.+]]
// CHECK: [[CONT]]
// CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %[[CONT]] ]
// CHECK: fadd float
// CHECK: [[UP_INT:%.+]] = load i32
// CHECK: [[T_VAR_REF_INT:%.+]] = bitcast float* [[T_VAR_REF]] to i32*
// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
// CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
// CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
// CHECK: [[ATOMIC_DONE]]
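
// There is no single atomic instruction used for the float add here, so the
// atomic case is emitted as a compare-exchange loop over the value's bit
// pattern. In rough pseudocode (names are illustrative, not from the IR):
//
//   old = atomic_load((i32 *)&t_var);
//   do {
//     desired = bits_of(float_of(old) + t_var_priv);
//     (old, ok) = cmpxchg((i32 *)&t_var, old, desired);
//   } while (!ok);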

// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_REF]], [[S_FLOAT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(

// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(

// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: load float, float* [[T_VAR1_PRIV]]
// CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
// CHECK: [[OLD1:%.+]] = load atomic i32, i32* [[T_VAR1_REF_INT]] monotonic,
// CHECK: br label %[[CONT:.+]]
// CHECK: [[CONT]]
// CHECK: [[ORIG_OLD_INT:%.+]] = phi i32 [ [[OLD1]], %{{.+}} ], [ [[OLD2:%.+]], %{{.+}} ]
// CHECK: [[CMP:%.+]] = fcmp olt float
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: [[UP_INT:%.+]] = load i32
// CHECK: [[T_VAR1_REF_INT:%.+]] = bitcast float* [[T_VAR1_REF]] to i32*
// CHECK: [[RES:%.+]] = cmpxchg i32* [[T_VAR1_REF_INT]], i32 [[ORIG_OLD_INT]], i32 [[UP_INT]] monotonic monotonic
// CHECK: [[OLD2:%.+]] = extractvalue { i32, i1 } [[RES]], 0
// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i32, i1 } [[RES]], 1
// CHECK: br i1 [[SUCCESS_FAIL]], label %[[ATOMIC_DONE:.+]], label %[[CONT]]
// CHECK: [[ATOMIC_DONE]]

// break;
// CHECK: br label %[[RED_DONE]]
// CHECK: [[RED_DONE]]

// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: ret void

// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
//  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
//  ...
//  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
//  *(Type<n>-1*)rhs[<n>-1]);
// }
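//
// For this test the list has four entries, so reduce_func amounts to roughly
// the following (a sketch only; the parameter names are invented):
//
//   void reduce_func(void *lhs[4], void *rhs[4]) {
//     *(float *)lhs[0] += *(float *)rhs[0];                           // t_var, +
//     *(S<float> *)lhs[1] = *(S<float> *)lhs[1] & *(S<float> *)rhs[1];       // var, &
//     *(S<float> *)lhs[2] = S<float>(*(S<float> *)lhs[2] && *(S<float> *)rhs[2]); // var1, &&
//     *(float *)lhs[3] = *(float *)lhs[3] < *(float *)rhs[3]          // t_var1, min
//                            ? *(float *)lhs[3] : *(float *)rhs[3];
//   }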
// CHECK: define internal void [[REDUCTION_FUNC]](i8* %0, i8* %1)
// t_var_rhs = (float*)rhs[0];
// CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]],
// CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to float*
// t_var_lhs = (float*)lhs[0];
// CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]],
// CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to float*

// var_rhs = (S<float>*)rhs[1];
// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1
// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]],
// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_FLOAT_TY]]*
// var_lhs = (S<float>*)lhs[1];
// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1
// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]],
// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_FLOAT_TY]]*

// var1_rhs = (S<float>*)rhs[2];
// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2
// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]],
// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_FLOAT_TY]]*
// var1_lhs = (S<float>*)lhs[2];
// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2
// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]],
// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_FLOAT_TY]]*

// t_var1_rhs = (float*)rhs[3];
// CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3
// CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]],
// CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to float*
// t_var1_lhs = (float*)lhs[3];
// CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3
// CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]],
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to float*

// t_var_lhs += t_var_rhs;
// CHECK: [[T_VAR_LHS_VAL:%.+]] = load float, float* [[T_VAR_LHS]],
// CHECK: [[T_VAR_RHS_VAL:%.+]] = load float, float* [[T_VAR_RHS]],
// CHECK: [[UP:%.+]] = fadd float [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]]
// CHECK: store float [[UP]], float* [[T_VAR_LHS]],

// var_lhs = var_lhs.operator &(var_rhs);
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_FLOAT_TY]]* @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_LHS]], [[S_FLOAT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_RHS]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// var1_lhs = var1_lhs.operator &&(var1_rhs);
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_LHS]])
// CHECK: [[VAR1_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_FLOAT:%.+]] = call float @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_RHS]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = fcmp une float [[TO_FLOAT]], 0.0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = uitofp i1 [[COND_LVALUE]] to float
// CHECK:  call void @{{.+}}([[S_FLOAT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], float [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR1_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_FLOAT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
// CHECK: [[T_VAR1_LHS_VAL:%.+]] = load float, float* [[T_VAR1_LHS]],
// CHECK: [[T_VAR1_RHS_VAL:%.+]] = load float, float* [[T_VAR1_RHS]],
// CHECK: [[CMP:%.+]] = fcmp olt float [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi float
// CHECK: store float [[UP]], float* [[T_VAR1_LHS]],
// CHECK: ret void

// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca float,
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca float,

// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],

// CHECK: [[T_VAR_REF:%.+]] = load float*, float** %
// CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %

// For the + reduction operation, the initial value of the private variable is 0.
// CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]],

// For the & reduction operation, the initial value of the private variable is all-ones.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{[^,]*}} [[VAR_PRIV]])

// For the && reduction operation, the initial value of the private variable is 1.0.
// CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{[^,]*}} [[VAR1_PRIV]])

// For the min reduction operation, the initial value of the private variable is the largest representable value.
// CHECK: store float 0x47EFFFFFE0000000, float* [[T_VAR1_PRIV]],

// CHECK-NOT: call i32 @__kmpc_reduce

// CHECK: }

// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[TEST]])
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*)* [[TMAIN_MICROTASK:@.+]] to void
// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
// CHECK: ret
//
// CHECK: define {{.+}} @{{.+}}([[SS_TY]]*
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// CHECK: store i8
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
// CHECK-NOT: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
// CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*)* [[SS_MICROTASK:@.+]] to void
// CHECK: [[B_REF:%.+]] = getelementptr {{.*}}[[SS_TY]], [[SS_TY]]* %{{.*}}, i32 0, i32 1
// CHECK: store i8 %{{.+}}, i8* [[B_REF]],
// CHECK: ret

// CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]*
// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[A_PRIV]],
// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REFA:%.+]],
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[B_PRIV]],
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[C_PRIV]],
// CHECK: store i{{[0-9]+}}* [[C_PRIV]], i{{[0-9]+}}** [[REFC:%.+]],
// CHECK: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
// CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
// CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
// CHECK-NEXT: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_PRIV]],
// CHECK-NEXT: [[DEC:%.+]] = add nsw i{{[0-9]+}} [[B_VAL]], -1
// CHECK-NEXT: store i{{[0-9]+}} [[DEC]], i{{[0-9]+}}* [[B_PRIV]],
// CHECK-NEXT: [[C_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFC]],
// CHECK-NEXT: [[C_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C_PRIV]],
// CHECK-NEXT: [[DIV:%.+]] = sdiv i{{[0-9]+}} [[C_VAL]], 1
// CHECK-NEXT: store i{{[0-9]+}} [[DIV]], i{{[0-9]+}}* [[C_PRIV]],
// CHECK: call i32 @__kmpc_reduce_nowait(
// CHECK: ret void

// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128
// CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], align 128
// CHECK: [[T_VAR1_PRIV:%.+]] = alloca i{{[0-9]+}}, align 128

// Reduction list for runtime.
// CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*],

// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],

// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
// CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
// CHECK: [[VAR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
// CHECK: [[T_VAR1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %

// For the + reduction operation, the initial value of the private variable is 0.
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[T_VAR_PRIV]],

// For the & reduction operation, the initial value of the private variable is all-ones.
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[VAR_PRIV]])

// For the && reduction operation, the initial value of the private variable is 1.
// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[VAR1_PRIV]])

// For the min reduction operation, the initial value of the private variable is the largest representable value (INT_MAX).
// CHECK: store i{{[0-9]+}} 2147483647, i{{[0-9]+}}* [[T_VAR1_PRIV]],

// Skip checks for internal operations.

// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};

// CHECK: [[T_VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 0
// CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR_PRIV_REF]],
// CHECK: [[VAR_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 1
// CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR_PRIV_REF]],
// CHECK: [[VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 2
// CHECK: [[BITCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[VAR1_PRIV_REF]],
// CHECK: [[T_VAR1_PRIV_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST]], i64 0, i64 3
// CHECK: [[BITCAST:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR1_PRIV]] to i8*
// CHECK: store i8* [[BITCAST]], i8** [[T_VAR1_PRIV_REF]],

// res = __kmpc_reduce_nowait(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>);

// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
// CHECK: [[BITCAST:%.+]] = bitcast [4 x i8*]* [[RED_LIST]] to i8*
// CHECK: [[RES:%.+]] = call i32 @__kmpc_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], i32 4, i64 32, i8* [[BITCAST]], void (i8*, i8*)* [[REDUCTION_FUNC:@.+]], [8 x i32]* [[REDUCTION_LOCK]])

// switch(res)
// CHECK: switch i32 [[RES]], label %[[RED_DONE:.+]] [
// CHECK: i32 1, label %[[CASE1:.+]]
// CHECK: i32 2, label %[[CASE2:.+]]
// CHECK: ]

// case 1:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
// CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_VAL]], [[T_VAR_PRIV_VAL]]
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_REF]],

// var = var.operator &(var_reduction);
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR_REF]], [[S_INT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// var1 = var1.operator &&(var1_reduction);
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK:  call void @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_REF]],
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]],
// CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_VAL]], [[T_VAR1_PRIV_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi i32
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_REF]],

// __kmpc_end_reduce_nowait(<loc>, <gtid>, &<lock>);
// CHECK: call void @__kmpc_end_reduce_nowait(%{{.+}}* [[REDUCTION_LOC]], i32 [[GTID]], [8 x i32]* [[REDUCTION_LOCK]])

// break;
// CHECK: br label %[[RED_DONE]]

// case 2:
// t_var += t_var_reduction;
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]]
// CHECK: atomicrmw add i32* [[T_VAR_REF]], i32 [[T_VAR_PRIV_VAL]] monotonic

// var = var.operator &(var_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR_REF]], [[S_INT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_PRIV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(

// var1 = var1.operator &&(var1_reduction);
// CHECK: call void @__kmpc_critical(
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_REF]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_PRIV]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK:  call void @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_REF]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)
// CHECK: call void @__kmpc_end_critical(

// t_var1 = min(t_var1, t_var1_reduction);
// CHECK: [[T_VAR1_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_PRIV]]
// CHECK: atomicrmw min i32* [[T_VAR1_REF]], i32 [[T_VAR1_PRIV_VAL]] monotonic
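
// Unlike the float reductions in main, 32-bit signed integer add and min have
// native atomicrmw forms, so the atomic case needs neither a compare-exchange
// loop nor a critical section for t_var and t_var1.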

// break;
// CHECK: br label %[[RED_DONE]]
// CHECK: [[RED_DONE]]

// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* {{[^,]*}} [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
// CHECK: ret void

// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
//  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
//  ...
//  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
//  *(Type<n>-1*)rhs[<n>-1]);
// }
// CHECK: define internal void [[REDUCTION_FUNC]](i8* %0, i8* %1)
// t_var_rhs = (i{{[0-9]+}}*)rhs[0];
// CHECK: [[T_VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR_RHS_REF]],
// CHECK: [[T_VAR_RHS:%.+]] = bitcast i8* [[T_VAR_RHS_VOID]] to i{{[0-9]+}}*
// t_var_lhs = (i{{[0-9]+}}*)lhs[0];
// CHECK: [[T_VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS:%.+]], i64 0, i64 0
// CHECK: [[T_VAR_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR_LHS_REF]],
// CHECK: [[T_VAR_LHS:%.+]] = bitcast i8* [[T_VAR_LHS_VOID]] to i{{[0-9]+}}*

// var_rhs = (S<i{{[0-9]+}}>*)rhs[1];
// CHECK: [[VAR_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 1
// CHECK: [[VAR_RHS_VOID:%.+]] = load i8*, i8** [[VAR_RHS_REF]],
// CHECK: [[VAR_RHS:%.+]] = bitcast i8* [[VAR_RHS_VOID]] to [[S_INT_TY]]*
// var_lhs = (S<i{{[0-9]+}}>*)lhs[1];
// CHECK: [[VAR_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 1
// CHECK: [[VAR_LHS_VOID:%.+]] = load i8*, i8** [[VAR_LHS_REF]],
// CHECK: [[VAR_LHS:%.+]] = bitcast i8* [[VAR_LHS_VOID]] to [[S_INT_TY]]*

// var1_rhs = (S<i{{[0-9]+}}>*)rhs[2];
// CHECK: [[VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 2
// CHECK: [[VAR1_RHS_VOID:%.+]] = load i8*, i8** [[VAR1_RHS_REF]],
// CHECK: [[VAR1_RHS:%.+]] = bitcast i8* [[VAR1_RHS_VOID]] to [[S_INT_TY]]*
// var1_lhs = (S<i{{[0-9]+}}>*)lhs[2];
// CHECK: [[VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 2
// CHECK: [[VAR1_LHS_VOID:%.+]] = load i8*, i8** [[VAR1_LHS_REF]],
// CHECK: [[VAR1_LHS:%.+]] = bitcast i8* [[VAR1_LHS_VOID]] to [[S_INT_TY]]*

// t_var1_rhs = (i{{[0-9]+}}*)rhs[3];
// CHECK: [[T_VAR1_RHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_RHS]], i64 0, i64 3
// CHECK: [[T_VAR1_RHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_RHS_REF]],
// CHECK: [[T_VAR1_RHS:%.+]] = bitcast i8* [[T_VAR1_RHS_VOID]] to i{{[0-9]+}}*
// t_var1_lhs = (i{{[0-9]+}}*)lhs[3];
// CHECK: [[T_VAR1_LHS_REF:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[RED_LIST_LHS]], i64 0, i64 3
// CHECK: [[T_VAR1_LHS_VOID:%.+]] = load i8*, i8** [[T_VAR1_LHS_REF]],
// CHECK: [[T_VAR1_LHS:%.+]] = bitcast i8* [[T_VAR1_LHS_VOID]] to i{{[0-9]+}}*

// t_var_lhs += t_var_rhs;
// CHECK: [[T_VAR_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_LHS]],
// CHECK: [[T_VAR_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_RHS]],
// CHECK: [[UP:%.+]] = add nsw i{{[0-9]+}} [[T_VAR_LHS_VAL]], [[T_VAR_RHS_VAL]]
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR_LHS]],

// var_lhs = var_lhs.operator &(var_rhs);
// CHECK: [[UP:%.+]] = call nonnull align 4 dereferenceable(4) [[S_INT_TY]]* @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR_LHS]], [[S_INT_TY]]* nonnull align 4 dereferenceable(4) [[VAR_RHS]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[UP]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// var1_lhs = var1_lhs.operator &&(var1_rhs);
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_LHS]])
// CHECK: [[VAR1_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br i1 [[VAR1_BOOL]], label %[[TRUE:.+]], label %[[END2:.+]]
// CHECK: [[TRUE]]
// CHECK: [[TO_INT:%.+]] = call i{{[0-9]+}} @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[VAR1_RHS]])
// CHECK: [[VAR1_REDUCTION_BOOL:%.+]] = icmp ne i{{[0-9]+}} [[TO_INT]], 0
// CHECK: br label %[[END2]]
// CHECK: [[END2]]
// CHECK: [[COND_LVALUE:%.+]] = phi i1 [ false, %{{.+}} ], [ [[VAR1_REDUCTION_BOOL]], %[[TRUE]] ]
// CHECK: [[CONV:%.+]] = zext i1 [[COND_LVALUE]] to i32
// CHECK:  call void @{{.+}}([[S_INT_TY]]* {{[^,]*}} [[COND_LVALUE:%.+]], i32 [[CONV]])
// CHECK: [[BC1:%.+]] = bitcast [[S_INT_TY]]* [[VAR1_LHS]] to i8*
// CHECK: [[BC2:%.+]] = bitcast [[S_INT_TY]]* [[COND_LVALUE]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC1]], i8* align 4 [[BC2]], i64 4, i1 false)

// t_var1_lhs = min(t_var1_lhs, t_var1_rhs);
// CHECK: [[T_VAR1_LHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_LHS]],
// CHECK: [[T_VAR1_RHS_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR1_RHS]],
// CHECK: [[CMP:%.+]] = icmp slt i{{[0-9]+}} [[T_VAR1_LHS_VAL]], [[T_VAR1_RHS_VAL]]
// CHECK: br i1 [[CMP]]
// CHECK: [[UP:%.+]] = phi i32
// CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_LHS]],
// CHECK: ret void

#endif