1 // Test host code gen
2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
8
9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
16
17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
21 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
23
24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
31 // expected-no-diagnostics
32 #ifndef HEADER
33 #define HEADER
34
35
36 template <typename T>
T tmain() {
38 T *a, *b, *c;
39 int n = 10000;
40 int ch = 100;
41
42 // no schedule clauses
43 #pragma omp target
44 #pragma omp teams
45 #pragma omp distribute parallel for simd
46 for (int i = 0; i < n; ++i) {
47 a[i] = b[i] + c[i];
48 }
49
50 // dist_schedule: static no chunk
51 #pragma omp target
52 #pragma omp teams
53 #pragma omp distribute parallel for simd dist_schedule(static)
54 for (int i = 0; i < n; ++i) {
55 a[i] = b[i] + c[i];
56 }
57
58 // dist_schedule: static chunk
59 #pragma omp target
60 #pragma omp teams
61 #pragma omp distribute parallel for simd dist_schedule(static, ch)
62 for (int i = 0; i < n; ++i) {
63 a[i] = b[i] + c[i];
64 }
65
66 // schedule: static no chunk
67 #pragma omp target
68 #pragma omp teams
69 #pragma omp distribute parallel for simd schedule(static)
70 for (int i = 0; i < n; ++i) {
71 a[i] = b[i] + c[i];
72 }
73
74 // schedule: static chunk
75 #pragma omp target
76 #pragma omp teams
77 #pragma omp distribute parallel for simd schedule(static, ch)
78 for (int i = 0; i < n; ++i) {
79 a[i] = b[i] + c[i];
80 }
81
82 // schedule: dynamic no chunk
83 #pragma omp target
84 #pragma omp teams
85 #pragma omp distribute parallel for simd schedule(dynamic)
86 for (int i = 0; i < n; ++i) {
87 a[i] = b[i] + c[i];
88 }
89
90 // schedule: dynamic chunk
91 #pragma omp target
92 #pragma omp teams
93 #pragma omp distribute parallel for simd schedule(dynamic, ch)
94 for (int i = 0; i < n; ++i) {
95 a[i] = b[i] + c[i];
96 }
97
98 return T();
99 }
100
int main() {
102 double *a, *b, *c;
103 int n = 10000;
104 int ch = 100;
105
106 #ifdef LAMBDA
107 // LAMBDA-LABEL: @main
108 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
109 [&]() {
110 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
111
112 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
113 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
114
115 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
116 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
117
118 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
119 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
120
121 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
122 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
123
124 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
125 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
126
127 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
128 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
129
130 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
131 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
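  // Seven target regions follow; each is launched through __tgt_target_teams_mapper with a
  // matching host offloading stub (OFFLOADING_FUN_1..7). The regions differ only in the
  // dist_schedule/schedule clause placed on 'distribute parallel for simd', which is what
  // the per-region checks below exercise.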
132
133 // no schedule clauses
134 #pragma omp target
135 #pragma omp teams
136 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
137 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
138
139 #pragma omp distribute parallel for simd
140 for (int i = 0; i < n; ++i) {
141 a[i] = b[i] + c[i];
142 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
143 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
144 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
145 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
146 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
147
148 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
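  // The numeric schedule kinds in the runtime-init checks correspond to the libomp
  // sched_type encoding (to the extent this file relies on them): 92 = distribute static
  // (no chunk), 91 = distribute static chunked, 34 = 'for' static (no chunk),
  // 33 = 'for' static chunked, 35 = 'for' dynamic chunked.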
149
150 // check EUB for distribute
151 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
152 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
153 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
154 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
155 // LAMBDA-DAG: [[EUB_TRUE]]:
156 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
157 // LAMBDA: br label %[[EUB_END:.+]]
158 // LAMBDA-DAG: [[EUB_FALSE]]:
159 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
160 // LAMBDA: br label %[[EUB_END]]
161 // LAMBDA-DAG: [[EUB_END]]:
162 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
163 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
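  // In pseudo-code, the EUB block above amounts to clamping the distribute upper bound to
  // the number of iterations (a sketch with simplified names, not the exact IR):
  //
  //   comb.ub = (comb.ub > num.iters) ? num.iters : comb.ub;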
164
165 // initialize omp.iv
166 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
167 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
168 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
169
170 // check exit condition
171 // LAMBDA: [[OMP_JUMP_BACK]]:
172 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
173 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
174 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
175 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
176
177 // check that PrevLB and PrevUB are passed to the 'for'
178 // LAMBDA: [[DIST_BODY]]:
179 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
180 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
181 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
182 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
183 // check that distlb and distub are properly passed to fork_call
184 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
185 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
186 // LAMBDA: br label %[[DIST_INC:.+]]
187
188 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
189 // LAMBDA: [[DIST_INC]]:
190 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
191 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
192 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
193 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
194 // LAMBDA: br label %[[OMP_JUMP_BACK]]
195
196 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
197 // LAMBDA: ret
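  // Taken together, the checks for this region describe roughly the following structure
  // (a sketch with simplified names; on 64-bit targets the bounds are additionally
  // zext'ed before the fork and trunc'ed inside the outlined function):
  //
  //   __kmpc_for_static_init_4(..., 92, ..., &comb.lb, &comb.ub, &stride, ...);
  //   comb.ub = (comb.ub > num.iters) ? num.iters : comb.ub;     // EUB
  //   for (iv = comb.lb; iv <= comb.ub; iv += stride)            // distribute loop
  //     __kmpc_fork_call(..., parfor_outlined, comb.lb, comb.ub, ...);
  //   __kmpc_for_static_fini(...);
  //
  // i.e. each team's chunk bounds (PrevLB/PrevUB) are forwarded to the outlined
  // 'parallel for simd', which re-divides them among its threads below.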
198
199 // implementation of 'parallel for'
200 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
201
202 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
203 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
204 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
205
206 // initialize lb and ub to PrevLB and PrevUB
207 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
209 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
210 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
211 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
212 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
213 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
214 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
215 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
216 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
217 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
218
219 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
220 // In this case we use EUB
221 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
222 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
223 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
224 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
225 // LAMBDA: [[PF_EUB_TRUE]]:
226 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
227 // LAMBDA: br label %[[PF_EUB_END:.+]]
228 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
229 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
230 // LAMBDA: br label %[[PF_EUB_END]]
231 // LAMBDA-DAG: [[PF_EUB_END]]:
232 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
233 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
234
235 // initialize omp.iv
236 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
237 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
238 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
239
240 // check exit condition
241 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
242 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
243 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
244 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
245 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
246
// check the loop body
248 // LAMBDA: [[PF_BODY]]:
249 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
250 // LAMBDA: br label {{.+}}
251
252 // check stride 1 for 'for' in 'distribute parallel for simd'
253 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
254 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
255 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
256 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
257
258 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
259 // LAMBDA: ret
260
261 [&]() {
262 a[i] = b[i] + c[i];
263 }();
264 }
265
// dist_schedule: static no chunk (same as the default when no dist_schedule is given)
267 #pragma omp target
268 #pragma omp teams
269 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
270 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
271
272 #pragma omp distribute parallel for simd dist_schedule(static)
273 for (int i = 0; i < n; ++i) {
274 a[i] = b[i] + c[i];
275 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
276 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
277 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
278 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
279 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
280
281 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
282
283 // check EUB for distribute
284 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
285 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
286 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
287 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
288 // LAMBDA-DAG: [[EUB_TRUE]]:
289 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
290 // LAMBDA: br label %[[EUB_END:.+]]
291 // LAMBDA-DAG: [[EUB_FALSE]]:
292 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
293 // LAMBDA: br label %[[EUB_END]]
294 // LAMBDA-DAG: [[EUB_END]]:
295 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
296 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
297
298 // initialize omp.iv
299 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
300 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
301 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
302
303 // check exit condition
304 // LAMBDA: [[OMP_JUMP_BACK]]:
305 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
306 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
307 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
308 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
309
310 // check that PrevLB and PrevUB are passed to the 'for'
311 // LAMBDA: [[DIST_BODY]]:
312 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
313 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
314 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
315 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
316 // check that distlb and distub are properly passed to fork_call
317 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
318 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
319 // LAMBDA: br label %[[DIST_INC:.+]]
320
321 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
322 // LAMBDA: [[DIST_INC]]:
323 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
324 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
325 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
326 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
327 // LAMBDA: br label %[[OMP_JUMP_BACK]]
328
329 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
330 // LAMBDA: ret
331
332 // implementation of 'parallel for'
333 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
334
335 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
336 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
337 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
338
339 // initialize lb and ub to PrevLB and PrevUB
340 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
342 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
343 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
344 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
345 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
346 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
347 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
348 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
349 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
350 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
351
352 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
353 // In this case we use EUB
354 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
355 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
356 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
357 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
358 // LAMBDA: [[PF_EUB_TRUE]]:
359 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
360 // LAMBDA: br label %[[PF_EUB_END:.+]]
361 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
362 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
363 // LAMBDA: br label %[[PF_EUB_END]]
364 // LAMBDA-DAG: [[PF_EUB_END]]:
365 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
366 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
367
368 // initialize omp.iv
369 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
370 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
371 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
372
373 // check exit condition
374 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
375 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
376 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
377 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
378 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
379
// check the loop body
381 // LAMBDA: [[PF_BODY]]:
382 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
383 // LAMBDA: br label {{.+}}
384
385 // check stride 1 for 'for' in 'distribute parallel for simd'
386 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
387 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
388 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
389 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
390
391 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
392 // LAMBDA: ret
393 [&]() {
394 a[i] = b[i] + c[i];
395 }();
396 }
397
398 // dist_schedule: static chunk
399 #pragma omp target
400 #pragma omp teams
401 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
402 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
403
404 #pragma omp distribute parallel for simd dist_schedule(static, ch)
405 for (int i = 0; i < n; ++i) {
406 a[i] = b[i] + c[i];
407 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
408 // LAMBDA: alloca
409 // LAMBDA: alloca
410 // LAMBDA: alloca
411 // LAMBDA: alloca
412 // LAMBDA: alloca
413 // LAMBDA: alloca
414 // LAMBDA: alloca
415 // LAMBDA: [[OMP_IV:%.+]] = alloca
416 // LAMBDA: alloca
417 // LAMBDA: alloca
418 // LAMBDA: alloca
419 // LAMBDA: alloca
420 // LAMBDA: [[OMP_LB:%.+]] = alloca
421 // LAMBDA: [[OMP_UB:%.+]] = alloca
422 // LAMBDA: [[OMP_ST:%.+]] = alloca
423
424 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
425
426 // check EUB for distribute
427 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
428 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
429 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
430 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
431 // LAMBDA-DAG: [[EUB_TRUE]]:
432 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
433 // LAMBDA: br label %[[EUB_END:.+]]
434 // LAMBDA-DAG: [[EUB_FALSE]]:
435 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
436 // LAMBDA: br label %[[EUB_END]]
437 // LAMBDA-DAG: [[EUB_END]]:
438 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
439 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
440
441 // initialize omp.iv
442 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
443 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
444
445 // check exit condition
446 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
447 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
448 // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
449 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
450 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
451
452 // check that PrevLB and PrevUB are passed to the 'for'
453 // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
454 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
455 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
456 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
457 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
458 // check that distlb and distub are properly passed to fork_call
459 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
460 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
461 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
462
463 // check DistInc
464 // LAMBDA: [[DIST_INNER_LOOP_INC]]:
465 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
466 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
467 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
468 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
469 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
470 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
471 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
472 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
473 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
474 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
475 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
476 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
477
478 // Update UB
479 // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
480 // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
481 // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
482 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
483 // LAMBDA-DAG: [[EUB_TRUE_1]]:
484 // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
485 // LAMBDA: br label %[[EUB_END_1:.+]]
486 // LAMBDA-DAG: [[EUB_FALSE_1]]:
487 // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
488 // LAMBDA: br label %[[EUB_END_1]]
489 // LAMBDA-DAG: [[EUB_END_1]]:
490 // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
491 // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
492
493 // Store LB in IV
494 // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
495 // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
496
497 // LAMBDA: [[DIST_INNER_LOOP_END]]:
498 // LAMBDA: br label %[[LOOP_EXIT:.+]]
499
500 // loop exit
501 // LAMBDA: [[LOOP_EXIT]]:
502 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
503 // LAMBDA: ret
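  // With dist_schedule(static, ch) the distribute loop strip-mines over chunks of size
  // 'ch'; roughly, as a sketch with simplified names (not the exact IR):
  //
  //   __kmpc_for_static_init_4(..., 91, ..., &lb, &ub, &st, ...);   // chunked distribute init
  //   while (iv < num.iters) {                  // dist.inner.loop: IV tested against UB + 1
  //     __kmpc_fork_call(..., parfor_outlined, lb, ub, ...);
  //     iv += st;  lb += st;  ub += st;                             // DistInc
  //     ub = (ub > num.iters) ? num.iters : ub;                     // re-clamp EUB
  //     iv = lb;
  //   }
  //   __kmpc_for_static_fini(...);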
504
// skip the implementation of 'parallel for': it uses the default scheduling and was tested above
506 [&]() {
507 a[i] = b[i] + c[i];
508 }();
509 }
510
511 // schedule: static no chunk
512 #pragma omp target
513 #pragma omp teams
514 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
515 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
516
517 #pragma omp distribute parallel for simd schedule(static)
518 for (int i = 0; i < n; ++i) {
519 a[i] = b[i] + c[i];
520 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
521 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
522 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
523 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
524 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
525
526 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
527 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
528 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
529 // LAMBDA: ret
530
// the 'parallel for' implementation is the same as the case without a schedule clause (static, no chunk, is the default)
532 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
533
534 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
535 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
536 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
537
538 // initialize lb and ub to PrevLB and PrevUB
539 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
540 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
541 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
542 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
543 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
544 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
545 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
546 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
547 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
548 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
549 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
550
551 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
552 // In this case we use EUB
553 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
554 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
555 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
556 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
557 // LAMBDA: [[PF_EUB_TRUE]]:
558 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
559 // LAMBDA: br label %[[PF_EUB_END:.+]]
560 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
561 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
562 // LAMBDA: br label %[[PF_EUB_END]]
563 // LAMBDA-DAG: [[PF_EUB_END]]:
564 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
565 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
566
567 // initialize omp.iv
568 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
569 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
570 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
571
572 // check exit condition
573 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
574 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
575 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
576 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
577 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
578
// check the loop body
580 // LAMBDA: [[PF_BODY]]:
581 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
582 // LAMBDA: br label {{.+}}
583
584 // check stride 1 for 'for' in 'distribute parallel for simd'
585 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
586 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
587 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
588 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
589
590 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
591 // LAMBDA: ret
592
593 [&]() {
594 a[i] = b[i] + c[i];
595 }();
596 }
597
598 // schedule: static chunk
599 #pragma omp target
600 #pragma omp teams
601 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
602 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
603
604 #pragma omp distribute parallel for simd schedule(static, ch)
605 for (int i = 0; i < n; ++i) {
606 a[i] = b[i] + c[i];
607 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
608 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
609 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
610 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
611 // LAMBDA: ret
612
613 // 'parallel for' implementation using outer and inner loops and PrevEUB
614 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
615 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
616 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
617 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
618 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
619
620 // initialize lb and ub to PrevLB and PrevUB
621 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
622 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
623 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
624 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
625 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
626 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
627 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
628 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
629 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
630 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
631 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
632 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
633
634 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
635 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
636 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
637 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
638 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
639 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
640 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
641 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
642 // LAMBDA: [[PF_EUB_TRUE]]:
643 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
644 // LAMBDA: br label %[[PF_EUB_END:.+]]
645 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
646 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
647 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
648 // LAMBDA: br label %[[PF_EUB_END]]
649 // LAMBDA-DAG: [[PF_EUB_END]]:
650 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
651 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
652 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
653 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
654 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
655
656 // initialize omp.iv (IV = LB)
657 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
658 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
659
660 // outer loop: while (IV < UB) {
661 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
662 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
663 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
664 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
665
666 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
667 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
668
669 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
670 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
671 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
672 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
673 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
674
675 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
676 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
677 // skip body branch
678 // LAMBDA: br{{.+}}
679 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
680
681 // IV = IV + 1 and inner loop latch
682 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
683 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
684 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
685 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
686 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
687
688 // check NextLB and NextUB
689 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
690 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
691
692 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
693 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
694 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
695 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
696 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
697 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
698 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
699 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
700 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
701 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
702
703 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
704 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
705 // LAMBDA: ret
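  // With schedule(static, ch) the 'parallel for simd' part uses an outer/inner loop pair
  // instead of a single static chunk; roughly (a sketch with simplified names):
  //
  //   __kmpc_for_static_init_4(..., 33, ..., &pf.lb, &pf.ub, &pf.st, ...);
  //   for (;;) {                                        // outer loop over chunks
  //     pf.ub = (pf.ub > prev.ub) ? prev.ub : pf.ub;    // PrevEUB: clamp to the team's chunk
  //     pf.iv = pf.lb;
  //     if (chunk is empty) break;                      // outer-loop exit test on pf.iv vs pf.ub
  //     for (; pf.iv <= pf.ub; ++pf.iv)                 // inner (simd) loop, stride 1
  //       body(pf.iv);
  //     pf.lb += pf.st;  pf.ub += pf.st;
  //   }
  //   __kmpc_for_static_fini(...);
  //
  // so the upper bound is clamped against PrevUB (the team's chunk) rather than the global
  // trip count, which is what the PrevEUB checks above verify.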
706 [&]() {
707 a[i] = b[i] + c[i];
708 }();
709 }
710
711 // schedule: dynamic no chunk
712 #pragma omp target
713 #pragma omp teams
714 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
715 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
716
717 #pragma omp distribute parallel for simd schedule(dynamic)
718 for (int i = 0; i < n; ++i) {
719 a[i] = b[i] + c[i];
720 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
721 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
722 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
723 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
724 // LAMBDA: ret
725
726 // 'parallel for' implementation using outer and inner loops and PrevEUB
727 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
728 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
729 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
730 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
731 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
732
733 // initialize lb and ub to PrevLB and PrevUB
734 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
735 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
736 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
737 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
738 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
739 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
740 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
741 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
742 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
743 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
744 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
745 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
746 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
747 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
748
749 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
750 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
751 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
752 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
753
754 // initialize omp.iv (IV = LB)
755 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
756 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
757 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
758 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
759
760 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
761 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
762 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
763 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
764 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
765
766 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
767 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
768 // skip body branch
769 // LAMBDA: br{{.+}}
770 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
771
772 // IV = IV + 1 and inner loop latch
773 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
774 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
775 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
776 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
777 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
778
779 // check NextLB and NextUB
780 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
781 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
782
783 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
784 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
785
786 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
787 // LAMBDA: ret
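  // With a dynamic schedule the 'parallel for simd' chunks are handed out by the runtime
  // instead of a static init/fini pair; roughly (a sketch with simplified names):
  //
  //   __kmpc_dispatch_init_4(..., 35, prev.lb, prev.ub, ...);
  //   while (__kmpc_dispatch_next_4(..., &pf.lb, &pf.ub, &pf.st) != 0) {
  //     for (pf.iv = pf.lb; pf.iv <= pf.ub; ++pf.iv)    // inner (simd) loop
  //       body(pf.iv);
  //   }
  //
  // The schedule(dynamic, ch) region below follows the same pattern.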
788 [&]() {
789 a[i] = b[i] + c[i];
790 }();
791 }
792
793 // schedule: dynamic chunk
794 #pragma omp target
795 #pragma omp teams
796 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
797 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
798
799 #pragma omp distribute parallel for simd schedule(dynamic, ch)
800 for (int i = 0; i < n; ++i) {
801 a[i] = b[i] + c[i];
802 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
803 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
804 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
805 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
806 // LAMBDA: ret
807
808 // 'parallel for' implementation using outer and inner loops and PrevEUB
809 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
810 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
811 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
812 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
813 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
814
815 // initialize lb and ub to PrevLB and PrevUB
816 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
817 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
818 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
819 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
820 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
821 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
822 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
823 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
824 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
825 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
826 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
827 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
828 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
829 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
830
831 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
832 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
833 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
834 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
835
836 // initialize omp.iv (IV = LB)
837 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
838 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
839 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
840 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
841
842 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
843 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
844 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
845 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
846 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
847
848 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
849 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
850 // skip body branch
851 // LAMBDA: br{{.+}}
852 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
853
854 // IV = IV + 1 and inner loop latch
855 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
856 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
857 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
858 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
859 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
860
861 // check NextLB and NextUB
862 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
863 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
864
865 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
866 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
867
868 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
869 // LAMBDA: ret
870 [&]() {
871 a[i] = b[i] + c[i];
872 }();
873 }
874 }();
875 return 0;
876 #else
877 // CHECK-LABEL: @main
878
879 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
880 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
881
882 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
883 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
884
885 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
886 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
887
888 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
889 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
890
891 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
892 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
893
894 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
895 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
896
897 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
898 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
899
900 // CHECK: call{{.+}} [[TMAIN:@.+]]()
901
902 // no schedule clauses
903 #pragma omp target
904 #pragma omp teams
905 // CHECK: define internal void [[OFFLOADING_FUN_1]](
906 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
907
908 #pragma omp distribute parallel for simd
909 for (int i = 0; i < n; ++i) {
910 a[i] = b[i] + c[i];
911 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
912 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
913 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
914 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
915 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
916
917 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
918
919 // check EUB for distribute
920 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
921 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
922 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
923 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
924 // CHECK-DAG: [[EUB_TRUE]]:
925 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
926 // CHECK: br label %[[EUB_END:.+]]
927 // CHECK-DAG: [[EUB_FALSE]]:
928 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
929 // CHECK: br label %[[EUB_END]]
930 // CHECK-DAG: [[EUB_END]]:
931 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
932 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
933
934 // initialize omp.iv
935 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
936 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
937 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
938
939 // check exit condition
940 // CHECK: [[OMP_JUMP_BACK]]:
941 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
942 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
943 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
944 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
945
946 // check that PrevLB and PrevUB are passed to the 'for'
947 // CHECK: [[DIST_BODY]]:
948 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
949 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
950 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
951 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
952 // check that distlb and distub are properly passed to fork_call
953 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
954 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
955 // CHECK: br label %[[DIST_INC:.+]]
956
957 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
958 // CHECK: [[DIST_INC]]:
959 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
960 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
961 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
962 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
963 // CHECK: br label %[[OMP_JUMP_BACK]]
964
965 // CHECK-DAG: call void @__kmpc_for_static_fini(
966 // CHECK: ret
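  // The non-LAMBDA regions checked from here on have the same structure as sketched for the
  // LAMBDA variants above: static distribute init (92), EUB clamp, a per-team fork of the
  // 'parallel for' with PrevLB/PrevUB, and a static fini.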
967
968 // implementation of 'parallel for'
969 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
970
971 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
972 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
973 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
974
975 // initialize lb and ub to PrevLB and PrevUB
976 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
977 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
978 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
979 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
980 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
981 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
982 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
983 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
984 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
985 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
986 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
987
988 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
989 // In this case we use EUB
990 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
991 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
992 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
993 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
994 // CHECK: [[PF_EUB_TRUE]]:
995 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
996 // CHECK: br label %[[PF_EUB_END:.+]]
997 // CHECK-DAG: [[PF_EUB_FALSE]]:
998 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
999 // CHECK: br label %[[PF_EUB_END]]
1000 // CHECK-DAG: [[PF_EUB_END]]:
1001 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1002 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1003
1004 // initialize omp.iv
1005 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1006 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1007 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1008
1009 // check exit condition
1010 // CHECK: [[OMP_PF_JUMP_BACK]]:
1011 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1012 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1013 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1014 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1015
// check the loop body
1017 // CHECK: [[PF_BODY]]:
1018 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1019 // CHECK: br label {{.+}}
1020
1021 // check stride 1 for 'for' in 'distribute parallel for simd'
1022 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1023 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1024 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1025 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1026
1027 // CHECK-DAG: call void @__kmpc_for_static_fini(
1028 // CHECK: ret
1029 }
1030
1031 // dist_schedule: static no chunk
1032 #pragma omp target
1033 #pragma omp teams
1034 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1035 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1036
1037 #pragma omp distribute parallel for simd dist_schedule(static)
1038 for (int i = 0; i < n; ++i) {
1039 a[i] = b[i] + c[i];
1040 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1041 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1042 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1043 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1044 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1045
1046 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1047
1048 // check EUB for distribute
1049 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1050 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1051 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1052 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1053 // CHECK-DAG: [[EUB_TRUE]]:
1054 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1055 // CHECK: br label %[[EUB_END:.+]]
1056 // CHECK-DAG: [[EUB_FALSE]]:
1057 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1058 // CHECK: br label %[[EUB_END]]
1059 // CHECK-DAG: [[EUB_END]]:
1060 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1061 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1062
1063 // initialize omp.iv
1064 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1065 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1066 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1067
1068 // check exit condition
1069 // CHECK: [[OMP_JUMP_BACK]]:
1070 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1071 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1072 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1073 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1074
1075 // check that PrevLB and PrevUB are passed to the 'for'
1076 // CHECK: [[DIST_BODY]]:
1077 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1078 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1079 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1080 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1081 // check that distlb and distub are properly passed to fork_call
1082 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1083 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1084 // CHECK: br label %[[DIST_INC:.+]]
1085
1086 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1087 // CHECK: [[DIST_INC]]:
1088 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1089 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1090 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1091 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1092 // CHECK: br label %[[OMP_JUMP_BACK]]
1093
1094 // CHECK-DAG: call void @__kmpc_for_static_fini(
1095 // CHECK: ret
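// A sketch of the 'distribute' lowering verified above (assumed shape, reading aid only;
// lb/ub/st/iv stand for the .omp.* temporaries and argument lists are abbreviated):
//   __kmpc_for_static_init_4(..., 92, ..., &lb, &ub, &st, ...);
//   ub = min(ub, NumIterations);                             // EUB
//   for (iv = lb; iv <= ub; iv += st)                        // distribute loop
//     __kmpc_fork_call(..., parfor_outlined, lb, ub, ...);   // 'parallel for' runs the chunk
//   __kmpc_for_static_fini(...);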
1096
1097 // implementation of 'parallel for'
1098 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1099
1100 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1101 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1102 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1103
1104 // initialize lb and ub to PrevLB and PrevUB
1105 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1106 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1107 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1108 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1109 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1110 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1111 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1112 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1113 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1114 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1115 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1116
// PrevEUB is used only when the 'for' has a chunked schedule; otherwise EUB is used.
// In this case we use EUB.
1119 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1120 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1121 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1122 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1123 // CHECK: [[PF_EUB_TRUE]]:
1124 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1125 // CHECK: br label %[[PF_EUB_END:.+]]
1126 // CHECK-DAG: [[PF_EUB_FALSE]]:
1127 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1128 // CHECK: br label %[[PF_EUB_END]]
1129 // CHECK-DAG: [[PF_EUB_END]]:
1130 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1131 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1132
1133 // initialize omp.iv
1134 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1135 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1136 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1137
1138 // check exit condition
1139 // CHECK: [[OMP_PF_JUMP_BACK]]:
1140 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1141 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1142 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1143 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1144
// check the 'for' loop body (it just loads omp.iv and branches into the body)
1146 // CHECK: [[PF_BODY]]:
1147 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1148 // CHECK: br label {{.+}}
1149
1150 // check stride 1 for 'for' in 'distribute parallel for simd'
1151 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1152 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1153 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1154 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1155
1156 // CHECK-DAG: call void @__kmpc_for_static_fini(
1157 // CHECK: ret
1158 }
1159
1160 // dist_schedule: static chunk
1161 #pragma omp target
1162 #pragma omp teams
1163 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1164 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1165
1166 #pragma omp distribute parallel for simd dist_schedule(static, ch)
1167 for (int i = 0; i < n; ++i) {
1168 a[i] = b[i] + c[i];
1169 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1170 // CHECK: alloca
1171 // CHECK: alloca
1172 // CHECK: alloca
1173 // CHECK: alloca
1174 // CHECK: alloca
1175 // CHECK: alloca
1176 // CHECK: alloca
1177 // CHECK: [[OMP_IV:%.+]] = alloca
1178 // CHECK: alloca
1179 // CHECK: alloca
1180 // CHECK: alloca
1181 // CHECK: alloca
1182 // CHECK: [[OMP_LB:%.+]] = alloca
1183 // CHECK: [[OMP_UB:%.+]] = alloca
1184 // CHECK: [[OMP_ST:%.+]] = alloca
1185
// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1187 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
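// A sketch of the chunked 'distribute' shape that the blocks below verify (assumed
// shape, reading aid only; argument lists are abbreviated):
//   __kmpc_for_static_init_4(..., 91, ..., &lb, &ub, &st, ..., ch);
//   ub = min(ub, NumIterations);                             // EUB
//   iv = lb;
//   while (iv <= ub) {                                       // dist.inner loop
//     __kmpc_fork_call(..., parfor_outlined, lb, ub, ...);   // one distribute chunk
//     iv += st; lb += st; ub += st;                          // DistInc
//     ub = min(ub, NumIterations);                           // re-clamp UB
//     iv = lb;
//   }
//   __kmpc_for_static_fini(...);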
1188
1189 // check EUB for distribute
1190 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1191 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
1192 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1193 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1194 // CHECK-DAG: [[EUB_TRUE]]:
1195 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1196 // CHECK: br label %[[EUB_END:.+]]
1197 // CHECK-DAG: [[EUB_FALSE]]:
1198 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1199 // CHECK: br label %[[EUB_END]]
1200 // CHECK-DAG: [[EUB_END]]:
1201 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1202 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1203
1204 // initialize omp.iv
1205 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1206 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1207
1208 // check exit condition
1209 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1210 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
1211 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
1212 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
1213 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
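// Note: the comparison is emitted as 'IV < UB + 1', which is just the strict form of
// 'IV <= UB'.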
1214
1215 // check that PrevLB and PrevUB are passed to the 'for'
1216 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1217 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1218 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1219 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1220 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1221 // check that distlb and distub are properly passed to fork_call
1222 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1223 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1224 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1225
1226 // check DistInc
1227 // CHECK: [[DIST_INNER_LOOP_INC]]:
1228 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1229 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1230 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1231 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1232 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1233 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1234 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1235 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1236 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1237 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1238 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1239 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1240
1241 // Update UB
1242 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1243 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
1244 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
1245 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
1246 // CHECK-DAG: [[EUB_TRUE_1]]:
1247 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
1248 // CHECK: br label %[[EUB_END_1:.+]]
1249 // CHECK-DAG: [[EUB_FALSE_1]]:
1250 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
1251 // CHECK: br label %[[EUB_END_1]]
1252 // CHECK-DAG: [[EUB_END_1]]:
1253 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
1254 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
1255
1256 // Store LB in IV
1257 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1258 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
1259
1260 // CHECK: [[DIST_INNER_LOOP_END]]:
1261 // CHECK: br label %[[LOOP_EXIT:.+]]
1262
1263 // loop exit
1264 // CHECK: [[LOOP_EXIT]]:
1265 // CHECK-DAG: call void @__kmpc_for_static_fini(
1266 // CHECK: ret
1267
// skip the implementation of 'parallel for': it uses the default scheduling and was tested above
1269 }
1270
1271 // schedule: static no chunk
1272 #pragma omp target
1273 #pragma omp teams
1274 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1275 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1276
1277 #pragma omp distribute parallel for simd schedule(static)
1278 for (int i = 0; i < n; ++i) {
1279 a[i] = b[i] + c[i];
1280 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1281 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1282 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1283 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1284 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1285
1286 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1287 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
1289 // CHECK: ret
1290
// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1292 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1293
1294 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1295 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1296 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1297
1298 // initialize lb and ub to PrevLB and PrevUB
1299 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1300 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1301 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1302 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1303 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1304 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1305 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1306 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1307 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1308 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1309 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1310
// PrevEUB is used only when the 'for' has a chunked schedule; otherwise EUB is used.
// In this case we use EUB.
1313 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1314 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1315 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1316 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1317 // CHECK: [[PF_EUB_TRUE]]:
1318 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1319 // CHECK: br label %[[PF_EUB_END:.+]]
1320 // CHECK-DAG: [[PF_EUB_FALSE]]:
1321 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1322 // CHECK: br label %[[PF_EUB_END]]
1323 // CHECK-DAG: [[PF_EUB_END]]:
1324 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1325 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1326
1327 // initialize omp.iv
1328 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1329 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1330 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1331
1332 // check exit condition
1333 // CHECK: [[OMP_PF_JUMP_BACK]]:
1334 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1335 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1336 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1337 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1338
// check the 'for' loop body (it just loads omp.iv and branches into the body)
1340 // CHECK: [[PF_BODY]]:
1341 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1342 // CHECK: br label {{.+}}
1343
1344 // check stride 1 for 'for' in 'distribute parallel for simd'
1345 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1346 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1347 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1348 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1349
1350 // CHECK-DAG: call void @__kmpc_for_static_fini(
1351 // CHECK: ret
1352 }
1353
1354 // schedule: static chunk
1355 #pragma omp target
1356 #pragma omp teams
1357 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
1358 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
1359
1360 #pragma omp distribute parallel for simd schedule(static, ch)
1361 for (int i = 0; i < n; ++i) {
1362 a[i] = b[i] + c[i];
1363 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
1364 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1365 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
1367 // CHECK: ret
1368
1369 // 'parallel for' implementation using outer and inner loops and PrevEUB
1370 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1371 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1372 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1373 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1374 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1375
1376 // initialize lb and ub to PrevLB and PrevUB
1377 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1378 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1379 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1380 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1381 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1382 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1383 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1384 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1385 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1386 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1387 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1388 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
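// A sketch of the chunked 'for' that the blocks below verify (assumed shape, reading
// aid only; PrevUB is the bound handed down by 'distribute'):
//   __kmpc_for_static_init_4(..., 33, ..., &lb, &ub, &st, ..., ch);
//   for (;;) {                                // outer loop over 'for' chunks
//     ub = min(ub, PrevUB);                   // PrevEUB
//     iv = lb;
//     if (iv > ub) break;
//     for (; iv <= ub; ++iv)                  // inner loop: one chunk
//       <loop body>;
//     lb += st; ub += st;                     // NextLB / NextUB
//   }
//   __kmpc_for_static_fini(...);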
1389
1390 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
1391 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1392 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1393 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
1394 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1395 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
1396 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
1397 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1398 // CHECK: [[PF_EUB_TRUE]]:
1399 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1400 // CHECK: br label %[[PF_EUB_END:.+]]
1401 // CHECK-DAG: [[PF_EUB_FALSE]]:
1402 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1403 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
1404 // CHECK: br label %[[PF_EUB_END]]
1405 // CHECK-DAG: [[PF_EUB_END]]:
1406 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
1407 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
1408 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
1409 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
1410 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
1411
1412 // initialize omp.iv (IV = LB)
1413 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1414 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1415
1416 // outer loop: while (IV < UB) {
1417 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1418 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1419 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1420 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1421
1422 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1423 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
1424
1425 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
1426 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1427 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1428 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1429 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1430
1431 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1432 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1433 // skip body branch
1434 // CHECK: br{{.+}}
1435 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1436
1437 // IV = IV + 1 and inner loop latch
1438 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1439 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1440 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1441 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1442 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
1443
1444 // check NextLB and NextUB
1445 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1446 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1447
1448 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1449 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1450 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1451 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
1452 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
1453 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1454 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1455 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
1456 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
1457 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1458
1459 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1460 // CHECK-DAG: call void @__kmpc_for_static_fini(
1461 // CHECK: ret
1462 }
1463
1464 // schedule: dynamic no chunk
1465 #pragma omp target
1466 #pragma omp teams
1467 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
1468 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
1469
1470 #pragma omp distribute parallel for simd schedule(dynamic)
1471 for (int i = 0; i < n; ++i) {
1472 a[i] = b[i] + c[i];
1473 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
1474 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1475 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
1477 // CHECK: ret
1478
// 'parallel for' implementation using outer and inner loops; chunk bounds come from
// __kmpc_dispatch_next_4 (PrevLB/PrevUB only seed __kmpc_dispatch_init_4)
1480 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1481 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1482 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1483 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1484 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1485
1486 // initialize lb and ub to PrevLB and PrevUB
1487 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1488 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1489 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1490 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1491 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1492 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1493 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1494 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1495 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1496 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1497 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1498 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1499 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1500 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1501
1502 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1503 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1504 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1505 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
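// A sketch of the dispatch-driven 'for' that the blocks below verify (assumed shape,
// reading aid only; argument lists are abbreviated):
//   __kmpc_dispatch_init_4(..., 35, lb, ub, ...);            // seeded with PrevLB/PrevUB
//   while (__kmpc_dispatch_next_4(..., &lb, &ub, &st) != 0) {
//     for (iv = lb; iv <= ub; ++iv)                          // one dynamically obtained chunk
//       <loop body>;
//   }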
1506
1507 // initialize omp.iv (IV = LB)
1508 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1509 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1510 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1511 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1512
1513 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1514 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1515 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1516 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1517 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1518
1519 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1520 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1521 // skip body branch
1522 // CHECK: br{{.+}}
1523 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1524
1525 // IV = IV + 1 and inner loop latch
1526 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1527 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1528 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1529 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1530 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1531
// inner loop exit: branch to the outer loop latch and request the next chunk
1533 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1534 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1535
1536 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1537 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1538
1539 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1540 // CHECK: ret
1541 }
1542
1543 // schedule: dynamic chunk
1544 #pragma omp target
1545 #pragma omp teams
1546 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
1547 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
1548
1549 #pragma omp distribute parallel for simd schedule(dynamic, ch)
1550 for (int i = 0; i < n; ++i) {
1551 a[i] = b[i] + c[i];
1552 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
1553 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1554 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
1556 // CHECK: ret
1557
// 'parallel for' implementation using outer and inner loops; chunk bounds come from
// __kmpc_dispatch_next_4 (PrevLB/PrevUB only seed __kmpc_dispatch_init_4)
1559 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1560 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1561 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1562 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1563 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1564
1565 // initialize lb and ub to PrevLB and PrevUB
1566 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1567 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1568 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1569 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1570 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1571 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1572 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1573 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1574 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1575 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1576 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1577 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1578 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1579 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
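// Presumably the only difference from the unchunked dynamic case above is that the chunk
// expression 'ch' is forwarded to __kmpc_dispatch_init_4; the loop structure verified
// below is identical.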
1580
1581 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1582 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1583 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1584 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1585
1586 // initialize omp.iv (IV = LB)
1587 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1588 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1589 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1590 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1591
1592 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1593 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1594 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1595 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1596 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1597
1598 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1599 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1600 // skip body branch
1601 // CHECK: br{{.+}}
1602 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1603
1604 // IV = IV + 1 and inner loop latch
1605 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1606 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1607 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1608 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1609 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1610
// inner loop exit: branch to the outer loop latch and request the next chunk
1612 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1613 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1614
1615 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1616 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1617
1618 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1619 // CHECK: ret
1620 }
1621
1622 return tmain<int>();
1623 #endif
1624 }
1625
// check the code generated for the tmain<int>() instantiation
1627 // CHECK: define{{.+}} [[TMAIN]]()
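// The seven target regions of tmain<int> appear to mirror the ones in main, in the same
// order: each lowers to a __tgt_target_teams_mapper call plus an offloading function
// (presumably the host fallback used when offloading is unavailable), which the
// directives below capture and then inspect.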
1628
1629 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1630 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
1631
1632 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1633 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
1634
1635 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1636 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
1637
1638 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1639 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
1640
1641 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1642 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
1643
1644 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1645 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
1646
1647 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1648 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
1649
1650 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
1651 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
1652
1653 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
1654 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1655 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1656 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1657 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1658
1659 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1660
1661 // check EUB for distribute
1662 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1663 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1664 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1665 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1666 // CHECK-DAG: [[EUB_TRUE]]:
1667 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1668 // CHECK: br label %[[EUB_END:.+]]
1669 // CHECK-DAG: [[EUB_FALSE]]:
1670 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1671 // CHECK: br label %[[EUB_END]]
1672 // CHECK-DAG: [[EUB_END]]:
1673 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1674 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1675
1676 // initialize omp.iv
1677 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1678 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1679 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1680
1681 // check exit condition
1682 // CHECK: [[OMP_JUMP_BACK]]:
1683 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1684 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1685 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1686 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1687
1688 // check that PrevLB and PrevUB are passed to the 'for'
1689 // CHECK: [[DIST_BODY]]:
1690 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1691 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1692 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1693 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1694 // check that distlb and distub are properly passed to fork_call
1695 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1696 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1697 // CHECK: br label %[[DIST_INC:.+]]
1698
1699 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1700 // CHECK: [[DIST_INC]]:
1701 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1702 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1703 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1704 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1705 // CHECK: br label %[[OMP_JUMP_BACK]]
1706
1707 // CHECK-DAG: call void @__kmpc_for_static_fini(
1708 // CHECK: ret
1709
1710 // implementation of 'parallel for'
1711 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1712
1713 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1714 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1715 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1716
1717 // initialize lb and ub to PrevLB and PrevUB
1718 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1719 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1720 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1721 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1722 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1723 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1724 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1725 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1726 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1727 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1728 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1729
// PrevEUB is used only when the 'for' has a chunked schedule; otherwise EUB is used.
// In this case we use EUB.
1732 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1733 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1734 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1735 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1736 // CHECK: [[PF_EUB_TRUE]]:
1737 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1738 // CHECK: br label %[[PF_EUB_END:.+]]
1739 // CHECK-DAG: [[PF_EUB_FALSE]]:
1740 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1741 // CHECK: br label %[[PF_EUB_END]]
1742 // CHECK-DAG: [[PF_EUB_END]]:
1743 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1744 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1745
1746 // initialize omp.iv
1747 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1748 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1749 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1750
1751 // check exit condition
1752 // CHECK: [[OMP_PF_JUMP_BACK]]:
1753 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1754 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1755 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1756 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1757
// check the 'for' loop body (it just loads omp.iv and branches into the body)
1759 // CHECK: [[PF_BODY]]:
1760 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1761 // CHECK: br label {{.+}}
1762
1763 // check stride 1 for 'for' in 'distribute parallel for simd'
1764 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1765 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1766 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1767 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1768
1769 // CHECK-DAG: call void @__kmpc_for_static_fini(
1770 // CHECK: ret
1771
1772 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1773 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1774
1775 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1776 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1777 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1778 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1779 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1780
1781 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1782
1783 // check EUB for distribute
1784 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1785 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1786 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1787 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1788 // CHECK-DAG: [[EUB_TRUE]]:
1789 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1790 // CHECK: br label %[[EUB_END:.+]]
1791 // CHECK-DAG: [[EUB_FALSE]]:
1792 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1793 // CHECK: br label %[[EUB_END]]
1794 // CHECK-DAG: [[EUB_END]]:
1795 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1796 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1797
1798 // initialize omp.iv
1799 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1800 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1801 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1802
1803 // check exit condition
1804 // CHECK: [[OMP_JUMP_BACK]]:
1805 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1806 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1807 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1808 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1809
1810 // check that PrevLB and PrevUB are passed to the 'for'
1811 // CHECK: [[DIST_BODY]]:
1812 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1813 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1814 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1815 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1816 // check that distlb and distub are properly passed to fork_call
1817 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1818 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1819 // CHECK: br label %[[DIST_INC:.+]]
1820
1821 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1822 // CHECK: [[DIST_INC]]:
1823 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1824 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1825 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1826 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1827 // CHECK: br label %[[OMP_JUMP_BACK]]
1828
1829 // CHECK-DAG: call void @__kmpc_for_static_fini(
1830 // CHECK: ret
1831
1832 // implementation of 'parallel for'
1833 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1834
1835 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1836 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1837 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1838
1839 // initialize lb and ub to PrevLB and PrevUB
1840 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1841 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1842 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1843 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1844 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1845 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1846 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1847 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1848 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1849 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1850 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1851
// PrevEUB is used only when the 'for' has a chunked schedule; otherwise EUB is used.
// In this case we use EUB.
1854 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1855 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1856 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1857 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1858 // CHECK: [[PF_EUB_TRUE]]:
1859 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1860 // CHECK: br label %[[PF_EUB_END:.+]]
1861 // CHECK-DAG: [[PF_EUB_FALSE]]:
1862 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1863 // CHECK: br label %[[PF_EUB_END]]
1864 // CHECK-DAG: [[PF_EUB_END]]:
1865 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1866 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1867
1868 // initialize omp.iv
1869 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1870 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1871 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1872
1873 // check exit condition
1874 // CHECK: [[OMP_PF_JUMP_BACK]]:
1875 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1876 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1877 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1878 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1879
// check the 'for' loop body (it just loads omp.iv and branches into the body)
1881 // CHECK: [[PF_BODY]]:
1882 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1883 // CHECK: br label {{.+}}
1884
1885 // check stride 1 for 'for' in 'distribute parallel for simd'
1886 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1887 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1888 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1889 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1890
1891 // CHECK-DAG: call void @__kmpc_for_static_fini(
1892 // CHECK: ret
1893
1894 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1895 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1896
1897 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1898 // CHECK: alloca
1899 // CHECK: alloca
1900 // CHECK: alloca
1901 // CHECK: alloca
1902 // CHECK: alloca
1903 // CHECK: alloca
1904 // CHECK: alloca
1905 // CHECK: [[OMP_IV:%.+]] = alloca
1906 // CHECK: alloca
1907 // CHECK: alloca
1908 // CHECK: alloca
1909 // CHECK: alloca
1910 // CHECK: [[OMP_LB:%.+]] = alloca
1911 // CHECK: [[OMP_UB:%.+]] = alloca
1912 // CHECK: [[OMP_ST:%.+]] = alloca
1913
// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
1915 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1916
1917 // check EUB for distribute
1918 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1919 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
1920 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1921 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1922 // CHECK-DAG: [[EUB_TRUE]]:
1923 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1924 // CHECK: br label %[[EUB_END:.+]]
1925 // CHECK-DAG: [[EUB_FALSE]]:
1926 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1927 // CHECK: br label %[[EUB_END]]
1928 // CHECK-DAG: [[EUB_END]]:
1929 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1930 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1931
1932 // initialize omp.iv
1933 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1934 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1935
1936 // check exit condition
1937 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1938 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
1939 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
1940 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
1941 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1942
1943 // check that PrevLB and PrevUB are passed to the 'for'
1944 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1945 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1946 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1947 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1948 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1949 // check that distlb and distub are properly passed to fork_call
1950 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1951 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1952 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1953
1954 // check DistInc
1955 // CHECK: [[DIST_INNER_LOOP_INC]]:
1956 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1957 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1958 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1959 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1960 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1961 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1962 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1963 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1964 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1965 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1966 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1967 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1968
1969 // Update UB
1970 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1971 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
1972 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
1973 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
1974 // CHECK-DAG: [[EUB_TRUE_1]]:
1975 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
1976 // CHECK: br label %[[EUB_END_1:.+]]
1977 // CHECK-DAG: [[EUB_FALSE_1]]:
1978 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
1979 // CHECK: br label %[[EUB_END_1]]
1980 // CHECK-DAG: [[EUB_END_1]]:
1981 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
1982 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
1983
1984 // Store LB in IV
1985 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1986 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
1987
1988 // CHECK: [[DIST_INNER_LOOP_END]]:
1989 // CHECK: br label %[[LOOP_EXIT:.+]]
1990
1991 // loop exit
1992 // CHECK: [[LOOP_EXIT]]:
1993 // CHECK-DAG: call void @__kmpc_for_static_fini(
1994 // CHECK: ret
1995
// skip the implementation of 'parallel for': it uses the default scheduling and was tested above
1997
1998 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1999 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
2000
2001 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
2002 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
2003 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
2004 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
2005 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
2006
2007 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2008 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation, as it is tested above for the default dist_schedule case
2010 // CHECK: ret
2011
// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
2013 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2014
2015 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2016 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2017 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2018
2019 // initialize lb and ub to PrevLB and PrevUB
2020 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2021 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2022 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2023 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2024 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2025 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2026 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2027 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2028 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2029 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2030 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2031
// PrevEUB is used only when the 'for' has a chunked schedule; otherwise EUB is used.
// In this case we use EUB.
2034 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2035 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
2036 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
2037 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2038 // CHECK: [[PF_EUB_TRUE]]:
2039 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
2040 // CHECK: br label %[[PF_EUB_END:.+]]
2041 // CHECK-DAG: [[PF_EUB_FALSE]]:
2042 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2043 // CHECK: br label %[[PF_EUB_END]]
2044 // CHECK-DAG: [[PF_EUB_END]]:
2045 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
2046 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
2047
2048 // initialize omp.iv
2049 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2050 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2051 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
2052
2053 // check exit condition
2054 // CHECK: [[OMP_PF_JUMP_BACK]]:
2055 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
2056 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
2057 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2058 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
2059
// check the 'for' loop body (it just loads omp.iv and branches into the body)
2061 // CHECK: [[PF_BODY]]:
2062 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2063 // CHECK: br label {{.+}}
2064
2065 // check stride 1 for 'for' in 'distribute parallel for simd'
2066 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
2067 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
2068 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
2069 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
2070
2071 // CHECK-DAG: call void @__kmpc_for_static_fini(
2072 // CHECK: ret
2073
2074 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
2075 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
2076
2077 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
2078 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2079 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation; it is tested above for the default dist_schedule case
2081 // CHECK: ret
2082
2083 // 'parallel for' implementation using outer and inner loops and PrevEUB
2084 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2085 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2086 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2087 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2088 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2089
2090 // initialize lb and ub to PrevLB and PrevUB
2091 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2092 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2093 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2094 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2095 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2096 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2097 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2098 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2099 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2100 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2101 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2102 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
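
// Sketch of the chunked-static pattern verified in this function, in comments
// only; 33 is assumed to be kmp_sch_static_chunked in the libomp encoding, and
// chunk stands for the value of the schedule clause. The runtime assigns the
// first chunk and the thread then strides through its remaining chunks itself:
//   __kmpc_for_static_init_4(loc, gtid, /*schedtype=*/33, &last,
//                            &lb, &ub, &stride, /*incr=*/1, chunk);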
2103
2104 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
2105 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2106 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2107 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
2108 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2109 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
2110 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
2111 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2112 // CHECK: [[PF_EUB_TRUE]]:
2113 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2114 // CHECK: br label %[[PF_EUB_END:.+]]
2115 // CHECK-DAG: [[PF_EUB_FALSE]]:
2116 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2117 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
2118 // CHECK: br label %[[PF_EUB_END]]
2119 // CHECK-DAG: [[PF_EUB_END]]:
2120 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
2121 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
2122 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
2123 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
2124 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
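
// Illustrative shape of the PrevEUB clamp verified above: the chunk's upper
// bound is capped by the enclosing distribute chunk (PrevUB) rather than by
// the global trip count:
//   if (ub > PrevUB) ub = PrevUB;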
2125
2126 // initialize omp.iv (IV = LB)
2127 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2128 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2129
// outer loop: while (IV < UB)
2131 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2132 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2133 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2134 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2135
2136 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2137 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
2138
2139 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
2140 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2141 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2142 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2143 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2144
2145 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2146 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2147 // skip body branch
2148 // CHECK: br{{.+}}
2149 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2150
2151 // IV = IV + 1 and inner loop latch
2152 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2156 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
2157
2158 // check NextLB and NextUB
2159 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2160 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2161
2162 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2163 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2164 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2165 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
2166 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
2167 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2168 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2169 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
2170 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
2171 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
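
// The outer-loop increment verified above advances to the next chunk owned by
// this thread (illustrative only):
//   lb += stride;
//   ub += stride;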
2172
2173 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2174 // CHECK-DAG: call void @__kmpc_for_static_fini(
2175 // CHECK: ret
2176
2177 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
2178 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
2179
2180 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
2181 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2182 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation; it is tested above for the default dist_schedule case
2184 // CHECK: ret
2185
// 'parallel for' implementation using outer and inner loops; the chunk bounds come from __kmpc_dispatch_next_4 rather than a PrevEUB clamp
2187 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2188 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2189 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2190 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2191 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2192
2193 // initialize lb and ub to PrevLB and PrevUB
2194 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2195 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2196 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2197 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2198 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2199 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2200 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2201 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2202 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2203 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2204 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2205 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2206 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2207 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2208
2209 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2210 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2211 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2212 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
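
// The dispatch-driven outer loop verified in this function follows the usual
// libomp pattern sketched below, in comments only; 35 is assumed to be
// kmp_sch_dynamic_chunked, and chunk stands for the schedule clause value:
//   __kmpc_dispatch_init_4(loc, gtid, /*schedtype=*/35, lb, ub, /*st=*/1, chunk);
//   while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st) != 0) {
//     for (kmp_int32 iv = lb; iv <= ub; ++iv)
//       body(iv);
//   }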
2213
2214 // initialize omp.iv (IV = LB)
2215 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2216 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2217 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2218 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2219
2220 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2221 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2222 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2223 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2224 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2225
2226 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2227 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2228 // skip body branch
2229 // CHECK: br{{.+}}
2230 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2231
2232 // IV = IV + 1 and inner loop latch
2233 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2237 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2238
// no explicit NextLB/NextUB update here; the next chunk's bounds are produced by __kmpc_dispatch_next_4 in the outer loop header
2240 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2241 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2242
2243 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2244 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2245
2246 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2247 // CHECK: ret
2248
2249 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
2250 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
2251
2252 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
2253 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2254 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation; it is tested above for the default dist_schedule case
2256 // CHECK: ret
2257
// 'parallel for' implementation using outer and inner loops; the chunk bounds come from __kmpc_dispatch_next_4 rather than a PrevEUB clamp
2259 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2260 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2261 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2262 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2263 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2264
2265 // initialize lb and ub to PrevLB and PrevUB
2266 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2267 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2268 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2269 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2270 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2271 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2272 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2273 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2274 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2275 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2276 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2277 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2278 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2279 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2280
2281 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2282 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2283 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2284 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
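
// Same dispatch-driven outer/inner loop pattern as in the previous outlined
// function; see the sketch after its outer-loop-header checks above.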
2285
2286 // initialize omp.iv (IV = LB)
2287 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2288 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2289 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2290 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2291
2292 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2293 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2294 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2295 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2296 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2297
2298 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2299 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2300 // skip body branch
2301 // CHECK: br{{.+}}
2302 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2303
2304 // IV = IV + 1 and inner loop latch
2305 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
2309 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2310
// no explicit NextLB/NextUB update here; the next chunk's bounds are produced by __kmpc_dispatch_next_4 in the outer loop header
2312 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2313 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2314
2315 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2316 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2317
2318 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2319 // CHECK: ret
2320
2321 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
2322 #endif
2323