// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -fexceptions -fcxx-exceptions -gline-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
//
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void without_schedule_clause(float *a, float *b, float *c, float *d) {
#pragma omp parallel for
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// Loop header
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (int i = 33; i < 32000000; i += 7) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
// CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 33, [[CALC_I_1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void static_not_chunked(float *a, float *b, float *c, float *d) {
#pragma omp parallel for schedule(static)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
// CHECK: [[UBRESULT:%.+]] = phi i32 [ 4571423, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// Loop header
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (int i = 32000000; i > 33; i += -7) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
// CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 32000000, [[CALC_I_1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}static_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void static_chunked(float *a, float *b, float *c, float *d) {
#pragma omp parallel for schedule(static, 5)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
// UB = min(UB, GlobalUB)
// CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288
// CHECK-NEXT: br i1 [[UBCMP]], label [[UB_TRUE:%[^,]+]], label [[UB_FALSE:%[^,]+]]
// CHECK: [[UBRESULT:%.+]] = phi i32 [ 16908288, [[UB_TRUE]] ], [ [[UBVAL:%[^,]+]], [[UB_FALSE]] ]
// CHECK-NEXT: store i32 [[UBRESULT]], i32* [[OMP_UB]]
// CHECK-NEXT: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]

// Outer loop header
// CHECK: [[O_IV:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[O_UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ule i32 [[O_IV]], [[O_UB]]
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp ule i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned i = 131071; i <= 2147483647; i += 127) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i32 [[IV1_1]], 127
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 131071, [[CALC_I_1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// Update the counters, adding stride
// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: [[ST:%.+]] = load i32, i32* [[OMP_ST]]
// CHECK-NEXT: [[ADD_LB:%.+]] = add i32 [[LB]], [[ST]]
// CHECK-NEXT: store i32 [[ADD_LB]], i32* [[OMP_LB]]
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[ST:%.+]] = load i32, i32* [[OMP_ST]]
// CHECK-NEXT: [[ADD_UB:%.+]] = add i32 [[UB]], [[ST]]
// CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]]

// CHECK: [[O_LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}dynamic1{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void dynamic1(float *a, float *b, float *c, float *d) {
#pragma omp parallel for schedule(dynamic)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
//
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned long long i = 131071; i < 2147483647; i += 127) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}guided7{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void guided7(float *a, float *b, float *c, float *d) {
#pragma omp parallel for schedule(guided, 7)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7)
//
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned long long i = 131071; i < 2147483647; i += 127) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}test_auto{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void test_auto(float *a, float *b, float *c, float *d) {
  unsigned int x = 0;
  unsigned int y = 0;
#pragma omp parallel for schedule(auto) collapse(2)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
//
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// FIXME: When the iteration count of some nested loop is not a known constant,
// we should pre-calculate it, like we do for the total number of iterations!
  for (char i = static_cast<char>(y); i <= '9'; ++i)
    for (x = 11; x > 0; --x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}runtime{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void runtime(float *a, float *b, float *c, float *d) {
  int x = 0;
#pragma omp parallel for collapse(2) schedule(runtime)
// CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %{{.+}}*)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
// CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* [[GTID_PARAM_ADDR:%.+]], i32* %{{.+}}, %{{.+}}* %{{.+}})
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
//
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned char i = '0' ; i <= '9'; ++i)
    for (x = -10; x < 10; ++x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
// CHECK: ret void
}

// TERM_DEBUG-LABEL: foo
int foo() {return 0;};

// TERM_DEBUG-LABEL: parallel_for
parallel_for(float * a)377 void parallel_for(float *a) {
378 #pragma omp parallel for schedule(static, 5)
379 // TERM_DEBUG-NOT: __kmpc_global_thread_num
380 // TERM_DEBUG: call void @__kmpc_for_static_init_4u({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
381 // TERM_DEBUG: invoke i32 {{.*}}foo{{.*}}()
382 // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]],
383 // TERM_DEBUG-NOT: __kmpc_global_thread_num
384 // TERM_DEBUG: call void @__kmpc_for_static_fini({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
385 // TERM_DEBUG: call {{.+}} @__kmpc_cancel_barrier({{.+}}), !dbg [[DBG_LOC_CANCEL:![0-9]+]]
386 // TERM_DEBUG: [[TERM_LPAD]]
387 // TERM_DEBUG: call void @__clang_call_terminate
388 // TERM_DEBUG: unreachable
389 for (unsigned i = 131071; i <= 2147483647; i += 127)
390 a[i] += foo();
391 }
// Check source line corresponds to "#pragma omp parallel for schedule(static, 5)" above:
// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !MDLocation(line: [[@LINE-4]],
// TERM_DEBUG-DAG: [[DBG_LOC_END]] = !MDLocation(line: [[@LINE-16]],
// TERM_DEBUG-DAG: [[DBG_LOC_CANCEL]] = !MDLocation(line: [[@LINE-17]],

#endif // HEADER

