/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_memory.h"
#include "lp_bld_const.h"
#include "lp_bld_type.h"
#include "lp_bld_init.h"
#include "lp_bld_flow.h"
#include "lp_bld_ir_common.h"
#include "lp_bld_logic.h"

/*
 * Return the context for the current function
 * (always 'main', if the shader doesn't do any function calls).
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * This is global: it returns true even if the current function contains
 * no loop, as long as some function further up the call stack was inside
 * a loop when it called this one.
 */
static inline bool
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return true;
   }
   return false;
}

/*
 * Returns true if we're inside a switch statement.
 * This is global: it returns true even if the current function contains
 * no switch, as long as some function further up the call stack was inside
 * a switch when it called this one.
 */
static inline bool
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return true;
   }
   return false;
}

/*
 * Returns true if we're inside a conditional.
 * This is global: it returns true even if the current function contains
 * no conditional, as long as some function further up the call stack was
 * inside a conditional when it called this one.
 */
static inline bool
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return true;
   }
   return false;
}

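/*
 * Recompute mask->exec_mask as the conjunction of every mask that is
 * currently active: cond_mask, the (loaded) cont_mask and break_mask when
 * inside a loop, switch_mask when inside a switch, and ret_mask when a
 * return may already have executed. Also updates mask->has_mask.
 */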
void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   bool has_loop_mask = mask_has_loop(mask);
   bool has_cond_mask = mask_has_cond(mask);
   bool has_switch_mask = mask_has_switch(mask);
   bool has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         LLVMBuildLoad2(builder, mask->int_vec_type, mask->cont_mask, ""),
                         LLVMBuildLoad2(builder, mask->int_vec_type, mask->break_mask, ""),
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}

/*
 * Initialize a function context at the specified index.
 */
void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->bgnloop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }
}

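/*
 * Initialize the execution mask for the 'main' function: all masks start
 * as all-ones (every channel active); the break and continue masks live in
 * allocas so their values can be carried across basic blocks.
 */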
void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = false;
   mask->ret_in_main = false;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask =
   mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->break_mask = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "break_mask");
   LLVMBuildStore(bld->gallivm->builder, LLVMConstAllOnes(mask->int_vec_type), mask->break_mask);

   mask->cont_mask = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "cont_mask");
   LLVMBuildStore(bld->gallivm->builder, LLVMConstAllOnes(mask->int_vec_type), mask->cont_mask);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
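/*
 * Typical lifetime (a sketch; assumes a hypothetical caller with a fully
 * set up lp_build_context 'bld'):
 *
 *    struct lp_exec_mask em;
 *    lp_exec_mask_init(&em, &bld);
 *    ...emit the shader body, pushing/popping conditionals and loops...
 *    lp_exec_mask_fini(&em);
 */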

void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

/*
 * Store val into the memory pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which channels of val
 * should be stored into the address
 * (0 means don't store this channel, ~0 means do store it).
 */
void lp_exec_mask_store(struct lp_exec_mask *mask,
                        struct lp_build_context *bld_store,
                        LLVMValueRef val,
                        LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVM_VERSION_MAJOR >= 15
          || (LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val)
              || LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind));

   if (exec_mask) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad2(builder, LLVMTypeOf(val), dst_ptr, "");
      if (bld_store->type.width < 32)
         exec_mask = LLVMBuildTrunc(builder, exec_mask, bld_store->vec_type, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
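/*
 * The masked path is a load/select/store sequence, so inactive channels
 * keep the old memory contents. A sketch of typical use from a
 * hypothetical translator:
 *
 *    lp_exec_mask_cond_push(&em, cond);          // lanes failing cond go idle
 *    lp_exec_mask_store(&em, &bld, val, ptr);    // writes active lanes only
 *    lp_exec_mask_cond_pop(&em);
 */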
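/*
 * If a new loop level was pushed since the last call, reload
 * mask->break_mask from the loop's break variable and recompute the
 * execution mask. Called from lp_exec_bgnloop() when 'load' is true.
 */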
void lp_exec_bgnloop_post_phi(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size != ctx->bgnloop_stack_size) {
      LLVMBuildStore(builder, LLVMBuildLoad2(builder, mask->int_vec_type, ctx->break_var, ""), mask->break_mask);
      lp_exec_mask_update(mask);
      ctx->bgnloop_stack_size = ctx->loop_stack_size;
   }
}

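/*
 * Begin a loop: save the enclosing loop's state (header block, cont/break
 * masks, break variable) on the loop stack, seed fresh allocas with the
 * current mask values, and start a new "bgnloop" block for the loop header.
 */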
void lp_exec_bgnloop(struct lp_exec_mask *mask, bool load)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   LLVMValueRef cont_mask = LLVMBuildLoad2(builder, mask->int_vec_type, mask->cont_mask, "");
   LLVMValueRef break_mask = LLVMBuildLoad2(builder, mask->int_vec_type, mask->break_mask, "");

   mask->break_mask = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, break_mask, mask->break_mask);

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->cont_mask = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, cont_mask, mask->cont_mask);

   if (load) {
      lp_exec_bgnloop_post_phi(mask);
   }
}

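/*
 * End a loop: restore the continue mask, preserve the break mask across
 * iterations via the break variable, and branch back to the loop header
 * while any channel is still active; then pop the saved loop state.
 */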
void lp_exec_endloop(struct gallivm_state *gallivm,
                     struct lp_exec_mask *exec_mask,
                     struct lp_build_mask_context *mask)
{
   LLVMBuilderRef builder = exec_mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(exec_mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef mask_type = LLVMIntTypeInContext(exec_mask->bld->gallivm->context, exec_mask->bld->type.length);
   LLVMValueRef icond;

   assert(exec_mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      --ctx->bgnloop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   exec_mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(exec_mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, LLVMBuildLoad2(builder, exec_mask->int_vec_type, exec_mask->break_mask, ""), ctx->break_var);

   LLVMValueRef end_mask = exec_mask->exec_mask;
   if (mask)
      end_mask = LLVMBuildAnd(builder, exec_mask->exec_mask, lp_build_mask_value(mask), "");
   end_mask = LLVMBuildICmp(builder, LLVMIntNE, end_mask, lp_build_zero(gallivm, exec_mask->bld->type), "");
   end_mask = LLVMBuildBitCast(builder, end_mask, mask_type, "");

   /* i1cond = (end_mask != 0) */
   icond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      end_mask,
      LLVMConstNull(mask_type), "i1cond");

   endloop = lp_build_insert_new_block(exec_mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   --ctx->bgnloop_stack_size;
   exec_mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   exec_mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(exec_mask);
}
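/*
 * Together with lp_exec_break()/lp_exec_continue() below, this lowers a
 * structured loop. A sketch of a hypothetical caller:
 *
 *    lp_exec_bgnloop(&em, true);            // BGNLOOP
 *    ...emit body; BRK -> lp_exec_break(&em, NULL, false),
 *       CONT -> lp_exec_continue(&em)...
 *    lp_exec_endloop(gallivm, &em, NULL);   // ENDLOOP: repeat while lanes live
 *
 * Passing a non-NULL lp_build_mask_context to lp_exec_endloop() also folds
 * that outer mask into the loop's backedge condition.
 */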
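/*
 * Begin a conditional (IF): push the current cond_mask and AND it with the
 * vector condition 'val', deactivating the channels where 'val' is 0.
 */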
void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                            LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

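/*
 * Switch to the ELSE branch: activate exactly those channels of the
 * enclosing mask that were inactive in the IF branch.
 */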
void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

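/*
 * End a conditional (ENDIF): restore the cond_mask saved by the matching
 * lp_exec_mask_cond_push().
 */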
void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
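/*
 * The three functions above implement structured IF/ELSE/ENDIF. A sketch
 * of a hypothetical caller, with 'cond' an int-vector condition mask:
 *
 *    lp_exec_mask_cond_push(&em, cond);   // IF: lanes with cond == 0 go idle
 *    ...emit then-block...
 *    lp_exec_mask_cond_invert(&em);       // ELSE: flip within enclosing mask
 *    ...emit else-block...
 *    lp_exec_mask_cond_pop(&em);          // ENDIF: restore enclosing mask
 */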
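/*
 * Execute CONT: remove all currently active channels from the continue
 * mask; they stay idle until the next loop iteration begins.
 */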
void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   LLVMBuildStore(builder, LLVMBuildAnd(builder,
                           LLVMBuildLoad2(builder, mask->int_vec_type, mask->cont_mask, ""),
                           exec_mask, ""), mask->cont_mask);

   lp_exec_mask_update(mask);
}

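/*
 * Execute BRK: inside a loop, remove the active channels from the break
 * mask; inside a switch, clear the switch mask instead (entirely when the
 * break is unconditional, i.e. break_always, otherwise only for the
 * currently active channels).
 */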
void lp_exec_break(struct lp_exec_mask *mask, int *pc,
                   bool break_always)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      LLVMBuildStore(builder, LLVMBuildAnd(builder,
                              LLVMBuildLoad2(builder, mask->int_vec_type, mask->break_mask, ""),
                              exec_mask, "break_full"), mask->break_mask);
   }
   else {
      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional
          * switch. (The check here is not perfect, since dead code after a
          * break is allowed, but it should be sufficient: false negatives
          * merely leave the code unoptimized, so we don't have to
          * pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            if (pc)
               *pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}