1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks in consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
113 * combine the execution mask if there is one with the current mask.
114 */
115 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128 exec_mask->exec_mask, "");
129 }
130
lp_exec_tgsi_break(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140
/*
 * Begin a TGSI SWITCH statement: push the current switch state onto the
 * per-function switch stack and start a new switch with all lanes
 * disabled (each CASE will enable the lanes that match switchval).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   /* Nesting too deep: just count the level so ENDSWITCH stays balanced,
    * but otherwise ignore this switch entirely.
    */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* Loops and switches share one break-type stack, indexed by the
    * combined nesting depth.
    */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Save the enclosing switch's state. */
   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* Start with no lanes active; CASE statements will enable them. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
171
/*
 * End a TGSI SWITCH statement.
 *
 * If a DEFAULT clause was seen earlier but deferred (it was not the last
 * clause and had no fallthrough into it), jump back now and execute it
 * with the default mask; otherwise pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Nesting was too deep; this level was never really pushed. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      /* default mask = enclosing mask & ~(union of all case masks) */
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      /* switch_pc was saved pointing just past the DEFAULT opcode. */
      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* Jump back and (re-)execute the deferred default clause. */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* Just returned here from executing the deferred default clause. */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* Pop and restore the enclosing switch's state. */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
223
/*
 * Handle a TGSI CASE statement: enable the lanes whose switch value
 * equals caseval, keep lanes already enabled by a previous case
 * (fallthrough), and accumulate the match bits into switch_mask_default
 * so DEFAULT knows which lanes were handled by any case.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   /* Ignore if nesting limit was exceeded at the enclosing SWITCH. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* Remember which lanes matched any case, for the default clause. */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* Fallthrough: lanes active from a previous case stay active. */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
248
249 /*
250 * Analyse default statement in a switch.
251 * \return true if default is last statement, false otherwise
252 * \param default_pc_start contains pc of instruction to jump to
253 * if default wasn't last but there's no
254 * fallthrough into default.
255 */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements which are together with default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
/*
 * Handle a TGSI DEFAULT statement.
 *
 * If default is the last clause of the switch, the mask can be updated
 * immediately. Otherwise execution of the clause is deferred (or run
 * with a stale mask on fallthrough-in) and fixed up at ENDSWITCH time;
 * see the comments below.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   /* Ignore if nesting limit was exceeded at the enclosing SWITCH. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      /* Enable the lanes not matched by any case, plus any lanes already
       * active due to fallthrough from a preceding case.
       */
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
367
368
lp_exec_mask_call(struct lp_exec_mask * mask,int func,int * pc)369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
/*
 * Handle a TGSI RET instruction: disable the currently active lanes in
 * ret_mask so they skip the remainder of the (sub)routine. A return from
 * main() outside all control flow simply ends code generation (pc = -1).
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   /* Remove the currently executing lanes from ret_mask. */
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
418
/*
 * Handle a TGSI BGNSUB instruction. Intentionally a no-op: the function
 * context is pushed by lp_exec_mask_call(), so nothing is needed here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
422
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
/*
 * Return an LLVM pointer to the given channel of a TEMP or OUTPUT
 * register.
 *
 * For regular registers this is the per-channel alloca; for files
 * accessed with indirect addressing all channels live in a single big
 * array indexed by index * 4 + chan.
 *
 * \param file   TGSI_FILE_TEMPORARY or TGSI_FILE_OUTPUT
 * \param index  register index
 * \param chan   channel (0..3)
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* Channels are packed into one flat array: index * 4 + chan. */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         /* Pointer to an array value: GEP needs a leading zero index. */
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}
482
483
484 /**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index which temporary register
488 * \param chan which channel of the temp register.
489 */
490 LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492 unsigned index,
493 unsigned chan)
494 {
495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497
498 /**
499 * Return pointer to a output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index which output register
502 * \param chan which channel of the output register.
503 */
504 LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506 unsigned index,
507 unsigned chan)
508 {
509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511
512 /*
513 * If we have indirect addressing in outputs copy our alloca array
514 * to the outputs slots specified by the caller to make sure
515 * our outputs are delivered consistently via the same interface.
516 */
517 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521 unsigned index, chan;
522 assert(bld->bld_base.info->num_outputs <=
523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527 }
528 }
529 }
530 }
531
532 /**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
537 static LLVMValueRef
build_gather(struct lp_build_tgsi_context * bld_base,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)538 build_gather(struct lp_build_tgsi_context *bld_base,
539 LLVMValueRef base_ptr,
540 LLVMValueRef indexes,
541 LLVMValueRef overflow_mask,
542 LLVMValueRef indexes2)
543 {
544 struct gallivm_state *gallivm = bld_base->base.gallivm;
545 LLVMBuilderRef builder = gallivm->builder;
546 struct lp_build_context *uint_bld = &bld_base->uint_bld;
547 struct lp_build_context *bld = &bld_base->base;
548 LLVMValueRef res;
549 unsigned i;
550
551 if (indexes2)
552 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553 else
554 res = bld->undef;
555 /*
556 * overflow_mask is a vector telling us which channels
557 * in the vector overflowed. We use the overflow behavior for
558 * constant buffers which is defined as:
559 * Out of bounds access to constant buffer returns 0 in all
560 * components. Out of bounds behavior is always with respect
561 * to the size of the buffer bound at that slot.
562 */
563
564 if (overflow_mask) {
565 /*
566 * We avoid per-element control flow here (also due to llvm going crazy,
567 * though I suspect it's better anyway since overflow is likely rare).
568 * Note that since we still fetch from buffers even if num_elements was
569 * zero (in this case we'll fetch from index zero) the jit func callers
570 * MUST provide valid fake constant buffers of size 4x32 (the values do
571 * not matter), otherwise we'd still need (not per element though)
572 * control flow.
573 */
574 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575 if (indexes2)
576 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577 }
578
579 /*
580 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581 */
582 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583 LLVMValueRef si, di;
584 LLVMValueRef index;
585 LLVMValueRef scalar_ptr, scalar;
586
587 di = lp_build_const_int32(bld->gallivm, i);
588 if (indexes2)
589 si = lp_build_const_int32(bld->gallivm, i >> 1);
590 else
591 si = di;
592
593 if (indexes2 && (i & 1)) {
594 index = LLVMBuildExtractElement(builder,
595 indexes2, si, "");
596 } else {
597 index = LLVMBuildExtractElement(builder,
598 indexes, si, "");
599 }
600 scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601 &index, 1, "gather_ptr");
602 scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603
604 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605 }
606
607 if (overflow_mask) {
608 if (indexes2) {
609 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611 bld_base->dbl_bld.int_vec_type, "");
612 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613 bld_base->dbl_bld.zero, res);
614 } else
615 res = lp_build_select(bld, overflow_mask, bld->zero, res);
616 }
617
618 return res;
619 }
620
621
622 /**
623 * Scatter/store vector.
624 */
625 static void
emit_mask_scatter(struct lp_build_tgsi_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627 LLVMValueRef base_ptr,
628 LLVMValueRef indexes,
629 LLVMValueRef values,
630 struct lp_exec_mask *mask)
631 {
632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633 LLVMBuilderRef builder = gallivm->builder;
634 unsigned i;
635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637 /*
638 * Loop over elements of index_vec, store scalar value.
639 */
640 for (i = 0; i < bld->bld_base.base.type.length; i++) {
641 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645 LLVMValueRef scalar_pred = pred ?
646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648 if (0)
649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650 ii, val, index, scalar_ptr);
651
652 if (scalar_pred) {
653 LLVMValueRef real_val, dst_val;
654 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656 LLVMBuildStore(builder, real_val, scalar_ptr);
657 }
658 else {
659 LLVMBuildStore(builder, val, scalar_ptr);
660 }
661 }
662 }
663
664
665 /**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
671 static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context * bld,unsigned reg_file,unsigned reg_index,const struct tgsi_ind_register * indirect_reg,int index_limit)672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673 unsigned reg_file, unsigned reg_index,
674 const struct tgsi_ind_register *indirect_reg,
675 int index_limit)
676 {
677 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679 /* always use X component of address register */
680 unsigned swizzle = indirect_reg->Swizzle;
681 LLVMValueRef base;
682 LLVMValueRef rel;
683 LLVMValueRef max_index;
684 LLVMValueRef index;
685
686 assert(bld->indirect_files & (1 << reg_file));
687
688 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689
690 assert(swizzle < 4);
691 switch (indirect_reg->File) {
692 case TGSI_FILE_ADDRESS:
693 rel = LLVMBuildLoad(builder,
694 bld->addr[indirect_reg->Index][swizzle],
695 "load addr reg");
696 /* ADDR LLVM values already have LLVM integer type. */
697 break;
698 case TGSI_FILE_TEMPORARY:
699 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700 rel = LLVMBuildLoad(builder, rel, "load temp reg");
701 /* TEMP LLVM values always have LLVM float type, but for indirection, the
702 * value actually stored is expected to be an integer */
703 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704 break;
705 default:
706 assert(0);
707 rel = uint_bld->zero;
708 }
709
710 index = lp_build_add(uint_bld, base, rel);
711
712 /*
713 * emit_fetch_constant handles constant buffer overflow so this code
714 * is pointless for them.
715 * Furthermore the D3D10 spec in section 6.5 says:
716 * If the constant buffer bound to a slot is larger than the size
717 * declared in the shader for that slot, implementations are allowed
718 * to return incorrect data (not necessarily 0) for indices that are
719 * larger than the declared size but smaller than the buffer size.
720 */
721 if (reg_file != TGSI_FILE_CONSTANT) {
722 assert(index_limit >= 0);
723 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724 uint_bld->type, index_limit);
725
726 assert(!uint_bld->type.sign);
727 index = lp_build_min(uint_bld, index, max_index);
728 }
729
730 return index;
731 }
732
733 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 enum tgsi_opcode_type stype)
736 {
737 struct lp_build_context *bld_fetch;
738
739 switch (stype) {
740 case TGSI_TYPE_FLOAT:
741 case TGSI_TYPE_UNTYPED:
742 bld_fetch = &bld_base->base;
743 break;
744 case TGSI_TYPE_UNSIGNED:
745 bld_fetch = &bld_base->uint_bld;
746 break;
747 case TGSI_TYPE_SIGNED:
748 bld_fetch = &bld_base->int_bld;
749 break;
750 case TGSI_TYPE_DOUBLE:
751 bld_fetch = &bld_base->dbl_bld;
752 break;
753 case TGSI_TYPE_UNSIGNED64:
754 bld_fetch = &bld_base->uint64_bld;
755 break;
756 case TGSI_TYPE_SIGNED64:
757 bld_fetch = &bld_base->int64_bld;
758 break;
759 case TGSI_TYPE_VOID:
760 default:
761 assert(0);
762 bld_fetch = NULL;
763 break;
764 }
765 return bld_fetch;
766 }
767
/*
 * Compute per-lane element offsets into an SoA register array laid out
 * as [reg][chan][lane]:
 *   offset = (indirect_index * 4 + chan_index) * vector_length (+ lane).
 *
 * \param need_perelement_offset  if true, add {0,1,2,...} so each lane
 *                                addresses its own slot; otherwise all
 *                                lanes get the base offset of the channel.
 */
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
800
/*
 * Fetch one channel of a constant-buffer source operand as a vector.
 *
 * Direct accesses load one scalar and broadcast it to all lanes;
 * indirect accesses gather per-lane values, with out-of-bounds lanes
 * returning 0. For 64-bit types the high 16 bits of swizzle_in select
 * the second (high-half) channel. The result is bitcast to the vector
 * type matching stype.
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   /* low 16 bits: first (or only) channel of the swizzle */
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      /* 2D constant: Dimension selects the constant buffer slot. */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* Second index vector for the high halves of 64-bit values. */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      /* Direct access: load one scalar and broadcast to all lanes. */
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
         /* Non-adjacent channel pair: load the two 32-bit halves
          * separately and combine them into a double-width vector.
          */
         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         /* Adjacent 64-bit pair (or 32-bit value): load it in one go by
          * reinterpreting the pointer at the wider element type.
          */
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }

   }

   /* Cast the (float-typed) result to the vector type of stype. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
919
920 /**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then casts to vec_length 64-bits.
926 */
927 static LLVMValueRef
emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype,LLVMValueRef input,LLVMValueRef input2)928 emit_fetch_64bit(
929 struct lp_build_tgsi_context * bld_base,
930 enum tgsi_opcode_type stype,
931 LLVMValueRef input,
932 LLVMValueRef input2)
933 {
934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936 LLVMBuilderRef builder = gallivm->builder;
937 LLVMValueRef res;
938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939 int i;
940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941 int len = bld_base->base.type.length * 2;
942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947 }
948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952
/**
 * Fetch a value from a TGSI immediate register.
 *
 * \param bld_base    TGSI-to-LLVM translation context
 * \param reg         the source register (TGSI_FILE_IMMEDIATE)
 * \param stype       TGSI type the fetched value should be interpreted as
 * \param swizzle_in  low 16 bits: channel to fetch; for 64-bit types the
 *                    high 16 bits hold the second channel
 * \return the fetched value, bitcast to the vector type implied by stype
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   /* first channel is in the low 16 bits of the packed swizzle */
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec; /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct addressing: channel slot is reg index * 4 + swizzle. */
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* 64-bit values span two channels; load the second channel
             * (packed in the high 16 bits of swizzle_in) and combine. */
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Immediates kept as LLVM values directly; no memory access needed. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   /* Values are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1032
/**
 * Fetch a value from a TGSI input register.
 *
 * \param bld_base    TGSI-to-LLVM translation context
 * \param reg         the source register (TGSI_FILE_INPUT)
 * \param stype       TGSI type the fetched value should be interpreted as
 * \param swizzle_in  low 16 bits: channel to fetch; for 64-bit types the
 *                    high 16 bits hold the second channel
 * \return the fetched value, bitcast to the vector type implied by stype
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      /* Indirect addressing: compute per-pixel offsets and gather. */
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec; /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         /* second channel offsets for the high half of 64-bit values */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs spilled to an array (some register in this file is
          * indirectly addressed); load from the array by constant index. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* load the second channel and combine into 64-bit values */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Inputs kept as LLVM values; fetch directly. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   /* Inputs are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1112
1113
/**
 * Fetch a geometry shader input via the gs_iface->fetch_input callback.
 *
 * Handles indirect attribute and vertex-dimension indexing, the PRIMID
 * pseudo-input (really a system value), and 64-bit types which require a
 * second per-channel fetch.
 *
 * \param swizzle_in  low 16 bits: channel to fetch; for 64-bit types the
 *                    high 16 bits hold the second channel
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer-valued; bitcast when a float type is requested */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* fetch the second channel (high 16 bits of swizzle_in) and combine */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1201
/**
 * Fetch a tessellation control shader input via the tcs_iface callbacks.
 *
 * TCS may read from its own outputs as well as inputs, so this dispatches
 * to either emit_fetch_output or emit_fetch_input depending on the register
 * file.  Also handles the PRIMID pseudo-input and 64-bit types (second
 * per-channel fetch from the high 16 bits of swizzle_in).
 */
static LLVMValueRef
emit_fetch_tcs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer-valued; bitcast when a float type is requested */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   // TCS can read from its own outputs
   if (reg->Register.File == TGSI_FILE_OUTPUT) {
      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                              reg->Dimension.Indirect,
                                              vertex_index,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              FALSE,
                                              swizzle_index,
                                              bld_base->info->output_semantic_name[reg->Register.Index]);
   } else {
      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                             reg->Dimension.Indirect,
                                             vertex_index,
                                             reg->Register.Indirect,
                                             attrib_index,
                                             FALSE,
                                             swizzle_index);
   }


   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* fetch the second channel and combine into 64-bit values */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (reg->Register.File == TGSI_FILE_OUTPUT) {
         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                  reg->Dimension.Indirect,
                                                  vertex_index,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  FALSE,
                                                  swizzle_index,
                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
      } else {
         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                 reg->Dimension.Indirect,
                                                 vertex_index,
                                                 reg->Register.Indirect,
                                                 attrib_index,
                                                 FALSE,
                                                 swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1304
/**
 * Fetch a tessellation evaluation shader input via the tes_iface callbacks.
 *
 * Dispatches to fetch_patch_input for TGSI_SEMANTIC_PATCH inputs and to
 * fetch_vertex_input otherwise.  Also handles the PRIMID pseudo-input and
 * 64-bit types (second per-channel fetch from the high 16 bits of
 * swizzle_in).
 */
static LLVMValueRef
emit_fetch_tes_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer-valued; bitcast when a float type is requested */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   /* Patch inputs have no vertex dimension; use the patch-input callback. */
   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              swizzle_index);
   } else {
      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                               reg->Dimension.Indirect,
                                               vertex_index,
                                               reg->Register.Indirect,
                                               attrib_index,
                                               FALSE,
                                               swizzle_index);
   }

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* fetch the second channel and combine into 64-bit values */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  swizzle_index);
      }
      else {
         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                   reg->Dimension.Indirect,
                                                   vertex_index,
                                                   reg->Register.Indirect,
                                                   attrib_index,
                                                   FALSE,
                                                   swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1398
1399
1400
/**
 * Fetch a value from a TGSI temporary register.
 *
 * \param bld_base    TGSI-to-LLVM translation context
 * \param reg         the source register (TGSI_FILE_TEMPORARY)
 * \param stype       TGSI type the fetched value should be interpreted as
 * \param swizzle_in  low 16 bits: channel to fetch; for 64-bit types the
 *                    high 16 bits hold the second channel
 * \return the fetched value, bitcast to the vector type implied by stype
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      /* Indirect addressing: compute per-pixel offsets and gather. */
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         /* second channel offsets for the high half of 64-bit values */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      /* Direct addressing: plain load from the temp's channel pointer. */
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         /* load the second channel and combine into 64-bit values */
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   /* Temporaries are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1469
/**
 * Fetch a system value register (TGSI_FILE_SYSTEM_VALUE).
 *
 * Selects the value from bld->system_values based on the declared semantic,
 * broadcasting scalars to vectors where needed.  Tracks the actual type of
 * each value in `atype` and bitcasts at the end if the requested `stype`
 * differs.  Indirect addressing of system values is not supported.
 */
static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value
   unsigned swizzle = swizzle_in & 0xffff;

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEINSTANCE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      /* TCS already has a per-channel invocation id; others broadcast. */
      if (info->processor == PIPE_SHADER_TESS_CTRL)
         res = bld->system_values.invocation_id;
      else
         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_HELPER_INVOCATION:
      /* helper invocations are the lanes NOT in the live execution mask */
      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_THREAD_ID:
      res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BLOCK_ID:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_GRID_SIZE:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSCOORD:
      {
         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
         LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
         res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
      }
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_FACE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_DRAWID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSOUTER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_outer,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_TESSINNER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_inner,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_VERTICESIN:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   /* Bitcast to the requested type when it differs from the actual one. */
   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}
1603
1604 /**
1605 * Register fetch with derivatives.
1606 */
1607 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1608 emit_fetch_deriv(
1609 struct lp_build_tgsi_soa_context *bld,
1610 LLVMValueRef src,
1611 LLVMValueRef *res,
1612 LLVMValueRef *ddx,
1613 LLVMValueRef *ddy)
1614 {
1615 if (res)
1616 *res = src;
1617
1618 /* TODO: use interpolation coeffs for inputs */
1619
1620 if (ddx)
1621 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1622
1623 if (ddy)
1624 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1625 }
1626
1627 /**
1628 * store an array of vec-length 64-bit into two arrays of vec_length floats
1629 * i.e.
1630 * value is d0, d1, d2, d3 etc.
1631 * each 64-bit has high and low pieces x, y
1632 * so gets stored into the separate channels as:
1633 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1634 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1635 */
1636 static void
emit_store_64bit_chan(struct lp_build_tgsi_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)1637 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1638 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1639 LLVMValueRef value)
1640 {
1641 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1642 struct gallivm_state *gallivm = bld_base->base.gallivm;
1643 LLVMBuilderRef builder = gallivm->builder;
1644 struct lp_build_context *float_bld = &bld_base->base;
1645 unsigned i;
1646 LLVMValueRef temp, temp2;
1647 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1648 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1649
1650 for (i = 0; i < bld_base->base.type.length; i++) {
1651 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1652 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1653 }
1654
1655 temp = LLVMBuildShuffleVector(builder, value,
1656 LLVMGetUndef(LLVMTypeOf(value)),
1657 LLVMConstVector(shuffles,
1658 bld_base->base.type.length),
1659 "");
1660 temp2 = LLVMBuildShuffleVector(builder, value,
1661 LLVMGetUndef(LLVMTypeOf(value)),
1662 LLVMConstVector(shuffles2,
1663 bld_base->base.type.length),
1664 "");
1665
1666 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1667 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1668 }
1669
/**
 * Store a value to a TGSI output register.
 *
 * \param dtype           TGSI type of the value being stored
 * \param reg             destination register
 * \param chan_index      destination channel (0..3)
 * \param indirect_index  per-pixel index vector when reg is indirect
 * \param value           value to store (bitcast to float internally)
 */
static void
emit_store_output(struct lp_build_tgsi_context *bld_base,
                  enum tgsi_opcode_type dtype,
                  const struct tgsi_full_dst_register *reg,
                  unsigned index,
                  unsigned chan_index,
                  LLVMValueRef indirect_index,
                  LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   /* Outputs are always stored as floats */
   value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

   if (reg->Register.Indirect) {
      /* Indirect addressing: scatter into the outputs array. */
      LLVMValueRef index_vec; /* indexes into the output registers */
      LLVMValueRef outputs_array;
      LLVMTypeRef fptr_type;

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        chan_index,
                                        TRUE);

      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

      /* Scatter store values into output registers */
      emit_mask_scatter(bld, outputs_array, index_vec, value,
                        &bld->exec_mask);
   }
   else {
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                               chan_index);

      if (tgsi_type_is_64bit(dtype)) {
         /* 64-bit values are split across this channel and the next one */
         LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                   chan_index + 1);
         emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                               value);
      } else
         /* Plain masked store: only live lanes are written. */
         lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
   }
}
1718
/**
 * Store a value to a tessellation control shader output register.
 *
 * Resolves the attribute and vertex indices (direct or indirect) and
 * delegates the actual store to the tcs_iface->emit_store_output callback,
 * passing the current execution mask.
 */
static void
emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
                      enum tgsi_opcode_type dtype,
                      const struct tgsi_full_dst_register *reg,
                      unsigned index,
                      unsigned chan_index,
                      LLVMValueRef indirect_index,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef channel_index = NULL;

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_OUTPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   channel_index = lp_build_const_int32(gallivm, chan_index);

   assert(bld->tcs_iface->emit_store_output);
   bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                     bld_base->info->output_semantic_name[reg->Register.Index],
                                     reg->Dimension.Indirect,
                                     vertex_index,
                                     reg->Register.Indirect,
                                     attrib_index,
                                     false,
                                     channel_index,
                                     value,
                                     mask_vec(bld_base));
}
1777
1778 static void
emit_store_temp(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1779 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1780 enum tgsi_opcode_type dtype,
1781 const struct tgsi_full_dst_register *reg,
1782 unsigned index,
1783 unsigned chan_index,
1784 LLVMValueRef indirect_index,
1785 LLVMValueRef value)
1786 {
1787 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1788 struct gallivm_state *gallivm = bld_base->base.gallivm;
1789 LLVMBuilderRef builder = gallivm->builder;
1790 struct lp_build_context *float_bld = &bld_base->base;
1791
1792 /* Temporaries are always stored as floats */
1793 if (!tgsi_type_is_64bit(dtype))
1794 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1795 else
1796 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1797
1798 if (reg->Register.Indirect) {
1799 LLVMValueRef index_vec; /* indexes into the temp registers */
1800 LLVMValueRef temps_array;
1801 LLVMTypeRef fptr_type;
1802
1803 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1804 indirect_index,
1805 chan_index,
1806 TRUE);
1807
1808 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1809 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1810
1811 /* Scatter store values into temp registers */
1812 emit_mask_scatter(bld, temps_array, index_vec, value,
1813 &bld->exec_mask);
1814 }
1815 else {
1816 LLVMValueRef temp_ptr;
1817 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1818
1819 if (tgsi_type_is_64bit(dtype)) {
1820 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1821 reg->Register.Index,
1822 chan_index + 1);
1823 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1824 value);
1825 }
1826 else
1827 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1828 }
1829 }
1830
1831 static void
emit_store_address(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1832 emit_store_address(struct lp_build_tgsi_context *bld_base,
1833 enum tgsi_opcode_type dtype,
1834 const struct tgsi_full_dst_register *reg,
1835 unsigned index,
1836 unsigned chan_index,
1837 LLVMValueRef indirect_index,
1838 LLVMValueRef value)
1839 {
1840 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1841 struct gallivm_state *gallivm = bld_base->base.gallivm;
1842 LLVMBuilderRef builder = gallivm->builder;
1843 struct lp_build_context *int_bld = &bld_base->int_bld;
1844
1845 assert(dtype == TGSI_TYPE_SIGNED);
1846 assert(LLVMTypeOf(value) == int_bld->vec_type);
1847 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1848 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1849 bld->addr[reg->Register.Index][chan_index]);
1850 }
1851
1852 /**
1853 * Register store.
1854 */
1855 static void
emit_store_chan(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned index,unsigned chan_index,LLVMValueRef value)1856 emit_store_chan(
1857 struct lp_build_tgsi_context *bld_base,
1858 const struct tgsi_full_instruction *inst,
1859 unsigned index,
1860 unsigned chan_index,
1861 LLVMValueRef value)
1862 {
1863 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1864 struct gallivm_state *gallivm = bld_base->base.gallivm;
1865 LLVMBuilderRef builder = gallivm->builder;
1866 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1867 struct lp_build_context *float_bld = &bld_base->base;
1868 LLVMValueRef indirect_index = NULL;
1869 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1870
1871 /*
1872 * Apply saturation.
1873 *
1874 * It is always assumed to be float.
1875 */
1876 if (inst->Instruction.Saturate) {
1877 assert(dtype == TGSI_TYPE_FLOAT ||
1878 dtype == TGSI_TYPE_UNTYPED);
1879 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1880 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1881 }
1882
1883 if (reg->Register.Indirect) {
1884 /*
1885 * Currently the mesa/st doesn't generate indirect stores
1886 * to 64-bit values, it normally uses MOV to do indirect stores.
1887 */
1888 assert(!tgsi_type_is_64bit(dtype));
1889 indirect_index = get_indirect_index(bld,
1890 reg->Register.File,
1891 reg->Register.Index,
1892 ®->Indirect,
1893 bld->bld_base.info->file_max[reg->Register.File]);
1894 } else {
1895 assert(reg->Register.Index <=
1896 bld_base->info->file_max[reg->Register.File]);
1897 }
1898
1899 if (DEBUG_EXECUTION) {
1900 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1901 }
1902
1903 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1904 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1905 dtype,
1906 reg,
1907 index,
1908 chan_index,
1909 indirect_index,
1910 value);
1911
1912 (void)dtype;
1913 }
1914
1915 /*
1916 * Called at the beginning of the translation of each TGSI instruction, to
1917 * emit some debug code.
1918 */
1919 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)1920 emit_debug(
1921 struct lp_build_tgsi_context * bld_base,
1922 const struct tgsi_full_instruction * inst,
1923 const struct tgsi_opcode_info * info)
1924
1925 {
1926 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1927
1928 if (DEBUG_EXECUTION) {
1929 /*
1930 * Dump the TGSI instruction.
1931 */
1932
1933 struct gallivm_state *gallivm = bld_base->base.gallivm;
1934 char buf[512];
1935 buf[0] = '$';
1936 buf[1] = ' ';
1937 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1938 lp_build_printf(gallivm, buf);
1939
1940 /* Dump the execution mask.
1941 */
1942 if (bld->exec_mask.has_mask) {
1943 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1944 }
1945 }
1946 }
1947
1948 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,unsigned index,LLVMValueRef dst[4])1949 emit_store(
1950 struct lp_build_tgsi_context * bld_base,
1951 const struct tgsi_full_instruction * inst,
1952 const struct tgsi_opcode_info * info,
1953 unsigned index,
1954 LLVMValueRef dst[4])
1955
1956 {
1957 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1958
1959 unsigned writemask = inst->Dst[index].Register.WriteMask;
1960 while (writemask) {
1961 unsigned chan_index = u_bit_scan(&writemask);
1962 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1963 continue;
1964 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1965 }
1966 }
1967
1968 static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)1969 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1970 {
1971 switch (tgsi_target) {
1972 case TGSI_TEXTURE_BUFFER:
1973 return PIPE_BUFFER;
1974 case TGSI_TEXTURE_1D:
1975 case TGSI_TEXTURE_SHADOW1D:
1976 return PIPE_TEXTURE_1D;
1977 case TGSI_TEXTURE_2D:
1978 case TGSI_TEXTURE_SHADOW2D:
1979 case TGSI_TEXTURE_2D_MSAA:
1980 return PIPE_TEXTURE_2D;
1981 case TGSI_TEXTURE_3D:
1982 return PIPE_TEXTURE_3D;
1983 case TGSI_TEXTURE_CUBE:
1984 case TGSI_TEXTURE_SHADOWCUBE:
1985 return PIPE_TEXTURE_CUBE;
1986 case TGSI_TEXTURE_RECT:
1987 case TGSI_TEXTURE_SHADOWRECT:
1988 return PIPE_TEXTURE_RECT;
1989 case TGSI_TEXTURE_1D_ARRAY:
1990 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1991 return PIPE_TEXTURE_1D_ARRAY;
1992 case TGSI_TEXTURE_2D_ARRAY:
1993 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1994 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1995 return PIPE_TEXTURE_2D_ARRAY;
1996 case TGSI_TEXTURE_CUBE_ARRAY:
1997 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1998 return PIPE_TEXTURE_CUBE_ARRAY;
1999 default:
2000 assert(0);
2001 return PIPE_BUFFER;
2002 }
2003 }
2004
2005
2006 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)2007 lp_build_lod_property(
2008 struct lp_build_tgsi_context *bld_base,
2009 const struct tgsi_full_instruction *inst,
2010 unsigned src_op)
2011 {
2012 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2013 enum lp_sampler_lod_property lod_property;
2014
2015 /*
2016 * Not much we can do here. We could try catching inputs declared
2017 * with constant interpolation but not sure it's worth it - since for
2018 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2019 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2020 * like the constant/immediate recognition below.
2021 * What seems to be of more value would be to recognize temps holding
2022 * broadcasted scalars but no way we can do it.
2023 * Tried asking llvm but without any success (using LLVMIsConstant
2024 * even though this isn't exactly what we'd need), even as simple as
2025 * IMM[0] UINT32 (0,-1,0,0)
2026 * MOV TEMP[0] IMM[0].yyyy
2027 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2028 * doesn't work.
2029 * This means there's ZERO chance this will ever catch a scalar lod
2030 * with traditional tex opcodes as well as texel fetches, since the lod
2031 * comes from the same reg as coords (except some test shaders using
2032 * constant coords maybe).
2033 * There's at least hope for sample opcodes as well as size queries.
2034 */
2035 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2036 reg->Register.File == TGSI_FILE_CONSTANT ||
2037 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2038 lod_property = LP_SAMPLER_LOD_SCALAR;
2039 }
2040 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2041 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2042 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2043 }
2044 else {
2045 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2046 }
2047 }
2048 else {
2049 /* never use scalar (per-quad) lod the results are just too wrong. */
2050 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2051 }
2052 return lod_property;
2053 }
2054
2055
2056 /**
2057 * High-level instruction translators.
2058 */
2059
2060 static void
emit_tex(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,LLVMValueRef * texel,unsigned sampler_reg,enum lp_sampler_op_type sampler_op)2061 emit_tex( struct lp_build_tgsi_soa_context *bld,
2062 const struct tgsi_full_instruction *inst,
2063 enum lp_build_tex_modifier modifier,
2064 LLVMValueRef *texel,
2065 unsigned sampler_reg,
2066 enum lp_sampler_op_type sampler_op)
2067 {
2068 unsigned unit = inst->Src[sampler_reg].Register.Index;
2069 LLVMValueRef oow = NULL;
2070 LLVMValueRef lod = NULL;
2071 LLVMValueRef coords[5];
2072 LLVMValueRef offsets[3] = { NULL };
2073 struct lp_derivatives derivs;
2074 struct lp_sampler_params params;
2075 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2076 unsigned num_derivs, num_offsets, i;
2077 unsigned shadow_coord = 0;
2078 unsigned layer_coord = 0;
2079 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2080
2081 memset(¶ms, 0, sizeof(params));
2082
2083 if (!bld->sampler) {
2084 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2085 for (i = 0; i < 4; i++) {
2086 texel[i] = bld->bld_base.base.undef;
2087 }
2088 return;
2089 }
2090
2091 switch (inst->Texture.Texture) {
2092 case TGSI_TEXTURE_1D_ARRAY:
2093 layer_coord = 1;
2094 /* fallthrough */
2095 case TGSI_TEXTURE_1D:
2096 num_offsets = 1;
2097 num_derivs = 1;
2098 break;
2099 case TGSI_TEXTURE_2D_ARRAY:
2100 layer_coord = 2;
2101 /* fallthrough */
2102 case TGSI_TEXTURE_2D:
2103 case TGSI_TEXTURE_RECT:
2104 num_offsets = 2;
2105 num_derivs = 2;
2106 break;
2107 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2108 layer_coord = 1;
2109 /* fallthrough */
2110 case TGSI_TEXTURE_SHADOW1D:
2111 shadow_coord = 2;
2112 num_offsets = 1;
2113 num_derivs = 1;
2114 break;
2115 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2116 layer_coord = 2;
2117 shadow_coord = 3;
2118 num_offsets = 2;
2119 num_derivs = 2;
2120 break;
2121 case TGSI_TEXTURE_SHADOW2D:
2122 case TGSI_TEXTURE_SHADOWRECT:
2123 shadow_coord = 2;
2124 num_offsets = 2;
2125 num_derivs = 2;
2126 break;
2127 case TGSI_TEXTURE_CUBE:
2128 num_offsets = 2;
2129 num_derivs = 3;
2130 break;
2131 case TGSI_TEXTURE_3D:
2132 num_offsets = 3;
2133 num_derivs = 3;
2134 break;
2135 case TGSI_TEXTURE_SHADOWCUBE:
2136 shadow_coord = 3;
2137 num_offsets = 2;
2138 num_derivs = 3;
2139 break;
2140 case TGSI_TEXTURE_CUBE_ARRAY:
2141 num_offsets = 2;
2142 num_derivs = 3;
2143 layer_coord = 3;
2144 break;
2145 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2146 num_offsets = 2;
2147 num_derivs = 3;
2148 layer_coord = 3;
2149 shadow_coord = 4; /* shadow coord special different reg */
2150 break;
2151 case TGSI_TEXTURE_2D_MSAA:
2152 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2153 default:
2154 assert(0);
2155 return;
2156 }
2157
2158 /* Note lod and especially projected are illegal in a LOT of cases */
2159 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2160 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2161 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2162 lod = bld->bld_base.base.zero;
2163 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2164 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2165 /* note that shadow cube array with bias/explicit lod does not exist */
2166 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2167 }
2168 else {
2169 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2170 }
2171 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2172 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2173 }
2174 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2175 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2176 }
2177 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2178 }
2179
2180 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2181 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2182 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2183 }
2184 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2185 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2186 oow = lp_build_rcp(&bld->bld_base.base, oow);
2187 }
2188
2189 for (i = 0; i < num_derivs; i++) {
2190 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2191 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2192 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2193 }
2194 for (i = num_derivs; i < 5; i++) {
2195 coords[i] = bld->bld_base.base.undef;
2196 }
2197
2198 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2199 if (layer_coord) {
2200 if (layer_coord == 3) {
2201 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2202 }
2203 else {
2204 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2205 }
2206 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2207 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2208 }
2209 /* Shadow coord occupies always 5th slot. */
2210 if (shadow_coord) {
2211 sample_key |= LP_SAMPLER_SHADOW;
2212 if (shadow_coord == 4) {
2213 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2214 }
2215 else {
2216 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2217 }
2218 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2219 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2220 }
2221
2222 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2223 unsigned dim;
2224 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2225 for (dim = 0; dim < num_derivs; ++dim) {
2226 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2227 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2228 }
2229 params.derivs = &derivs;
2230 /*
2231 * could also check all src regs if constant but I doubt such
2232 * cases exist in practice.
2233 */
2234 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2235 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2236 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2237 }
2238 else {
2239 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2240 }
2241 }
2242 else {
2243 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2244 }
2245 }
2246 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2247
2248 /* we don't handle the 4 offset version of tg4 */
2249 if (inst->Texture.NumOffsets == 1) {
2250 unsigned dim;
2251 sample_key |= LP_SAMPLER_OFFSETS;
2252 for (dim = 0; dim < num_offsets; dim++) {
2253 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2254 }
2255 }
2256
2257 params.type = bld->bld_base.base.type;
2258 params.sample_key = sample_key;
2259 params.texture_index = unit;
2260 params.sampler_index = unit;
2261 params.context_ptr = bld->context_ptr;
2262 params.thread_data_ptr = bld->thread_data_ptr;
2263 params.coords = coords;
2264 params.offsets = offsets;
2265 params.lod = lod;
2266 params.texel = texel;
2267
2268 bld->sampler->emit_tex_sample(bld->sampler,
2269 bld->bld_base.base.gallivm,
2270 ¶ms);
2271 }
2272
2273 static void
emit_sample(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,boolean compare,enum lp_sampler_op_type sample_type,LLVMValueRef * texel)2274 emit_sample(struct lp_build_tgsi_soa_context *bld,
2275 const struct tgsi_full_instruction *inst,
2276 enum lp_build_tex_modifier modifier,
2277 boolean compare,
2278 enum lp_sampler_op_type sample_type,
2279 LLVMValueRef *texel)
2280 {
2281 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2282 unsigned texture_unit, sampler_unit;
2283 LLVMValueRef lod = NULL;
2284 LLVMValueRef coords[5];
2285 LLVMValueRef offsets[3] = { NULL };
2286 struct lp_derivatives derivs;
2287 struct lp_sampler_params params;
2288 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2289
2290 unsigned num_offsets, num_derivs, i;
2291 unsigned layer_coord = 0;
2292 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2293
2294 memset(¶ms, 0, sizeof(params));
2295
2296 if (!bld->sampler) {
2297 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2298 for (i = 0; i < 4; i++) {
2299 texel[i] = bld->bld_base.base.undef;
2300 }
2301 return;
2302 }
2303
2304 /*
2305 * unlike old-style tex opcodes the texture/sampler indices
2306 * always come from src1 and src2 respectively.
2307 */
2308 texture_unit = inst->Src[1].Register.Index;
2309 sampler_unit = inst->Src[2].Register.Index;
2310
2311 /*
2312 * Note inst->Texture.Texture will contain the number of offsets,
2313 * however the target information is NOT there and comes from the
2314 * declared sampler views instead.
2315 */
2316 switch (bld->sv[texture_unit].Resource) {
2317 case TGSI_TEXTURE_1D:
2318 num_offsets = 1;
2319 num_derivs = 1;
2320 break;
2321 case TGSI_TEXTURE_1D_ARRAY:
2322 layer_coord = 1;
2323 num_offsets = 1;
2324 num_derivs = 1;
2325 break;
2326 case TGSI_TEXTURE_2D:
2327 case TGSI_TEXTURE_RECT:
2328 num_offsets = 2;
2329 num_derivs = 2;
2330 break;
2331 case TGSI_TEXTURE_2D_ARRAY:
2332 layer_coord = 2;
2333 num_offsets = 2;
2334 num_derivs = 2;
2335 break;
2336 case TGSI_TEXTURE_CUBE:
2337 num_offsets = 2;
2338 num_derivs = 3;
2339 break;
2340 case TGSI_TEXTURE_3D:
2341 num_offsets = 3;
2342 num_derivs = 3;
2343 break;
2344 case TGSI_TEXTURE_CUBE_ARRAY:
2345 layer_coord = 3;
2346 num_offsets = 2;
2347 num_derivs = 3;
2348 break;
2349 default:
2350 assert(0);
2351 return;
2352 }
2353
2354 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2355 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2356 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2357 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2358 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2359 }
2360 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2362 }
2363 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2364 }
2365 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2366 /* XXX might be better to explicitly pass the level zero information */
2367 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2368 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2369 }
2370
2371 for (i = 0; i < num_derivs; i++) {
2372 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2373 }
2374 for (i = num_derivs; i < 5; i++) {
2375 coords[i] = bld->bld_base.base.undef;
2376 }
2377
2378 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2379 if (layer_coord) {
2380 if (layer_coord == 3)
2381 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2382 else
2383 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2384 }
2385 /* Shadow coord occupies always 5th slot. */
2386 if (compare) {
2387 sample_key |= LP_SAMPLER_SHADOW;
2388 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2389 }
2390
2391 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2392 unsigned dim;
2393 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2394 for (dim = 0; dim < num_derivs; ++dim) {
2395 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2396 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2397 }
2398 params.derivs = &derivs;
2399 /*
2400 * could also check all src regs if constant but I doubt such
2401 * cases exist in practice.
2402 */
2403 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2404 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2405 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2406 }
2407 else {
2408 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2409 }
2410 }
2411 else {
2412 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2413 }
2414 }
2415
2416 /* some advanced gather instructions (txgo) would require 4 offsets */
2417 if (inst->Texture.NumOffsets == 1) {
2418 unsigned dim;
2419 sample_key |= LP_SAMPLER_OFFSETS;
2420 for (dim = 0; dim < num_offsets; dim++) {
2421 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2422 }
2423 }
2424 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2425
2426 params.type = bld->bld_base.base.type;
2427 params.sample_key = sample_key;
2428 params.texture_index = texture_unit;
2429 params.sampler_index = sampler_unit;
2430 params.context_ptr = bld->context_ptr;
2431 params.thread_data_ptr = bld->thread_data_ptr;
2432 params.coords = coords;
2433 params.offsets = offsets;
2434 params.lod = lod;
2435 params.texel = texel;
2436
2437 bld->sampler->emit_tex_sample(bld->sampler,
2438 bld->bld_base.base.gallivm,
2439 ¶ms);
2440
2441 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2442 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2443 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2444 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2445 unsigned char swizzles[4];
2446 swizzles[0] = inst->Src[1].Register.SwizzleX;
2447 swizzles[1] = inst->Src[1].Register.SwizzleY;
2448 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449 swizzles[3] = inst->Src[1].Register.SwizzleW;
2450
2451 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2452 }
2453 }
2454
2455 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,boolean is_samplei)2456 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2457 const struct tgsi_full_instruction *inst,
2458 LLVMValueRef *texel,
2459 boolean is_samplei)
2460 {
2461 unsigned unit, target;
2462 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2463 LLVMValueRef explicit_lod = NULL;
2464 LLVMValueRef coords[5];
2465 LLVMValueRef offsets[3] = { NULL };
2466 LLVMValueRef ms_index = NULL;
2467 struct lp_sampler_params params;
2468 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2469 unsigned dims, i;
2470 unsigned layer_coord = 0;
2471 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2472
2473 memset(¶ms, 0, sizeof(params));
2474
2475 if (!bld->sampler) {
2476 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2477 for (i = 0; i < 4; i++) {
2478 texel[i] = coord_undef;
2479 }
2480 return;
2481 }
2482
2483 unit = inst->Src[1].Register.Index;
2484
2485 if (is_samplei) {
2486 target = bld->sv[unit].Resource;
2487 }
2488 else {
2489 target = inst->Texture.Texture;
2490 }
2491
2492 switch (target) {
2493 case TGSI_TEXTURE_1D:
2494 case TGSI_TEXTURE_BUFFER:
2495 dims = 1;
2496 break;
2497 case TGSI_TEXTURE_1D_ARRAY:
2498 layer_coord = 1;
2499 dims = 1;
2500 break;
2501 case TGSI_TEXTURE_2D:
2502 case TGSI_TEXTURE_RECT:
2503 case TGSI_TEXTURE_2D_MSAA:
2504 dims = 2;
2505 break;
2506 case TGSI_TEXTURE_2D_ARRAY:
2507 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2508 layer_coord = 2;
2509 dims = 2;
2510 break;
2511 case TGSI_TEXTURE_3D:
2512 dims = 3;
2513 break;
2514 default:
2515 assert(0);
2516 return;
2517 }
2518
2519 /* always have lod except for buffers and msaa targets ? */
2520 if (target != TGSI_TEXTURE_BUFFER &&
2521 target != TGSI_TEXTURE_2D_MSAA &&
2522 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2523 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2524 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2525 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2526 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2527 }
2528
2529 if (target == TGSI_TEXTURE_2D_MSAA ||
2530 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2531 sample_key |= LP_SAMPLER_FETCH_MS;
2532 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2533 }
2534
2535 /*
2536 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2537 * would be the sample index.
2538 */
2539
2540 for (i = 0; i < dims; i++) {
2541 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2542 }
2543 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2544 for (i = dims; i < 5; i++) {
2545 coords[i] = coord_undef;
2546 }
2547 if (layer_coord)
2548 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2549
2550 if (inst->Texture.NumOffsets == 1) {
2551 unsigned dim;
2552 sample_key |= LP_SAMPLER_OFFSETS;
2553 for (dim = 0; dim < dims; dim++) {
2554 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2555 }
2556 }
2557 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2558
2559 params.type = bld->bld_base.base.type;
2560 params.sample_key = sample_key;
2561 params.texture_index = unit;
2562 /*
2563 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2564 * and trigger some assertions with d3d10 where the sampler view number
2565 * can exceed this.
2566 */
2567 params.sampler_index = 0;
2568 params.context_ptr = bld->context_ptr;
2569 params.thread_data_ptr = bld->thread_data_ptr;
2570 params.coords = coords;
2571 params.offsets = offsets;
2572 params.derivs = NULL;
2573 params.lod = explicit_lod;
2574 params.texel = texel;
2575 params.ms_index = ms_index;
2576
2577 bld->sampler->emit_tex_sample(bld->sampler,
2578 bld->bld_base.base.gallivm,
2579 ¶ms);
2580
2581 if (is_samplei &&
2582 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2583 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2584 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2585 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2586 unsigned char swizzles[4];
2587 swizzles[0] = inst->Src[1].Register.SwizzleX;
2588 swizzles[1] = inst->Src[1].Register.SwizzleY;
2589 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2590 swizzles[3] = inst->Src[1].Register.SwizzleW;
2591
2592 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2593 }
2594 }
2595
2596 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,boolean is_sviewinfo)2597 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2598 const struct tgsi_full_instruction *inst,
2599 LLVMValueRef *sizes_out,
2600 boolean is_sviewinfo)
2601 {
2602 LLVMValueRef explicit_lod;
2603 enum lp_sampler_lod_property lod_property;
2604 unsigned has_lod;
2605 unsigned i;
2606 unsigned unit = inst->Src[1].Register.Index;
2607 unsigned target, pipe_target;
2608 struct lp_sampler_size_query_params params;
2609
2610 if (is_sviewinfo) {
2611 target = bld->sv[unit].Resource;
2612 }
2613 else {
2614 target = inst->Texture.Texture;
2615 }
2616 switch (target) {
2617 case TGSI_TEXTURE_BUFFER:
2618 case TGSI_TEXTURE_RECT:
2619 case TGSI_TEXTURE_SHADOWRECT:
2620 has_lod = 0;
2621 break;
2622 default:
2623 has_lod = 1;
2624 break;
2625 }
2626
2627 if (!bld->sampler) {
2628 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2629 for (i = 0; i < 4; i++)
2630 sizes_out[i] = bld->bld_base.int_bld.undef;
2631 return;
2632 }
2633
2634 if (has_lod) {
2635 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2636 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2637 }
2638 else {
2639 explicit_lod = NULL;
2640 lod_property = LP_SAMPLER_LOD_SCALAR;
2641 }
2642
2643
2644 pipe_target = tgsi_to_pipe_tex_target(target);
2645
2646 params.int_type = bld->bld_base.int_bld.type;
2647 params.texture_unit = unit;
2648 params.target = pipe_target;
2649 params.context_ptr = bld->context_ptr;
2650 params.is_sviewinfo = TRUE;
2651 params.lod_property = lod_property;
2652 params.explicit_lod = explicit_lod;
2653 params.sizes_out = sizes_out;
2654 params.samples_only = false;
2655
2656 bld->sampler->emit_size_query(bld->sampler,
2657 bld->bld_base.base.gallivm,
2658 ¶ms);
2659 }
2660
2661 static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2662 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2663 int pc)
2664 {
2665 unsigned i;
2666
2667 for (i = 0; i < 5; i++) {
2668 enum tgsi_opcode opcode;
2669
2670 if (pc + i >= bld->bld_base.info->num_instructions)
2671 return TRUE;
2672
2673 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2674
2675 if (opcode == TGSI_OPCODE_END)
2676 return TRUE;
2677
2678 if (opcode == TGSI_OPCODE_TEX ||
2679 opcode == TGSI_OPCODE_TXP ||
2680 opcode == TGSI_OPCODE_TXD ||
2681 opcode == TGSI_OPCODE_TXB ||
2682 opcode == TGSI_OPCODE_TXL ||
2683 opcode == TGSI_OPCODE_TXF ||
2684 opcode == TGSI_OPCODE_TXQ ||
2685 opcode == TGSI_OPCODE_TEX2 ||
2686 opcode == TGSI_OPCODE_TXB2 ||
2687 opcode == TGSI_OPCODE_TXL2 ||
2688 opcode == TGSI_OPCODE_SAMPLE ||
2689 opcode == TGSI_OPCODE_SAMPLE_B ||
2690 opcode == TGSI_OPCODE_SAMPLE_C ||
2691 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2692 opcode == TGSI_OPCODE_SAMPLE_D ||
2693 opcode == TGSI_OPCODE_SAMPLE_I ||
2694 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2695 opcode == TGSI_OPCODE_SAMPLE_L ||
2696 opcode == TGSI_OPCODE_SVIEWINFO ||
2697 opcode == TGSI_OPCODE_CAL ||
2698 opcode == TGSI_OPCODE_IF ||
2699 opcode == TGSI_OPCODE_UIF ||
2700 opcode == TGSI_OPCODE_BGNLOOP ||
2701 opcode == TGSI_OPCODE_SWITCH)
2702 return FALSE;
2703 }
2704
2705 return TRUE;
2706 }
2707
2708
2709
2710 /**
2711 * Kill fragment if any of the src register values are negative.
2712 */
2713 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2714 emit_kill_if(
2715 struct lp_build_tgsi_soa_context *bld,
2716 const struct tgsi_full_instruction *inst,
2717 int pc)
2718 {
2719 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2720 const struct tgsi_full_src_register *reg = &inst->Src[0];
2721 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2722 LLVMValueRef mask;
2723 unsigned chan_index;
2724
2725 memset(&terms, 0, sizeof terms);
2726
2727 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2728 unsigned swizzle;
2729
2730 /* Unswizzle channel */
2731 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2732
2733 /* Check if the component has not been already tested. */
2734 assert(swizzle < TGSI_NUM_CHANNELS);
2735 if( !terms[swizzle] )
2736 /* TODO: change the comparison operator instead of setting the sign */
2737 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2738 }
2739
2740 mask = NULL;
2741 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2742 if(terms[chan_index]) {
2743 LLVMValueRef chan_mask;
2744
2745 /*
2746 * If term < 0 then mask = 0 else mask = ~0.
2747 */
2748 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2749
2750 if(mask)
2751 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2752 else
2753 mask = chan_mask;
2754 }
2755 }
2756
2757 if (bld->exec_mask.has_mask) {
2758 LLVMValueRef invmask;
2759 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2760 mask = LLVMBuildOr(builder, mask, invmask, "");
2761 }
2762
2763 lp_build_mask_update(bld->mask, mask);
2764 if (!near_end_of_shader(bld, pc))
2765 lp_build_mask_check(bld->mask);
2766 }
2767
2768
2769 /**
2770 * Unconditional fragment kill.
2771 * The only predication is the execution mask which will apply if
2772 * we're inside a loop or conditional.
2773 */
2774 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2775 emit_kill(struct lp_build_tgsi_soa_context *bld,
2776 int pc)
2777 {
2778 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2779 LLVMValueRef mask;
2780
2781 /* For those channels which are "alive", disable fragment shader
2782 * execution.
2783 */
2784 if (bld->exec_mask.has_mask) {
2785 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2786 }
2787 else {
2788 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2789 mask = zero;
2790 }
2791
2792 lp_build_mask_update(bld->mask, mask);
2793
2794 if (!near_end_of_shader(bld, pc))
2795 lp_build_mask_check(bld->mask);
2796 }
2797
2798
2799 /**
2800 * Emit code which will dump the value of all the temporary registers
2801 * to stdout.
2802 */
2803 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2804 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2805 unsigned file)
2806 {
2807 const struct tgsi_shader_info *info = bld->bld_base.info;
2808 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2809 LLVMBuilderRef builder = gallivm->builder;
2810 LLVMValueRef reg_ptr;
2811 int index;
2812 int max_index = info->file_max[file];
2813
2814 /*
2815 * Some register files, particularly constants, can be very large,
2816 * and dumping everything could make this unusably slow.
2817 */
2818 max_index = MIN2(max_index, 32);
2819
2820 for (index = 0; index <= max_index; index++) {
2821 LLVMValueRef res;
2822 unsigned mask;
2823 int chan;
2824
2825 if (index < 8 * sizeof(unsigned) &&
2826 (info->file_mask[file] & (1u << index)) == 0) {
2827 /* This was not declared.*/
2828 continue;
2829 }
2830
2831 if (file == TGSI_FILE_INPUT) {
2832 mask = info->input_usage_mask[index];
2833 } else {
2834 mask = TGSI_WRITEMASK_XYZW;
2835 }
2836
2837 for (chan = 0; chan < 4; chan++) {
2838 if ((mask & (1 << chan)) == 0) {
2839 /* This channel is not used.*/
2840 continue;
2841 }
2842
2843 if (file == TGSI_FILE_CONSTANT) {
2844 struct tgsi_full_src_register reg;
2845 memset(®, 0, sizeof reg);
2846 reg.Register.File = file;
2847 reg.Register.Index = index;
2848 reg.Register.SwizzleX = 0;
2849 reg.Register.SwizzleY = 1;
2850 reg.Register.SwizzleZ = 2;
2851 reg.Register.SwizzleW = 3;
2852
2853 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan);
2854 if (!res) {
2855 continue;
2856 }
2857 } else if (file == TGSI_FILE_INPUT) {
2858 res = bld->inputs[index][chan];
2859 if (!res) {
2860 continue;
2861 }
2862 } else if (file == TGSI_FILE_TEMPORARY) {
2863 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2864 assert(reg_ptr);
2865 res = LLVMBuildLoad(builder, reg_ptr, "");
2866 } else if (file == TGSI_FILE_OUTPUT) {
2867 reg_ptr = lp_get_output_ptr(bld, index, chan);
2868 assert(reg_ptr);
2869 res = LLVMBuildLoad(builder, reg_ptr, "");
2870 } else {
2871 assert(0);
2872 continue;
2873 }
2874
2875 emit_dump_reg(gallivm, file, index, chan, res);
2876 }
2877 }
2878 }
2879
2880
2881
2882 void
lp_emit_declaration_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_declaration * decl)2883 lp_emit_declaration_soa(
2884 struct lp_build_tgsi_context *bld_base,
2885 const struct tgsi_full_declaration *decl)
2886 {
2887 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2888 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2889 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2890 const unsigned first = decl->Range.First;
2891 const unsigned last = decl->Range.Last;
2892 unsigned idx, i;
2893
2894 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2895
2896 switch (decl->Declaration.File) {
2897 case TGSI_FILE_TEMPORARY:
2898 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2899 assert(last < LP_MAX_INLINED_TEMPS);
2900 for (idx = first; idx <= last; ++idx) {
2901 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2902 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2903 }
2904 }
2905 break;
2906
2907 case TGSI_FILE_OUTPUT:
2908 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2909 for (idx = first; idx <= last; ++idx) {
2910 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2911 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2912 vec_type, "output");
2913 }
2914 }
2915 break;
2916
2917 case TGSI_FILE_ADDRESS:
2918 /* ADDR registers are only allocated with an integer LLVM IR type,
2919 * as they are guaranteed to always have integers.
2920 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2921 * an ADDR register for that matter).
2922 */
2923 assert(last < LP_MAX_TGSI_ADDRS);
2924 for (idx = first; idx <= last; ++idx) {
2925 assert(idx < LP_MAX_TGSI_ADDRS);
2926 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2927 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2928 }
2929 break;
2930
2931 case TGSI_FILE_SAMPLER_VIEW:
2932 /*
2933 * The target stored here MUST match whatever there actually
2934 * is in the set sampler views (what about return type?).
2935 */
2936 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2937 for (idx = first; idx <= last; ++idx) {
2938 bld->sv[idx] = decl->SamplerView;
2939 }
2940 break;
2941
2942 case TGSI_FILE_CONSTANT:
2943 {
2944 /*
2945 * We could trivially fetch the per-buffer pointer when fetching the
2946 * constant, relying on llvm to figure out it's always the same pointer
2947 * anyway. However, doing so results in a huge (more than factor of 10)
2948 * slowdown in llvm compilation times for some (but not all) shaders
2949 * (more specifically, the IR optimization spends way more time in
2950 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2951 */
2952 unsigned idx2D = decl->Dim.Index2D;
2953 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2954 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2955 bld->consts[idx2D] =
2956 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2957 bld->consts_sizes[idx2D] =
2958 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2959 }
2960 break;
2961 case TGSI_FILE_BUFFER:
2962 {
2963 unsigned idx = decl->Range.First;
2964 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2965 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2966 bld->ssbos[idx] =
2967 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2968 bld->ssbo_sizes[idx] =
2969 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2970
2971 }
2972 break;
2973 case TGSI_FILE_MEMORY:
2974 break;
2975 default:
2976 /* don't need to declare other vars */
2977 break;
2978 }
2979 }
2980
2981
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)2982 void lp_emit_immediate_soa(
2983 struct lp_build_tgsi_context *bld_base,
2984 const struct tgsi_full_immediate *imm)
2985 {
2986 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2987 struct gallivm_state * gallivm = bld_base->base.gallivm;
2988 LLVMValueRef imms[4];
2989 unsigned i;
2990 const uint size = imm->Immediate.NrTokens - 1;
2991 assert(size <= 4);
2992 switch (imm->Immediate.DataType) {
2993 case TGSI_IMM_FLOAT32:
2994 for( i = 0; i < size; ++i )
2995 imms[i] =
2996 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2997
2998 break;
2999 case TGSI_IMM_FLOAT64:
3000 case TGSI_IMM_UINT64:
3001 case TGSI_IMM_INT64:
3002 case TGSI_IMM_UINT32:
3003 for( i = 0; i < size; ++i ) {
3004 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3005 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3006 }
3007
3008 break;
3009 case TGSI_IMM_INT32:
3010 for( i = 0; i < size; ++i ) {
3011 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3012 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3013 }
3014
3015 break;
3016 }
3017 for( i = size; i < 4; ++i )
3018 imms[i] = bld_base->base.undef;
3019
3020 if (bld->use_immediates_array) {
3021 unsigned index = bld->num_immediates;
3022 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3023 LLVMBuilderRef builder = gallivm->builder;
3024 LLVMValueRef gep[2];
3025 gep[0] = lp_build_const_int32(gallivm, 0);
3026
3027 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3028 for (i = 0; i < 4; ++i ) {
3029 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3030 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3031 bld->imms_array, gep, 2, "");
3032 LLVMBuildStore(builder, imms[i], imm_ptr);
3033 }
3034 } else {
3035 /* simply copy the immediate values into the next immediates[] slot */
3036 unsigned i;
3037 assert(imm->Immediate.NrTokens - 1 <= 4);
3038 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3039
3040 for(i = 0; i < 4; ++i )
3041 bld->immediates[bld->num_immediates][i] = imms[i];
3042
3043 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3044 unsigned index = bld->num_immediates;
3045 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3046 LLVMBuilderRef builder = gallivm->builder;
3047 LLVMValueRef gep[2];
3048 gep[0] = lp_build_const_int32(gallivm, 0);
3049 for (i = 0; i < 4; ++i ) {
3050 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3051 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3052 bld->imms_array, gep, 2, "");
3053 LLVMBuildStore(builder,
3054 bld->immediates[index][i],
3055 imm_ptr);
3056 }
3057 }
3058 }
3059
3060 bld->num_immediates++;
3061 }
3062
3063 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3064 ddx_emit(
3065 const struct lp_build_tgsi_action * action,
3066 struct lp_build_tgsi_context * bld_base,
3067 struct lp_build_emit_data * emit_data)
3068 {
3069 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3070
3071 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3072 &emit_data->output[emit_data->chan], NULL);
3073 }
3074
3075 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3076 ddy_emit(
3077 const struct lp_build_tgsi_action * action,
3078 struct lp_build_tgsi_context * bld_base,
3079 struct lp_build_emit_data * emit_data)
3080 {
3081 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3082
3083 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3084 &emit_data->output[emit_data->chan]);
3085 }
3086
3087 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3088 kill_emit(
3089 const struct lp_build_tgsi_action * action,
3090 struct lp_build_tgsi_context * bld_base,
3091 struct lp_build_emit_data * emit_data)
3092 {
3093 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094
3095 emit_kill(bld, bld_base->pc - 1);
3096 }
3097
3098 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3099 kill_if_emit(
3100 const struct lp_build_tgsi_action * action,
3101 struct lp_build_tgsi_context * bld_base,
3102 struct lp_build_emit_data * emit_data)
3103 {
3104 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3105
3106 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3107 }
3108
3109 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3110 tex_emit(
3111 const struct lp_build_tgsi_action * action,
3112 struct lp_build_tgsi_context * bld_base,
3113 struct lp_build_emit_data * emit_data)
3114 {
3115 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3116
3117 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3118 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3119 }
3120
3121 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3122 tex2_emit(
3123 const struct lp_build_tgsi_action * action,
3124 struct lp_build_tgsi_context * bld_base,
3125 struct lp_build_emit_data * emit_data)
3126 {
3127 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3128
3129 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3130 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3131 }
3132
3133 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3134 txb_emit(
3135 const struct lp_build_tgsi_action * action,
3136 struct lp_build_tgsi_context * bld_base,
3137 struct lp_build_emit_data * emit_data)
3138 {
3139 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140
3141 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3142 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3143 }
3144
3145 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3146 txb2_emit(
3147 const struct lp_build_tgsi_action * action,
3148 struct lp_build_tgsi_context * bld_base,
3149 struct lp_build_emit_data * emit_data)
3150 {
3151 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152
3153 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3154 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3155 }
3156
3157 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3158 txd_emit(
3159 const struct lp_build_tgsi_action * action,
3160 struct lp_build_tgsi_context * bld_base,
3161 struct lp_build_emit_data * emit_data)
3162 {
3163 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3164
3165 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3166 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3167 }
3168
3169 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3170 txl_emit(
3171 const struct lp_build_tgsi_action * action,
3172 struct lp_build_tgsi_context * bld_base,
3173 struct lp_build_emit_data * emit_data)
3174 {
3175 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3176
3177 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3178 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3179 }
3180
3181 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3182 txl2_emit(
3183 const struct lp_build_tgsi_action * action,
3184 struct lp_build_tgsi_context * bld_base,
3185 struct lp_build_emit_data * emit_data)
3186 {
3187 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188
3189 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3190 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3191 }
3192
3193 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3194 txp_emit(
3195 const struct lp_build_tgsi_action * action,
3196 struct lp_build_tgsi_context * bld_base,
3197 struct lp_build_emit_data * emit_data)
3198 {
3199 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3200
3201 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3202 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3203 }
3204
3205 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3206 tg4_emit(
3207 const struct lp_build_tgsi_action * action,
3208 struct lp_build_tgsi_context * bld_base,
3209 struct lp_build_emit_data * emit_data)
3210 {
3211 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212
3213 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3214 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3215 }
3216
3217 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3218 lodq_emit(
3219 const struct lp_build_tgsi_action * action,
3220 struct lp_build_tgsi_context * bld_base,
3221 struct lp_build_emit_data * emit_data)
3222 {
3223 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224
3225 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3226 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3227 }
3228
3229 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3230 txq_emit(
3231 const struct lp_build_tgsi_action * action,
3232 struct lp_build_tgsi_context * bld_base,
3233 struct lp_build_emit_data * emit_data)
3234 {
3235 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236
3237 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3238 }
3239
3240 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3241 txf_emit(
3242 const struct lp_build_tgsi_action * action,
3243 struct lp_build_tgsi_context * bld_base,
3244 struct lp_build_emit_data * emit_data)
3245 {
3246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3247
3248 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3249 }
3250
3251 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3252 sample_i_emit(
3253 const struct lp_build_tgsi_action * action,
3254 struct lp_build_tgsi_context * bld_base,
3255 struct lp_build_emit_data * emit_data)
3256 {
3257 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3258
3259 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3260 }
3261
3262 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3263 sample_emit(
3264 const struct lp_build_tgsi_action * action,
3265 struct lp_build_tgsi_context * bld_base,
3266 struct lp_build_emit_data * emit_data)
3267 {
3268 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3269
3270 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3271 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3272 }
3273
3274 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3275 sample_b_emit(
3276 const struct lp_build_tgsi_action * action,
3277 struct lp_build_tgsi_context * bld_base,
3278 struct lp_build_emit_data * emit_data)
3279 {
3280 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3281
3282 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3283 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3284 }
3285
3286 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3287 sample_c_emit(
3288 const struct lp_build_tgsi_action * action,
3289 struct lp_build_tgsi_context * bld_base,
3290 struct lp_build_emit_data * emit_data)
3291 {
3292 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3293
3294 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3295 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3296 }
3297
3298 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3299 sample_c_lz_emit(
3300 const struct lp_build_tgsi_action * action,
3301 struct lp_build_tgsi_context * bld_base,
3302 struct lp_build_emit_data * emit_data)
3303 {
3304 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3305
3306 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3307 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3308 }
3309
3310 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3311 sample_d_emit(
3312 const struct lp_build_tgsi_action * action,
3313 struct lp_build_tgsi_context * bld_base,
3314 struct lp_build_emit_data * emit_data)
3315 {
3316 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3317
3318 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3319 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3320 }
3321
3322 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3323 sample_l_emit(
3324 const struct lp_build_tgsi_action * action,
3325 struct lp_build_tgsi_context * bld_base,
3326 struct lp_build_emit_data * emit_data)
3327 {
3328 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3329
3330 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3331 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3332 }
3333
3334 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3335 gather4_emit(
3336 const struct lp_build_tgsi_action * action,
3337 struct lp_build_tgsi_context * bld_base,
3338 struct lp_build_emit_data * emit_data)
3339 {
3340 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3341
3342 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3343 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3344 }
3345
3346 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3347 sviewinfo_emit(
3348 const struct lp_build_tgsi_action * action,
3349 struct lp_build_tgsi_context * bld_base,
3350 struct lp_build_emit_data * emit_data)
3351 {
3352 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3353
3354 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3355 }
3356
3357 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3358 lod_emit(
3359 const struct lp_build_tgsi_action * action,
3360 struct lp_build_tgsi_context * bld_base,
3361 struct lp_build_emit_data * emit_data)
3362 {
3363 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3364
3365 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3366 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3367 }
3368
target_to_dims_layer(unsigned target,unsigned * dims,unsigned * layer_coord)3369 static void target_to_dims_layer(unsigned target,
3370 unsigned *dims,
3371 unsigned *layer_coord)
3372 {
3373 *layer_coord = 0;
3374 switch (target) {
3375 case TGSI_TEXTURE_1D:
3376 case TGSI_TEXTURE_BUFFER:
3377 *dims = 1;
3378 break;
3379 case TGSI_TEXTURE_1D_ARRAY:
3380 *layer_coord = 1;
3381 *dims = 1;
3382 break;
3383 case TGSI_TEXTURE_2D:
3384 case TGSI_TEXTURE_RECT:
3385 *dims = 2;
3386 break;
3387 case TGSI_TEXTURE_2D_ARRAY:
3388 *layer_coord = 2;
3389 *dims = 2;
3390 break;
3391 case TGSI_TEXTURE_3D:
3392 case TGSI_TEXTURE_CUBE:
3393 case TGSI_TEXTURE_CUBE_ARRAY:
3394 *dims = 3;
3395 break;
3396 default:
3397 assert(0);
3398 *dims = 0;
3399 return;
3400 }
3401 }
3402
3403 static void
img_load_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3404 img_load_emit(
3405 const struct lp_build_tgsi_action * action,
3406 struct lp_build_tgsi_context * bld_base,
3407 struct lp_build_emit_data * emit_data)
3408 {
3409 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3410 struct lp_img_params params;
3411 LLVMValueRef coords[5];
3412 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3413 unsigned dims;
3414 unsigned target = emit_data->inst->Memory.Texture;
3415 unsigned layer_coord;
3416
3417 target_to_dims_layer(target, &dims, &layer_coord);
3418
3419 for (unsigned i = 0; i < dims; i++) {
3420 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3421 }
3422 for (unsigned i = dims; i < 5; i++) {
3423 coords[i] = coord_undef;
3424 }
3425 if (layer_coord)
3426 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3427
3428 memset(¶ms, 0, sizeof(params));
3429
3430 params.type = bld->bld_base.base.type;
3431 params.context_ptr = bld->context_ptr;
3432 params.thread_data_ptr = bld->thread_data_ptr;
3433 params.coords = coords;
3434 params.outdata = emit_data->output;
3435 params.target = tgsi_to_pipe_tex_target(target);
3436 params.image_index = emit_data->inst->Src[0].Register.Index;
3437 params.img_op = LP_IMG_LOAD;
3438 bld->image->emit_op(bld->image,
3439 bld->bld_base.base.gallivm,
3440 ¶ms);
3441 }
3442
/**
 * TGSI_OPCODE_LOAD: read from a buffer, shared memory, image or constant
 * buffer resource.  Src[0] names the resource; Src[1] holds the offset
 * (or the coordinates, for images).
 */
static void
load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
          bufreg->Register.File == TGSI_FILE_IMAGE ||
          bufreg->Register.File == TGSI_FILE_MEMORY ||
          bufreg->Register.File == TGSI_FILE_CONSTBUF);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      /* Image loads go through the image code path. */
      img_load_emit(action, bld_base, emit_data);
   } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
      LLVMValueRef consts_ptr = bld->consts[buf];
      LLVMValueRef num_consts = bld->consts_sizes[buf];

      LLVMValueRef indirect_index;
      LLVMValueRef overflow_mask;

      /* Convert the fetched offset to a vec4 slot index (>> 4, i.e.
       * divide by 16 — presumably a byte offset into vec4 constants).
       */
      indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
      indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);

      /* Gather values from the constant buffer */
      unsigned chan_index;
      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         /* Construct a boolean vector telling us which channels
          * overflow the bound constant buffer */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          indirect_index, num_consts);

         /* index_vec = indirect_index * 4 */
         LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec,
                                  lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         /* Per-lane gather; lanes flagged in overflow_mask are masked off. */
         emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
      }
   } else if (0) {
      /* for indirect support with ARB_gpu_shader5 */
   } else {
      /* SSBO or shared-memory load. */
      LLVMValueRef index;
      LLVMValueRef scalar, scalar_ptr;
      unsigned chan_index;

      /* Fetched offset >> 2: convert to a dword element index. */
      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         /* SSBO size >> 2 gives the dword count; broadcast for the
          * per-lane bounds check below.  Shared memory is not bounds
          * checked here.
          */
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         /* Disable lanes that are inactive or would read out of bounds. */
         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         /* Scalarize: loop over the vector lanes, loading one element
          * per iteration predicated on the execution mask, accumulating
          * the result vector in an alloca.
          */
         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         struct lp_build_if_state ifthen;
         LLVMValueRef cond, temp_res;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

         lp_build_if(&ifthen, gallivm, cond);
         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);

         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_else(&ifthen);
         /* Disabled lanes read back 0. */
         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
      }
   }
}
3551
3552 static void
img_store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3553 img_store_emit(
3554 const struct lp_build_tgsi_action * action,
3555 struct lp_build_tgsi_context * bld_base,
3556 struct lp_build_emit_data * emit_data)
3557 {
3558 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3559 struct lp_img_params params;
3560 LLVMValueRef coords[5];
3561 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3562 unsigned dims;
3563 unsigned target = emit_data->inst->Memory.Texture;
3564 unsigned layer_coord;
3565
3566 target_to_dims_layer(target, &dims, &layer_coord);
3567 for (unsigned i = 0; i < dims; i++) {
3568 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3569 }
3570 for (unsigned i = dims; i < 5; i++) {
3571 coords[i] = coord_undef;
3572 }
3573 if (layer_coord)
3574 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3575 memset(¶ms, 0, sizeof(params));
3576
3577 params.type = bld->bld_base.base.type;
3578 params.context_ptr = bld->context_ptr;
3579 params.thread_data_ptr = bld->thread_data_ptr;
3580 params.coords = coords;
3581 params.outdata = NULL;
3582 params.exec_mask = mask_vec(bld_base);
3583 params.target = tgsi_to_pipe_tex_target(target);
3584 params.image_index = emit_data->inst->Dst[0].Register.Index;
3585 params.img_op = LP_IMG_STORE;
3586 for (unsigned i = 0; i < 4; i++)
3587 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3588
3589 bld->image->emit_op(bld->image,
3590 bld->bld_base.base.gallivm,
3591 ¶ms);
3592 }
3593
3594 static void
store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3595 store_emit(
3596 const struct lp_build_tgsi_action * action,
3597 struct lp_build_tgsi_context * bld_base,
3598 struct lp_build_emit_data * emit_data)
3599 {
3600 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3601 struct gallivm_state * gallivm = bld_base->base.gallivm;
3602 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3603 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3604 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3605 unsigned buf = bufreg->Register.Index;
3606 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3607 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3608
3609 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3610 img_store_emit(action, bld_base, emit_data);
3611 } else if (0) {
3612
3613 } else {
3614 LLVMValueRef index; /* index into the const buffer */
3615 LLVMValueRef scalar_ptr;
3616 LLVMValueRef value;
3617 unsigned chan_index;
3618
3619 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3620 index = lp_build_shr_imm(uint_bld, index, 2);
3621
3622 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3623
3624 LLVMValueRef ssbo_limit = NULL;
3625
3626 if (!is_shared) {
3627 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3628 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3629 }
3630
3631 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3632 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3633
3634 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3635
3636 LLVMValueRef exec_mask = mask_vec(bld_base);
3637 if (!is_shared) {
3638 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3639 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3640 }
3641
3642 struct lp_build_loop_state loop_state;
3643 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3644
3645 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3646 loop_state.counter, "");
3647 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3648
3649 struct lp_build_if_state ifthen;
3650 LLVMValueRef cond;
3651
3652 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3653 loop_state.counter, "");
3654
3655 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3656 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3657 lp_build_if(&ifthen, gallivm, cond);
3658
3659 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3660
3661 lp_build_endif(&ifthen);
3662 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3663 NULL, LLVMIntUGE);
3664 }
3665 }
3666 }
3667
3668 static void
resq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3669 resq_emit(
3670 const struct lp_build_tgsi_action * action,
3671 struct lp_build_tgsi_context * bld_base,
3672 struct lp_build_emit_data * emit_data)
3673 {
3674 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3675 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3676 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3677
3678 unsigned buf = bufreg->Register.Index;
3679 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3680
3681 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3682 unsigned target = emit_data->inst->Memory.Texture;
3683 struct lp_sampler_size_query_params params = { 0 };
3684 params.int_type = bld->bld_base.int_bld.type;
3685 params.texture_unit = buf;
3686 params.target = tgsi_to_pipe_tex_target(target);
3687 params.context_ptr = bld->context_ptr;
3688 params.sizes_out = emit_data->output;
3689
3690 bld->image->emit_size_query(bld->image,
3691 bld->bld_base.base.gallivm,
3692 ¶ms);
3693 } else {
3694 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3695
3696 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3697 }
3698 }
3699
3700 static void
img_atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data,LLVMAtomicRMWBinOp op)3701 img_atomic_emit(
3702 const struct lp_build_tgsi_action * action,
3703 struct lp_build_tgsi_context * bld_base,
3704 struct lp_build_emit_data * emit_data,
3705 LLVMAtomicRMWBinOp op)
3706 {
3707 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3708 struct lp_img_params params;
3709 LLVMValueRef coords[5];
3710 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3711 unsigned dims;
3712 unsigned layer_coord;
3713 unsigned target = emit_data->inst->Memory.Texture;
3714
3715 target_to_dims_layer(target, &dims, &layer_coord);
3716
3717 for (unsigned i = 0; i < dims; i++) {
3718 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3719 }
3720 for (unsigned i = dims; i < 5; i++) {
3721 coords[i] = coord_undef;
3722 }
3723 if (layer_coord)
3724 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3725 memset(¶ms, 0, sizeof(params));
3726
3727 params.type = bld->bld_base.base.type;
3728 params.context_ptr = bld->context_ptr;
3729 params.thread_data_ptr = bld->thread_data_ptr;
3730 params.exec_mask = mask_vec(bld_base);
3731 params.image_index = emit_data->inst->Src[0].Register.Index;
3732 params.coords = coords;
3733 params.target = tgsi_to_pipe_tex_target(target);
3734 params.op = op;
3735 params.outdata = emit_data->output;
3736 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3737
3738 for (unsigned i = 0; i < 4; i++)
3739 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3740 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3741 for (unsigned i = 0; i < 4; i++)
3742 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3743 }
3744 bld->image->emit_op(bld->image,
3745 bld->bld_base.base.gallivm,
3746 ¶ms);
3747 }
3748
3749 static void
atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3750 atomic_emit(
3751 const struct lp_build_tgsi_action * action,
3752 struct lp_build_tgsi_context * bld_base,
3753 struct lp_build_emit_data * emit_data)
3754 {
3755 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3756 struct gallivm_state * gallivm = bld_base->base.gallivm;
3757 LLVMBuilderRef builder = gallivm->builder;
3758 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3759 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3760
3761 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3762 unsigned buf = bufreg->Register.Index;
3763 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3764
3765 LLVMAtomicRMWBinOp op = -1;
3766 switch (emit_data->inst->Instruction.Opcode) {
3767 case TGSI_OPCODE_ATOMUADD:
3768 op = LLVMAtomicRMWBinOpAdd;
3769 break;
3770 case TGSI_OPCODE_ATOMXCHG:
3771 op = LLVMAtomicRMWBinOpXchg;
3772 break;
3773 case TGSI_OPCODE_ATOMAND:
3774 op = LLVMAtomicRMWBinOpAnd;
3775 break;
3776 case TGSI_OPCODE_ATOMOR:
3777 op = LLVMAtomicRMWBinOpOr;
3778 break;
3779 case TGSI_OPCODE_ATOMXOR:
3780 op = LLVMAtomicRMWBinOpXor;
3781 break;
3782 case TGSI_OPCODE_ATOMUMIN:
3783 op = LLVMAtomicRMWBinOpUMin;
3784 break;
3785 case TGSI_OPCODE_ATOMUMAX:
3786 op = LLVMAtomicRMWBinOpUMax;
3787 break;
3788 case TGSI_OPCODE_ATOMIMIN:
3789 op = LLVMAtomicRMWBinOpMin;
3790 break;
3791 case TGSI_OPCODE_ATOMIMAX:
3792 op = LLVMAtomicRMWBinOpMax;
3793 break;
3794 case TGSI_OPCODE_ATOMCAS:
3795 break;
3796 default:
3797 assert(0);
3798 return;
3799 }
3800
3801 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3802 img_atomic_emit(action, bld_base, emit_data, op);
3803 } else if (0) {
3804 } else {
3805 LLVMValueRef index; /* index into the const buffer */
3806 LLVMValueRef scalar, scalar_ptr;
3807 LLVMValueRef value;
3808
3809 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3810 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3811
3812 index = lp_build_shr_imm(uint_bld, index, 2);
3813
3814 if (!is_shared) {
3815 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3816 scalar_ptr = bld->ssbos[buf];
3817 } else
3818 scalar_ptr = bld->shared_ptr;
3819
3820 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3821 uint_bld->vec_type, "");
3822
3823 LLVMValueRef ssbo_limit;
3824 if (!is_shared) {
3825 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3826 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3827 }
3828
3829 LLVMValueRef exec_mask = mask_vec(bld_base);
3830
3831 if (!is_shared) {
3832 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3833 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3834 }
3835
3836 struct lp_build_loop_state loop_state;
3837 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3838
3839 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3840 loop_state.counter, "");
3841 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3842
3843 index = LLVMBuildExtractElement(gallivm->builder, index,
3844 loop_state.counter, "");
3845
3846 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3847 &index, 1, "");
3848
3849 struct lp_build_if_state ifthen;
3850 LLVMValueRef cond, temp_res;
3851
3852 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3853 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3854 lp_build_if(&ifthen, gallivm, cond);
3855
3856 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3857 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3858 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3859 loop_state.counter, "");
3860 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3861 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3862 cas_src_ptr,
3863 LLVMAtomicOrderingSequentiallyConsistent,
3864 LLVMAtomicOrderingSequentiallyConsistent,
3865 false);
3866 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3867 } else {
3868 scalar = LLVMBuildAtomicRMW(builder, op,
3869 scalar_ptr, value_ptr,
3870 LLVMAtomicOrderingSequentiallyConsistent,
3871 false);
3872 }
3873 temp_res = LLVMBuildLoad(builder, atom_res, "");
3874 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3875 LLVMBuildStore(builder, temp_res, atom_res);
3876 lp_build_else(&ifthen);
3877 temp_res = LLVMBuildLoad(builder, atom_res, "");
3878 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3879 LLVMBuildStore(builder, temp_res, atom_res);
3880 lp_build_endif(&ifthen);
3881
3882 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3883 NULL, LLVMIntUGE);
3884 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3885 }
3886 }
3887
3888 static void
barrier_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3889 barrier_emit(
3890 const struct lp_build_tgsi_action * action,
3891 struct lp_build_tgsi_context * bld_base,
3892 struct lp_build_emit_data * emit_data)
3893 {
3894 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3895 struct gallivm_state * gallivm = bld_base->base.gallivm;
3896
3897 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3898
3899 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3900 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3901 }
3902
3903 static void
membar_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3904 membar_emit(
3905 const struct lp_build_tgsi_action * action,
3906 struct lp_build_tgsi_context * bld_base,
3907 struct lp_build_emit_data * emit_data)
3908 {
3909 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3910 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3911 }
3912
3913 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3914 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3915 LLVMValueRef ptr,
3916 LLVMValueRef mask)
3917 {
3918 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3919 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3920
3921 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3922
3923 LLVMBuildStore(builder, current_vec, ptr);
3924 }
3925
3926 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3927 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3928 LLVMValueRef ptr,
3929 LLVMValueRef mask)
3930 {
3931 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3932 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3933
3934 current_vec = lp_build_select(&bld_base->uint_bld,
3935 mask,
3936 bld_base->uint_bld.zero,
3937 current_vec);
3938
3939 LLVMBuildStore(builder, current_vec, ptr);
3940 }
3941
3942 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3943 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3944 LLVMValueRef current_mask_vec,
3945 LLVMValueRef total_emitted_vertices_vec)
3946 {
3947 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3948 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3949 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3950 total_emitted_vertices_vec,
3951 bld->max_output_vertices_vec);
3952
3953 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3954 }
3955
3956 static void
emit_vertex(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3957 emit_vertex(
3958 const struct lp_build_tgsi_action * action,
3959 struct lp_build_tgsi_context * bld_base,
3960 struct lp_build_emit_data * emit_data)
3961 {
3962 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3963 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3964
3965 if (bld->gs_iface->emit_vertex) {
3966 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3967 TGSI_TYPE_UNSIGNED,
3968 emit_data->inst->Src[0].Register.SwizzleX);
3969 LLVMValueRef mask = mask_vec(bld_base);
3970 LLVMValueRef total_emitted_vertices_vec =
3971 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3972
3973 mask = clamp_mask_to_max_output_vertices(bld, mask,
3974 total_emitted_vertices_vec);
3975 gather_outputs(bld);
3976 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3977 bld->outputs,
3978 total_emitted_vertices_vec,
3979 mask,
3980 stream_id);
3981 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3982 mask);
3983 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3984 mask);
3985 #if DUMP_GS_EMITS
3986 lp_build_print_value(bld->bld_base.base.gallivm,
3987 " +++ emit vertex masked ones = ",
3988 mask);
3989 lp_build_print_value(bld->bld_base.base.gallivm,
3990 " +++ emit vertex emitted = ",
3991 total_emitted_vertices_vec);
3992 #endif
3993 }
3994 }
3995
3996
3997 static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,LLVMValueRef mask)3998 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
3999 LLVMValueRef mask)
4000 {
4001 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4002 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4003
4004 if (bld->gs_iface->end_primitive) {
4005 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4006 LLVMValueRef emitted_vertices_vec =
4007 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4008 LLVMValueRef emitted_prims_vec =
4009 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4010 LLVMValueRef total_emitted_vertices_vec =
4011 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4012 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4013 emitted_vertices_vec,
4014 uint_bld->zero);
4015 /* We need to combine the current execution mask with the mask
4016 telling us which, if any, execution slots actually have
4017 unemitted primitives, this way we make sure that end_primitives
4018 executes only on the paths that have unflushed vertices */
4019 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4020
4021 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4022 total_emitted_vertices_vec,
4023 emitted_vertices_vec,
4024 emitted_prims_vec,
4025 mask_vec(bld_base), 0);
4026
4027 #if DUMP_GS_EMITS
4028 lp_build_print_value(bld->bld_base.base.gallivm,
4029 " +++ end prim masked ones = ",
4030 mask);
4031 lp_build_print_value(bld->bld_base.base.gallivm,
4032 " +++ end prim emitted verts1 = ",
4033 emitted_vertices_vec);
4034 lp_build_print_value(bld->bld_base.base.gallivm,
4035 " +++ end prim emitted prims1 = ",
4036 LLVMBuildLoad(builder,
4037 bld->emitted_prims_vec_ptr, ""));
4038 #endif
4039 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4040 mask);
4041 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4042 mask);
4043 #if DUMP_GS_EMITS
4044 lp_build_print_value(bld->bld_base.base.gallivm,
4045 " +++ end prim emitted verts2 = ",
4046 LLVMBuildLoad(builder,
4047 bld->emitted_vertices_vec_ptr, ""));
4048 #endif
4049 }
4050
4051 }
4052
4053 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4054 end_primitive(
4055 const struct lp_build_tgsi_action * action,
4056 struct lp_build_tgsi_context * bld_base,
4057 struct lp_build_emit_data * emit_data)
4058 {
4059 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4060
4061 if (bld->gs_iface->end_primitive) {
4062 LLVMValueRef mask = mask_vec(bld_base);
4063 end_primitive_masked(bld_base, mask);
4064 }
4065 }
4066
4067 static void
barrier_emit_tcs(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4068 barrier_emit_tcs(
4069 const struct lp_build_tgsi_action * action,
4070 struct lp_build_tgsi_context * bld_base,
4071 struct lp_build_emit_data * emit_data)
4072 {
4073 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4074
4075 if (bld->tcs_iface->emit_barrier) {
4076 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4077 }
4078 }
4079
4080
4081 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4082 cal_emit(
4083 const struct lp_build_tgsi_action * action,
4084 struct lp_build_tgsi_context * bld_base,
4085 struct lp_build_emit_data * emit_data)
4086 {
4087 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4088
4089 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4090 &bld_base->pc);
4091 }
4092
4093 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4094 ret_emit(
4095 const struct lp_build_tgsi_action * action,
4096 struct lp_build_tgsi_context * bld_base,
4097 struct lp_build_emit_data * emit_data)
4098 {
4099 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4100
4101 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4102 }
4103
4104 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4105 brk_emit(
4106 const struct lp_build_tgsi_action * action,
4107 struct lp_build_tgsi_context * bld_base,
4108 struct lp_build_emit_data * emit_data)
4109 {
4110 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4111
4112 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4113 }
4114
4115 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4116 if_emit(
4117 const struct lp_build_tgsi_action * action,
4118 struct lp_build_tgsi_context * bld_base,
4119 struct lp_build_emit_data * emit_data)
4120 {
4121 LLVMValueRef tmp;
4122 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4123
4124 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4125 emit_data->args[0], bld->bld_base.base.zero);
4126 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4127 }
4128
4129 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4130 uif_emit(
4131 const struct lp_build_tgsi_action * action,
4132 struct lp_build_tgsi_context * bld_base,
4133 struct lp_build_emit_data * emit_data)
4134 {
4135 LLVMValueRef tmp;
4136 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4137 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4138
4139 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4140 emit_data->args[0], uint_bld->zero);
4141 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4142 }
4143
4144 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4145 case_emit(
4146 const struct lp_build_tgsi_action * action,
4147 struct lp_build_tgsi_context * bld_base,
4148 struct lp_build_emit_data * emit_data)
4149 {
4150 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4151
4152 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4153 }
4154
4155 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4156 default_emit(
4157 const struct lp_build_tgsi_action * action,
4158 struct lp_build_tgsi_context * bld_base,
4159 struct lp_build_emit_data * emit_data)
4160 {
4161 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4162
4163 lp_exec_default(&bld->exec_mask, bld_base);
4164 }
4165
4166 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4167 switch_emit(
4168 const struct lp_build_tgsi_action * action,
4169 struct lp_build_tgsi_context * bld_base,
4170 struct lp_build_emit_data * emit_data)
4171 {
4172 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4173
4174 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4175 }
4176
4177 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4178 endswitch_emit(
4179 const struct lp_build_tgsi_action * action,
4180 struct lp_build_tgsi_context * bld_base,
4181 struct lp_build_emit_data * emit_data)
4182 {
4183 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4184
4185 lp_exec_endswitch(&bld->exec_mask, bld_base);
4186 }
4187
4188 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4189 bgnloop_emit(
4190 const struct lp_build_tgsi_action * action,
4191 struct lp_build_tgsi_context * bld_base,
4192 struct lp_build_emit_data * emit_data)
4193 {
4194 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4195
4196 lp_exec_bgnloop(&bld->exec_mask, true);
4197 }
4198
4199 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4200 bgnsub_emit(
4201 const struct lp_build_tgsi_action * action,
4202 struct lp_build_tgsi_context * bld_base,
4203 struct lp_build_emit_data * emit_data)
4204 {
4205 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4206
4207 lp_exec_mask_bgnsub(&bld->exec_mask);
4208 }
4209
4210 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4211 else_emit(
4212 const struct lp_build_tgsi_action * action,
4213 struct lp_build_tgsi_context * bld_base,
4214 struct lp_build_emit_data * emit_data)
4215 {
4216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4217
4218 lp_exec_mask_cond_invert(&bld->exec_mask);
4219 }
4220
4221 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4222 endif_emit(
4223 const struct lp_build_tgsi_action * action,
4224 struct lp_build_tgsi_context * bld_base,
4225 struct lp_build_emit_data * emit_data)
4226 {
4227 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4228
4229 lp_exec_mask_cond_pop(&bld->exec_mask);
4230 }
4231
4232 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4233 endloop_emit(
4234 const struct lp_build_tgsi_action * action,
4235 struct lp_build_tgsi_context * bld_base,
4236 struct lp_build_emit_data * emit_data)
4237 {
4238 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4239
4240 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4241 }
4242
4243 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4244 endsub_emit(
4245 const struct lp_build_tgsi_action * action,
4246 struct lp_build_tgsi_context * bld_base,
4247 struct lp_build_emit_data * emit_data)
4248 {
4249 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4250
4251 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4252 }
4253
4254 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4255 cont_emit(
4256 const struct lp_build_tgsi_action * action,
4257 struct lp_build_tgsi_context * bld_base,
4258 struct lp_build_emit_data * emit_data)
4259 {
4260 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4261
4262 lp_exec_continue(&bld->exec_mask);
4263 }
4264
emit_prologue(struct lp_build_tgsi_context * bld_base)4265 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4266 {
4267 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4268 struct gallivm_state * gallivm = bld_base->base.gallivm;
4269
4270 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4271 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4272 bld->temps_array = lp_build_alloca_undef(gallivm,
4273 LLVMArrayType(bld_base->base.vec_type, array_size),
4274 "temp_array");
4275 }
4276
4277 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4278 LLVMValueRef array_size =
4279 lp_build_const_int32(gallivm,
4280 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4281 bld->outputs_array = lp_build_array_alloca(gallivm,
4282 bld_base->base.vec_type, array_size,
4283 "output_array");
4284 }
4285
4286 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4287 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4288 bld->imms_array = lp_build_alloca_undef(gallivm,
4289 LLVMArrayType(bld_base->base.vec_type, array_size),
4290 "imms_array");
4291 }
4292
4293 /* If we have indirect addressing in inputs we need to copy them into
4294 * our alloca array to be able to iterate over them */
4295 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4296 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4297 unsigned index, chan;
4298 LLVMTypeRef vec_type = bld_base->base.vec_type;
4299 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4300 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4301 bld->inputs_array = lp_build_array_alloca(gallivm,
4302 vec_type, array_size,
4303 "input_array");
4304
4305 assert(bld_base->info->num_inputs
4306 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4307
4308 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4309 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4310 LLVMValueRef lindex =
4311 lp_build_const_int32(gallivm, index * 4 + chan);
4312 LLVMValueRef input_ptr =
4313 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4314 &lindex, 1, "");
4315 LLVMValueRef value = bld->inputs[index][chan];
4316 if (value)
4317 LLVMBuildStore(gallivm->builder, value, input_ptr);
4318 }
4319 }
4320 }
4321
4322 if (bld->gs_iface) {
4323 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4324 bld->emitted_prims_vec_ptr =
4325 lp_build_alloca(gallivm,
4326 uint_bld->vec_type,
4327 "emitted_prims_ptr");
4328 bld->emitted_vertices_vec_ptr =
4329 lp_build_alloca(gallivm,
4330 uint_bld->vec_type,
4331 "emitted_vertices_ptr");
4332 bld->total_emitted_vertices_vec_ptr =
4333 lp_build_alloca(gallivm,
4334 uint_bld->vec_type,
4335 "total_emitted_vertices_ptr");
4336
4337 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4338 bld->emitted_prims_vec_ptr);
4339 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4340 bld->emitted_vertices_vec_ptr);
4341 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4342 bld->total_emitted_vertices_vec_ptr);
4343 }
4344
4345 if (DEBUG_EXECUTION) {
4346 lp_build_printf(gallivm, "\n");
4347 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4348 if (!bld->gs_iface)
4349 emit_dump_file(bld, TGSI_FILE_INPUT);
4350 }
4351 }
4352
emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)4353 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4354 {
4355 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4356
4357 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4358 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4359 }
4360 }
4361
emit_epilogue(struct lp_build_tgsi_context * bld_base)4362 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4363 {
4364 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4365 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4366
4367 if (DEBUG_EXECUTION) {
4368 /* for debugging */
4369 if (0) {
4370 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4371 }
4372 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4373 lp_build_printf(bld_base->base.gallivm, "\n");
4374 }
4375
4376 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4377 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4378 }
4379
4380 /* If we have indirect addressing in outputs we need to copy our alloca array
4381 * to the outputs slots specified by the caller */
4382 if (bld->gs_iface) {
4383 LLVMValueRef total_emitted_vertices_vec;
4384 LLVMValueRef emitted_prims_vec;
4385 /* implicit end_primitives, needed in case there are any unflushed
4386 vertices in the cache. Note must not call end_primitive here
4387 since the exec_mask is not valid at this point. */
4388 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4389
4390 total_emitted_vertices_vec =
4391 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4392 emitted_prims_vec =
4393 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4394
4395 bld->gs_iface->gs_epilogue(bld->gs_iface,
4396 total_emitted_vertices_vec,
4397 emitted_prims_vec, 0);
4398 } else {
4399 gather_outputs(bld);
4400 }
4401 }
4402
/**
 * Translate a TGSI token stream into LLVM IR using SoA
 * (structure-of-arrays) layout: each register channel is a vector
 * across the execution lanes.
 *
 * \param gallivm  gallivm/LLVM context to emit IR into
 * \param tokens   TGSI shader to translate
 * \param params   translation parameters (vector type, resource
 *                 pointers, sampler/image interfaces, optional
 *                 GS/TCS/TES interfaces)
 * \param outputs  per-register, per-channel output value slots filled
 *                 in for the caller
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* NOTE(review): res_type is initialized but not referenced below in
    * this function — looks like a leftover; confirm before removing. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   /* Double-width contexts for 64-bit float/int opcodes: same lane
    * count, twice the element width. */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   /* Copy caller-supplied state into the build context. */
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
      (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Register-file fetch/store callbacks. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   /* Control-flow opcodes. */
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   /* Texture sampling opcodes. */
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   /* SSBO/image memory opcodes. */
   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   /* All atomic ops share a single handler. */
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      /* TCS reads back its own outputs through the same fetch path. */
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   /* Walk the token stream and emit the IR. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      /* Debug aid, normally disabled: dump TGSI and generated function. */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      /* Debug aid, normally disabled: dump the whole LLVM module. */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
4633