/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_misc.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_coro.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   snprintf(buf, sizeof buf, " %s[%u].%c = ",
            tgsi_file_name(file),
            index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}

static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Combine the execution mask, if there is one, with the current mask.
 */
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec_mask = &bld->exec_mask;
   LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
   if (!exec_mask->has_mask) {
      return bld_mask;
   }
   if (!bld_mask)
      return exec_mask->exec_mask;
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
                       exec_mask->exec_mask, "");
}

static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
                               struct lp_build_tgsi_context * bld_base)
{
   enum tgsi_opcode opcode =
      bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
   bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                        opcode == TGSI_OPCODE_CASE);
   lp_exec_break(mask, &bld_base->pc, break_always);
}

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's a deferred default; if so, do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point here again, since we stop execution of
       * the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   } else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* Skipping the case mask evaluation here is NOT optional (at least not
    * in all cases). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}

/*
 * Analyse the default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which appear together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      default:
         ; /* nothing */
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc = 0;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that), everything
    * is just fine: update the switch mask and go on. This means we can handle
    * default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   } else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such, since we
       * have already updated the masks.
       * If that happens in practice, we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do the switch analysis at switch start time
       * instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into it,
       * we record the PC and continue execution at the next case (again,
       * cases encountered at the same time don't count). At endswitch time,
       * we update the switch mask and go back to execute the code we skipped,
       * until the next break (possibly re-executing some code with a changed
       * mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as in the former case, except instead of
       * skipping the code just execute it without updating the mask, then go
       * back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}


static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
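      /*
       * Note: the alloca behind var_of_array may be either a real array
       * ([N x float]) or a plain scalar pointer, which is why the element
       * type is inspected before building the GEP below.
       */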
      if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
         gep[1] = lindex;
         return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
      } else {
         return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
      }
   } else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to a constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need control flow (though not
       * per-element).
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else {
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
      }
   }

   return res;
}


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

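      /*
       * There is no masked scatter here: for predicated channels we load the
       * current memory contents and blend the new value in, so inactive
       * channels keep their old value.
       */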
      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      } else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection,
       * the value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);
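   /* e.g. with reg_index 5 and rel = {1,2,0,3} this yields {6,7,5,8} */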

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit >= 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
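   /*
    * E.g. for a 4-wide SoA layout, indirect_index = {1,1,1,1} and
    * chan_index = 2 gives (1 * 4 + 2) * 4 = 24: the flattened array offset
    * of the first element of channel z of register 1 (plus the per-element
    * offsets {0,1,2,3} below, if requested).
    */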
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;
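
   /*
    * swizzle_in packs two channel selectors: bits 15:0 give the (first)
    * channel, and for 64-bit types bits 31:16 give the second channel of
    * the pair.
    */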

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   } else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
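      /* constants are packed 4 scalars per register: CONST[i].chan lives at
       * element i * 4 + chan of the buffer */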
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i += 2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
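   /*
    * E.g. for a 4-wide vector this builds the shuffle mask {0,4,1,5,2,6,3,7},
    * interleaving each element of 'input' with the corresponding element of
    * 'input2' so the bitcast below can reinterpret each pair as one 64-bit
    * value.
    */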
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   } else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      } else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}


static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may
       * be larger than the max attrib index, due to it being the max of
       * declared inputs AND the max vertices per prim (which is 6 for tri
       * adj). It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite
       * a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_tcs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   /* TCS can read from its own outputs */
   if (reg->Register.File == TGSI_FILE_OUTPUT) {
      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                              reg->Dimension.Indirect,
                                              vertex_index,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              FALSE,
                                              swizzle_index,
                                              bld_base->info->output_semantic_name[reg->Register.Index]);
   } else {
      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                             reg->Dimension.Indirect,
                                             vertex_index,
                                             reg->Register.Indirect,
                                             attrib_index,
                                             FALSE,
                                             swizzle_index);
   }

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (reg->Register.File == TGSI_FILE_OUTPUT) {
         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                  reg->Dimension.Indirect,
                                                  vertex_index,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  FALSE,
                                                  swizzle_index,
                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
      } else {
         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                 reg->Dimension.Indirect,
                                                 vertex_index,
                                                 reg->Register.Indirect,
                                                 attrib_index,
                                                 FALSE,
                                                 swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_tes_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              swizzle_index);
   } else {
      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                               reg->Dimension.Indirect,
                                               vertex_index,
                                               reg->Register.Indirect,
                                               attrib_index,
                                               FALSE,
                                               swizzle_index);
   }

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  swizzle_index);
      } else {
         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                   reg->Dimension.Indirect,
                                                   vertex_index,
                                                   reg->Register.Indirect,
                                                   attrib_index,
                                                   FALSE,
                                                   swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}


static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   } else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype;  /* actual type of the value */
   unsigned swizzle = swizzle_in & 0xffff;

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEINSTANCE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      if (info->processor == PIPE_SHADER_TESS_CTRL)
         res = bld->system_values.invocation_id;
      else
         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_HELPER_INVOCATION:
      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_THREAD_ID:
      res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BLOCK_ID:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_GRID_SIZE:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSCOORD:
      {
         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
         LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
         res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
      }
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_FACE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_DRAWID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_SAMPLEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSOUTER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_outer,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_TESSINNER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_inner,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_VERTICESIN:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}
1608
1609 /**
1610 * Register fetch with derivatives.
1611 */
1612 static void
1613 emit_fetch_deriv(
1614 struct lp_build_tgsi_soa_context *bld,
1615 LLVMValueRef src,
1616 LLVMValueRef *res,
1617 LLVMValueRef *ddx,
1618 LLVMValueRef *ddy)
1619 {
1620 if (res)
1621 *res = src;
1622
1623 /* TODO: use interpolation coeffs for inputs */
1624
1625 if (ddx)
1626 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1627
1628 if (ddy)
1629 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1630 }
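
/*
 * For illustration -- a sketch of what the ddx/ddy helpers compute,
 * assuming the usual llvmpipe 2x2 quad lane layout, where lanes 0..3
 * cover pixels (x,y), (x+1,y), (x,y+1), (x+1,y+1):
 *
 *   src = <a0, a1, a2, a3, ...>
 *   ddx ~ <a1-a0, a1-a0, a3-a2, a3-a2, ...>   (right minus left)
 *   ddy ~ <a2-a0, a3-a1, a2-a0, a3-a1, ...>   (bottom minus top)
 *
 * See lp_bld_quad.c for the authoritative implementation.
 */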
1631
1632 /**
1633 * store a vector of vec_length 64-bit values into two vectors of vec_length floats,
1634 * i.e.
1635 * value is d0, d1, d2, d3 etc.
1636 * each 64-bit value is split into two 32-bit pieces x, y
1637 * which get stored into the separate channels as:
1638 * chan_ptr  = d0.x, d1.x, d2.x, d3.x
1639 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1640 */
1641 static void
1642 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1643 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1644 LLVMValueRef value)
1645 {
1646 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1647 struct gallivm_state *gallivm = bld_base->base.gallivm;
1648 LLVMBuilderRef builder = gallivm->builder;
1649 struct lp_build_context *float_bld = &bld_base->base;
1650 unsigned i;
1651 LLVMValueRef temp, temp2;
1652 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1653 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1654
1655 for (i = 0; i < bld_base->base.type.length; i++) {
1656 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1657 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1658 }
1659
1660 temp = LLVMBuildShuffleVector(builder, value,
1661 LLVMGetUndef(LLVMTypeOf(value)),
1662 LLVMConstVector(shuffles,
1663 bld_base->base.type.length),
1664 "");
1665 temp2 = LLVMBuildShuffleVector(builder, value,
1666 LLVMGetUndef(LLVMTypeOf(value)),
1667 LLVMConstVector(shuffles2,
1668 bld_base->base.type.length),
1669 "");
1670
1671 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1672 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1673 }
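
/*
 * A worked example of the shuffles above, assuming a vector length of 4
 * (so `value` holds 8 floats, i.e. 4 low/high pairs):
 *
 *   value     = <d0.x, d0.y, d1.x, d1.y, d2.x, d2.y, d3.x, d3.y>
 *   shuffles  = {0, 2, 4, 6}  =>  temp  = <d0.x, d1.x, d2.x, d3.x>
 *   shuffles2 = {1, 3, 5, 7}  =>  temp2 = <d0.y, d1.y, d2.y, d3.y>
 *
 * temp and temp2 then go through the regular masked channel stores.
 */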
1674
1675 static void
1676 emit_store_output(struct lp_build_tgsi_context *bld_base,
1677 enum tgsi_opcode_type dtype,
1678 const struct tgsi_full_dst_register *reg,
1679 unsigned index,
1680 unsigned chan_index,
1681 LLVMValueRef indirect_index,
1682 LLVMValueRef value)
1683 {
1684 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1685 struct gallivm_state *gallivm = bld_base->base.gallivm;
1686 LLVMBuilderRef builder = gallivm->builder;
1687 struct lp_build_context *float_bld = &bld_base->base;
1688
1689 /* Outputs are always stored as floats */
1690 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1691
1692 if (reg->Register.Indirect) {
1693 LLVMValueRef index_vec; /* indexes into the output registers */
1694 LLVMValueRef outputs_array;
1695 LLVMTypeRef fptr_type;
1696
1697 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1698 indirect_index,
1699 chan_index,
1700 TRUE);
1701
1702 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1703 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1704
1705 /* Scatter store values into output registers */
1706 emit_mask_scatter(bld, outputs_array, index_vec, value,
1707 &bld->exec_mask);
1708 }
1709 else {
1710 assert(LLVMTypeOf(value) == float_bld->vec_type);
1711 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1712 chan_index);
1713
1714 if (tgsi_type_is_64bit(dtype)) {
1715 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1716 chan_index + 1);
1717 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1718 value);
1719 } else
1720 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1721 }
1722 }
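
/*
 * For the indirect case above, get_soa_array_offsets is expected to
 * produce per-lane element offsets into the flat SoA array, roughly:
 *
 *   offset[lane] = (indirect_index[lane] * 4 + chan_index) * type.length
 *                  + lane      (the per-lane term, since TRUE is passed)
 *
 * so each lane of the scatter lands in its own slot of the selected
 * channel of the selected register.
 */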
1723
1724 static void
1725 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1726 enum tgsi_opcode_type dtype,
1727 const struct tgsi_full_dst_register *reg,
1728 unsigned index,
1729 unsigned chan_index,
1730 LLVMValueRef indirect_index,
1731 LLVMValueRef value)
1732 {
1733 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1734 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1735 const struct tgsi_shader_info *info = bld->bld_base.info;
1736 LLVMValueRef attrib_index = NULL;
1737 LLVMValueRef vertex_index = NULL;
1738 LLVMValueRef channel_index = NULL;
1739
1740 if (reg->Register.Indirect) {
1741 /*
1742 * XXX: this is possibly not quite the right value, since file_max may be
1743 * larger than the max attrib index, due to it being the max of declared
1744 * inputs AND the max vertices per prim (which is 6 for tri adj).
1745 * It should however be safe to use (since we always allocate
1746 * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1747 */
1748 int index_limit = info->file_max[reg->Register.File];
1749 attrib_index = get_indirect_index(bld,
1750 reg->Register.File,
1751 reg->Register.Index,
1752 ®->Indirect,
1753 index_limit);
1754 } else {
1755 attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1756 }
1757
1758 if (reg->Dimension.Indirect) {
1759 vertex_index = get_indirect_index(bld,
1760 reg->Register.File,
1761 reg->Dimension.Index,
1762 ®->DimIndirect,
1763 PIPE_MAX_SHADER_OUTPUTS);
1764 } else {
1765 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1766 }
1767
1768 channel_index = lp_build_const_int32(gallivm, chan_index);
1769
1770 assert(bld->tcs_iface->emit_store_output);
1771 bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1772 bld_base->info->output_semantic_name[reg->Register.Index],
1773 reg->Dimension.Indirect,
1774 vertex_index,
1775 reg->Register.Indirect,
1776 attrib_index,
1777 false,
1778 channel_index,
1779 value,
1780 mask_vec(bld_base));
1781 }
1782
1783 static void
1784 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1785 enum tgsi_opcode_type dtype,
1786 const struct tgsi_full_dst_register *reg,
1787 unsigned index,
1788 unsigned chan_index,
1789 LLVMValueRef indirect_index,
1790 LLVMValueRef value)
1791 {
1792 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1793 struct gallivm_state *gallivm = bld_base->base.gallivm;
1794 LLVMBuilderRef builder = gallivm->builder;
1795 struct lp_build_context *float_bld = &bld_base->base;
1796
1797 /* Temporaries are always stored as floats */
1798 if (!tgsi_type_is_64bit(dtype))
1799 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1800 else
1801 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1802
1803 if (reg->Register.Indirect) {
1804 LLVMValueRef index_vec; /* indexes into the temp registers */
1805 LLVMValueRef temps_array;
1806 LLVMTypeRef fptr_type;
1807
1808 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1809 indirect_index,
1810 chan_index,
1811 TRUE);
1812
1813 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1814 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1815
1816 /* Scatter store values into temp registers */
1817 emit_mask_scatter(bld, temps_array, index_vec, value,
1818 &bld->exec_mask);
1819 }
1820 else {
1821 LLVMValueRef temp_ptr;
1822 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1823
1824 if (tgsi_type_is_64bit(dtype)) {
1825 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1826 reg->Register.Index,
1827 chan_index + 1);
1828 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1829 value);
1830 }
1831 else
1832 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1833 }
1834 }
1835
1836 static void
1837 emit_store_address(struct lp_build_tgsi_context *bld_base,
1838 enum tgsi_opcode_type dtype,
1839 const struct tgsi_full_dst_register *reg,
1840 unsigned index,
1841 unsigned chan_index,
1842 LLVMValueRef indirect_index,
1843 LLVMValueRef value)
1844 {
1845 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1846 struct gallivm_state *gallivm = bld_base->base.gallivm;
1847 LLVMBuilderRef builder = gallivm->builder;
1848 struct lp_build_context *int_bld = &bld_base->int_bld;
1849
1850 assert(dtype == TGSI_TYPE_SIGNED);
1851 assert(LLVMTypeOf(value) == int_bld->vec_type);
1852 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1853 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1854 bld->addr[reg->Register.Index][chan_index]);
1855 }
1856
1857 /**
1858 * Register store.
1859 */
1860 static void
1861 emit_store_chan(
1862 struct lp_build_tgsi_context *bld_base,
1863 const struct tgsi_full_instruction *inst,
1864 unsigned index,
1865 unsigned chan_index,
1866 LLVMValueRef value)
1867 {
1868 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1869 struct gallivm_state *gallivm = bld_base->base.gallivm;
1870 LLVMBuilderRef builder = gallivm->builder;
1871 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1872 struct lp_build_context *float_bld = &bld_base->base;
1873 LLVMValueRef indirect_index = NULL;
1874 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1875
1876 /*
1877 * Apply saturation.
1878 *
1879 * It is always assumed to be float.
1880 */
1881 if (inst->Instruction.Saturate) {
1882 assert(dtype == TGSI_TYPE_FLOAT ||
1883 dtype == TGSI_TYPE_UNTYPED);
1884 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1886 }
1887
1888 if (reg->Register.Indirect) {
1889 /*
1890 * Currently mesa/st doesn't generate indirect stores
1891 * to 64-bit values; it normally uses MOV to do indirect stores.
1892 */
1893 assert(!tgsi_type_is_64bit(dtype));
1894 indirect_index = get_indirect_index(bld,
1895 reg->Register.File,
1896 reg->Register.Index,
1897 ®->Indirect,
1898 bld->bld_base.info->file_max[reg->Register.File]);
1899 } else {
1900 assert(reg->Register.Index <=
1901 bld_base->info->file_max[reg->Register.File]);
1902 }
1903
1904 if (DEBUG_EXECUTION) {
1905 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1906 }
1907
1908 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1909 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1910 dtype,
1911 reg,
1912 index,
1913 chan_index,
1914 indirect_index,
1915 value);
1916
1917 (void)dtype;
1918 }
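
/*
 * Note on the saturate path above: lp_build_clamp_zero_one_nanzero
 * clamps to [0, 1] and (as the name suggests) maps NaN to zero, e.g.
 *
 *   -0.5 -> 0.0,   0.25 -> 0.25,   1.5 -> 1.0,   NaN -> 0.0
 *
 * matching D3D10-style saturate semantics.
 */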
1919
1920 /*
1921 * Called at the beginning of the translation of each TGSI instruction, to
1922 * emit some debug code.
1923 */
1924 static void
1925 emit_debug(
1926 struct lp_build_tgsi_context * bld_base,
1927 const struct tgsi_full_instruction * inst,
1928 const struct tgsi_opcode_info * info)
1929
1930 {
1931 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1932
1933 if (DEBUG_EXECUTION) {
1934 /*
1935 * Dump the TGSI instruction.
1936 */
1937
1938 struct gallivm_state *gallivm = bld_base->base.gallivm;
1939 char buf[512];
1940 buf[0] = '$';
1941 buf[1] = ' ';
1942 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1943 lp_build_printf(gallivm, buf);
1944
1945 /* Dump the execution mask.
1946 */
1947 if (bld->exec_mask.has_mask) {
1948 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1949 }
1950 }
1951 }
1952
1953 static void
1954 emit_store(
1955 struct lp_build_tgsi_context * bld_base,
1956 const struct tgsi_full_instruction * inst,
1957 const struct tgsi_opcode_info * info,
1958 unsigned index,
1959 LLVMValueRef dst[4])
1960
1961 {
1962 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1963
1964 unsigned writemask = inst->Dst[index].Register.WriteMask;
1965 while (writemask) {
1966 unsigned chan_index = u_bit_scan(&writemask);
1967 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1968 continue;
1969 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1970 }
1971 }
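
/*
 * Example of the 64-bit skip above: a double-typed destination with
 * writemask .xyzw really holds two 64-bit values in the channel pairs
 * (x,y) and (z,w), so the loop only emits stores for chans 0 and 2;
 * emit_store_64bit_chan() later splits each value into its two 32-bit
 * halves.
 */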
1972
1973 static unsigned
1974 tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)
1975 {
1976 switch (tgsi_target) {
1977 case TGSI_TEXTURE_BUFFER:
1978 return PIPE_BUFFER;
1979 case TGSI_TEXTURE_1D:
1980 case TGSI_TEXTURE_SHADOW1D:
1981 return PIPE_TEXTURE_1D;
1982 case TGSI_TEXTURE_2D:
1983 case TGSI_TEXTURE_SHADOW2D:
1984 case TGSI_TEXTURE_2D_MSAA:
1985 return PIPE_TEXTURE_2D;
1986 case TGSI_TEXTURE_3D:
1987 return PIPE_TEXTURE_3D;
1988 case TGSI_TEXTURE_CUBE:
1989 case TGSI_TEXTURE_SHADOWCUBE:
1990 return PIPE_TEXTURE_CUBE;
1991 case TGSI_TEXTURE_RECT:
1992 case TGSI_TEXTURE_SHADOWRECT:
1993 return PIPE_TEXTURE_RECT;
1994 case TGSI_TEXTURE_1D_ARRAY:
1995 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1996 return PIPE_TEXTURE_1D_ARRAY;
1997 case TGSI_TEXTURE_2D_ARRAY:
1998 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1999 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2000 return PIPE_TEXTURE_2D_ARRAY;
2001 case TGSI_TEXTURE_CUBE_ARRAY:
2002 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2003 return PIPE_TEXTURE_CUBE_ARRAY;
2004 default:
2005 assert(0);
2006 return PIPE_BUFFER;
2007 }
2008 }
2009
2010
2011 static enum lp_sampler_lod_property
2012 lp_build_lod_property(
2013 struct lp_build_tgsi_context *bld_base,
2014 const struct tgsi_full_instruction *inst,
2015 unsigned src_op)
2016 {
2017 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2018 enum lp_sampler_lod_property lod_property;
2019
2020 /*
2021 * Not much we can do here. We could try catching inputs declared
2022 * with constant interpolation, but it's not clearly worth it: for
2023 * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2024 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
2025 * like the constant/immediate recognition below.
2026 * What would be of more value is to recognize temps holding
2027 * broadcast scalars, but there's no way we can do that.
2028 * Asking llvm was tried without success (using LLVMIsConstant, even
2029 * though that isn't exactly what we'd need); even something as simple as
2030 * IMM[0] UINT32 (0,-1,0,0)
2031 * MOV TEMP[0] IMM[0].yyyy
2032 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2033 * doesn't work.
2034 * This means there's ZERO chance this will ever catch a scalar lod
2035 * with traditional tex opcodes as well as texel fetches, since the lod
2036 * comes from the same reg as coords (except some test shaders using
2037 * constant coords maybe).
2038 * There's at least hope for sample opcodes as well as size queries.
2039 */
2040 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2041 reg->Register.File == TGSI_FILE_CONSTANT ||
2042 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2043 lod_property = LP_SAMPLER_LOD_SCALAR;
2044 }
2045 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2046 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2047 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2048 }
2049 else {
2050 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2051 }
2052 }
2053 else {
2054 /* never use scalar (per-quad) lod; the results are just too wrong. */
2055 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2056 }
2057 return lod_property;
2058 }
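
/*
 * What the lod properties mean for, say, an 8-wide vector covering two
 * 2x2 quads:
 *
 *   LP_SAMPLER_LOD_SCALAR      - one lod shared by all 8 lanes
 *   LP_SAMPLER_LOD_PER_QUAD    - one lod per quad (2 lods)
 *   LP_SAMPLER_LOD_PER_ELEMENT - one lod per lane (8 lods)
 *
 * Scalar is the cheapest to sample with, per-element the most general.
 */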
2059
2060
2061 /**
2062 * High-level instruction translators.
2063 */
2064
2065 static void
emit_tex(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,LLVMValueRef * texel,unsigned sampler_reg,enum lp_sampler_op_type sampler_op)2066 emit_tex( struct lp_build_tgsi_soa_context *bld,
2067 const struct tgsi_full_instruction *inst,
2068 enum lp_build_tex_modifier modifier,
2069 LLVMValueRef *texel,
2070 unsigned sampler_reg,
2071 enum lp_sampler_op_type sampler_op)
2072 {
2073 unsigned unit = inst->Src[sampler_reg].Register.Index;
2074 LLVMValueRef oow = NULL;
2075 LLVMValueRef lod = NULL;
2076 LLVMValueRef coords[5];
2077 LLVMValueRef offsets[3] = { NULL };
2078 struct lp_derivatives derivs;
2079 struct lp_sampler_params params;
2080 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2081 unsigned num_derivs, num_offsets, i;
2082 unsigned shadow_coord = 0;
2083 unsigned layer_coord = 0;
2084 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2085
2086 memset(&params, 0, sizeof(params));
2087
2088 if (!bld->sampler) {
2089 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2090 for (i = 0; i < 4; i++) {
2091 texel[i] = bld->bld_base.base.undef;
2092 }
2093 return;
2094 }
2095
2096 switch (inst->Texture.Texture) {
2097 case TGSI_TEXTURE_1D_ARRAY:
2098 layer_coord = 1;
2099 FALLTHROUGH;
2100 case TGSI_TEXTURE_1D:
2101 num_offsets = 1;
2102 num_derivs = 1;
2103 break;
2104 case TGSI_TEXTURE_2D_ARRAY:
2105 layer_coord = 2;
2106 FALLTHROUGH;
2107 case TGSI_TEXTURE_2D:
2108 case TGSI_TEXTURE_RECT:
2109 num_offsets = 2;
2110 num_derivs = 2;
2111 break;
2112 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2113 layer_coord = 1;
2114 FALLTHROUGH;
2115 case TGSI_TEXTURE_SHADOW1D:
2116 shadow_coord = 2;
2117 num_offsets = 1;
2118 num_derivs = 1;
2119 break;
2120 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2121 layer_coord = 2;
2122 shadow_coord = 3;
2123 num_offsets = 2;
2124 num_derivs = 2;
2125 break;
2126 case TGSI_TEXTURE_SHADOW2D:
2127 case TGSI_TEXTURE_SHADOWRECT:
2128 shadow_coord = 2;
2129 num_offsets = 2;
2130 num_derivs = 2;
2131 break;
2132 case TGSI_TEXTURE_CUBE:
2133 num_offsets = 2;
2134 num_derivs = 3;
2135 break;
2136 case TGSI_TEXTURE_3D:
2137 num_offsets = 3;
2138 num_derivs = 3;
2139 break;
2140 case TGSI_TEXTURE_SHADOWCUBE:
2141 shadow_coord = 3;
2142 num_offsets = 2;
2143 num_derivs = 3;
2144 break;
2145 case TGSI_TEXTURE_CUBE_ARRAY:
2146 num_offsets = 2;
2147 num_derivs = 3;
2148 layer_coord = 3;
2149 break;
2150 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2151 num_offsets = 2;
2152 num_derivs = 3;
2153 layer_coord = 3;
2154 shadow_coord = 4; /* shadow coord comes from a special separate reg */
2155 break;
2156 case TGSI_TEXTURE_2D_MSAA:
2157 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2158 default:
2159 assert(0);
2160 return;
2161 }
2162
2163 /* Note lod and especially projected are illegal in a LOT of cases */
2164 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2165 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2166 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2167 lod = bld->bld_base.base.zero;
2168 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2169 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2170 /* note that shadow cube array with bias/explicit lod does not exist */
2171 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2172 }
2173 else {
2174 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2175 }
2176 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2177 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2178 }
2179 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2180 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2181 }
2182 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2183 }
2184
2185 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2186 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2187 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2188 }
2189 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2190 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2191 oow = lp_build_rcp(&bld->bld_base.base, oow);
2192 }
2193
2194 for (i = 0; i < num_derivs; i++) {
2195 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2196 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2197 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2198 }
2199 for (i = num_derivs; i < 5; i++) {
2200 coords[i] = bld->bld_base.base.undef;
2201 }
2202
2203 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2204 if (layer_coord) {
2205 if (layer_coord == 3) {
2206 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2207 }
2208 else {
2209 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2210 }
2211 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2213 }
2214 /* Shadow coord always occupies the 5th slot. */
2215 if (shadow_coord) {
2216 sample_key |= LP_SAMPLER_SHADOW;
2217 if (shadow_coord == 4) {
2218 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2219 }
2220 else {
2221 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2222 }
2223 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2224 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2225 }
2226
2227 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2228 unsigned dim;
2229 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2230 for (dim = 0; dim < num_derivs; ++dim) {
2231 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2232 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2233 }
2234 params.derivs = &derivs;
2235 /*
2236 * We could also check whether all src regs are constant, but I doubt
2237 * such cases exist in practice.
2238 */
2239 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2240 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2241 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2242 }
2243 else {
2244 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2245 }
2246 }
2247 else {
2248 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2249 }
2250 }
2251 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2252
2253 /* we don't handle the 4 offset version of tg4 */
2254 if (inst->Texture.NumOffsets == 1) {
2255 unsigned dim;
2256 sample_key |= LP_SAMPLER_OFFSETS;
2257 for (dim = 0; dim < num_offsets; dim++) {
2258 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2259 }
2260 }
2261
2262 params.type = bld->bld_base.base.type;
2263 params.sample_key = sample_key;
2264 params.texture_index = unit;
2265 params.sampler_index = unit;
2266 params.context_ptr = bld->context_ptr;
2267 params.thread_data_ptr = bld->thread_data_ptr;
2268 params.coords = coords;
2269 params.offsets = offsets;
2270 params.lod = lod;
2271 params.texel = texel;
2272
2273 bld->sampler->emit_tex_sample(bld->sampler,
2274 bld->bld_base.base.gallivm,
2275 &params);
2276 }
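
/*
 * Summary of the coords[] slot convention used above:
 *
 *   coords[0..2] : s, t, r (undef past num_derivs)
 *   coords[2]    : layer for 1D/2D array targets
 *   coords[3]    : layer for cube array targets only
 *   coords[4]    : shadow comparator, always
 */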
2277
2278 static void
2279 emit_sample(struct lp_build_tgsi_soa_context *bld,
2280 const struct tgsi_full_instruction *inst,
2281 enum lp_build_tex_modifier modifier,
2282 boolean compare,
2283 enum lp_sampler_op_type sample_type,
2284 LLVMValueRef *texel)
2285 {
2286 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2287 unsigned texture_unit, sampler_unit;
2288 LLVMValueRef lod = NULL;
2289 LLVMValueRef coords[5];
2290 LLVMValueRef offsets[3] = { NULL };
2291 struct lp_derivatives derivs;
2292 struct lp_sampler_params params;
2293 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2294
2295 unsigned num_offsets, num_derivs, i;
2296 unsigned layer_coord = 0;
2297 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2298
2299 memset(&params, 0, sizeof(params));
2300
2301 if (!bld->sampler) {
2302 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2303 for (i = 0; i < 4; i++) {
2304 texel[i] = bld->bld_base.base.undef;
2305 }
2306 return;
2307 }
2308
2309 /*
2310 * Unlike old-style tex opcodes, the texture/sampler indices
2311 * always come from src1 and src2 respectively.
2312 */
2313 texture_unit = inst->Src[1].Register.Index;
2314 sampler_unit = inst->Src[2].Register.Index;
2315
2316 /*
2317 * Note inst->Texture.Texture will contain the number of offsets;
2318 * the target information, however, is NOT there and comes from the
2319 * declared sampler views instead.
2320 */
2321 switch (bld->sv[texture_unit].Resource) {
2322 case TGSI_TEXTURE_1D:
2323 num_offsets = 1;
2324 num_derivs = 1;
2325 break;
2326 case TGSI_TEXTURE_1D_ARRAY:
2327 layer_coord = 1;
2328 num_offsets = 1;
2329 num_derivs = 1;
2330 break;
2331 case TGSI_TEXTURE_2D:
2332 case TGSI_TEXTURE_RECT:
2333 num_offsets = 2;
2334 num_derivs = 2;
2335 break;
2336 case TGSI_TEXTURE_2D_ARRAY:
2337 layer_coord = 2;
2338 num_offsets = 2;
2339 num_derivs = 2;
2340 break;
2341 case TGSI_TEXTURE_CUBE:
2342 num_offsets = 2;
2343 num_derivs = 3;
2344 break;
2345 case TGSI_TEXTURE_3D:
2346 num_offsets = 3;
2347 num_derivs = 3;
2348 break;
2349 case TGSI_TEXTURE_CUBE_ARRAY:
2350 layer_coord = 3;
2351 num_offsets = 2;
2352 num_derivs = 3;
2353 break;
2354 default:
2355 assert(0);
2356 return;
2357 }
2358
2359 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2360 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2362 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2363 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2364 }
2365 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2366 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2367 }
2368 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2369 }
2370 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2371 /* XXX might be better to explicitly pass the level zero information */
2372 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2373 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2374 }
2375
2376 for (i = 0; i < num_derivs; i++) {
2377 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2378 }
2379 for (i = num_derivs; i < 5; i++) {
2380 coords[i] = bld->bld_base.base.undef;
2381 }
2382
2383 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2384 if (layer_coord) {
2385 if (layer_coord == 3)
2386 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2387 else
2388 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2389 }
2390 /* Shadow coord always occupies the 5th slot. */
2391 if (compare) {
2392 sample_key |= LP_SAMPLER_SHADOW;
2393 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2394 }
2395
2396 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2397 unsigned dim;
2398 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2399 for (dim = 0; dim < num_derivs; ++dim) {
2400 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2401 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2402 }
2403 params.derivs = &derivs;
2404 /*
2405 * We could also check whether all src regs are constant, but I doubt
2406 * such cases exist in practice.
2407 */
2408 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2409 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2410 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2411 }
2412 else {
2413 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2414 }
2415 }
2416 else {
2417 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2418 }
2419 }
2420
2421 /* some advanced gather instructions (txgo) would require 4 offsets */
2422 if (inst->Texture.NumOffsets == 1) {
2423 unsigned dim;
2424 sample_key |= LP_SAMPLER_OFFSETS;
2425 for (dim = 0; dim < num_offsets; dim++) {
2426 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2427 }
2428 }
2429 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2430
2431 params.type = bld->bld_base.base.type;
2432 params.sample_key = sample_key;
2433 params.texture_index = texture_unit;
2434 params.sampler_index = sampler_unit;
2435 params.context_ptr = bld->context_ptr;
2436 params.thread_data_ptr = bld->thread_data_ptr;
2437 params.coords = coords;
2438 params.offsets = offsets;
2439 params.lod = lod;
2440 params.texel = texel;
2441
2442 bld->sampler->emit_tex_sample(bld->sampler,
2443 bld->bld_base.base.gallivm,
2444 &params);
2445
2446 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2447 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2448 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2449 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2450 unsigned char swizzles[4];
2451 swizzles[0] = inst->Src[1].Register.SwizzleX;
2452 swizzles[1] = inst->Src[1].Register.SwizzleY;
2453 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2454 swizzles[3] = inst->Src[1].Register.SwizzleW;
2455
2456 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2457 }
2458 }
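
/*
 * An example of the final swizzle above, for illustration: if the
 * SAMPLE instruction's sampler-view src is written with a swizzle,
 * e.g. SVIEW[0].yxzw, the sampler returns texel[] in x,y,z,w order
 * and lp_build_swizzle_soa_inplace() reorders it to y,x,z,w before
 * the results are consumed.
 */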
2459
2460 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,boolean is_samplei)2461 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2462 const struct tgsi_full_instruction *inst,
2463 LLVMValueRef *texel,
2464 boolean is_samplei)
2465 {
2466 unsigned unit, target;
2467 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2468 LLVMValueRef explicit_lod = NULL;
2469 LLVMValueRef coords[5];
2470 LLVMValueRef offsets[3] = { NULL };
2471 LLVMValueRef ms_index = NULL;
2472 struct lp_sampler_params params;
2473 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2474 unsigned dims, i;
2475 unsigned layer_coord = 0;
2476 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2477
2478 memset(&params, 0, sizeof(params));
2479
2480 if (!bld->sampler) {
2481 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2482 for (i = 0; i < 4; i++) {
2483 texel[i] = coord_undef;
2484 }
2485 return;
2486 }
2487
2488 unit = inst->Src[1].Register.Index;
2489
2490 if (is_samplei) {
2491 target = bld->sv[unit].Resource;
2492 }
2493 else {
2494 target = inst->Texture.Texture;
2495 }
2496
2497 switch (target) {
2498 case TGSI_TEXTURE_1D:
2499 case TGSI_TEXTURE_BUFFER:
2500 dims = 1;
2501 break;
2502 case TGSI_TEXTURE_1D_ARRAY:
2503 layer_coord = 1;
2504 dims = 1;
2505 break;
2506 case TGSI_TEXTURE_2D:
2507 case TGSI_TEXTURE_RECT:
2508 case TGSI_TEXTURE_2D_MSAA:
2509 dims = 2;
2510 break;
2511 case TGSI_TEXTURE_2D_ARRAY:
2512 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2513 layer_coord = 2;
2514 dims = 2;
2515 break;
2516 case TGSI_TEXTURE_3D:
2517 dims = 3;
2518 break;
2519 default:
2520 assert(0);
2521 return;
2522 }
2523
2524 /* always have lod, except for buffers and msaa targets? */
2525 if (target != TGSI_TEXTURE_BUFFER &&
2526 target != TGSI_TEXTURE_2D_MSAA &&
2527 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2528 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2529 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2530 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2531 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2532 }
2533
2534 if (target == TGSI_TEXTURE_2D_MSAA ||
2535 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2536 sample_key |= LP_SAMPLER_FETCH_MS;
2537 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538 }
2539
2540 /*
2541 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2542 * would be the sample index.
2543 */
2544
2545 for (i = 0; i < dims; i++) {
2546 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2547 }
2548 /* we never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
2549 for (i = dims; i < 5; i++) {
2550 coords[i] = coord_undef;
2551 }
2552 if (layer_coord)
2553 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2554
2555 if (inst->Texture.NumOffsets == 1) {
2556 unsigned dim;
2557 sample_key |= LP_SAMPLER_OFFSETS;
2558 for (dim = 0; dim < dims; dim++) {
2559 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2560 }
2561 }
2562 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2563
2564 params.type = bld->bld_base.base.type;
2565 params.sample_key = sample_key;
2566 params.texture_index = unit;
2567 /*
2568 * The sampler is not actually used; set it to 0 so it won't exceed
2569 * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2570 * sampler view number can exceed this.
2571 */
2572 params.sampler_index = 0;
2573 params.context_ptr = bld->context_ptr;
2574 params.thread_data_ptr = bld->thread_data_ptr;
2575 params.coords = coords;
2576 params.offsets = offsets;
2577 params.derivs = NULL;
2578 params.lod = explicit_lod;
2579 params.texel = texel;
2580 params.ms_index = ms_index;
2581
2582 bld->sampler->emit_tex_sample(bld->sampler,
2583 bld->bld_base.base.gallivm,
2584 &params);
2585
2586 if (is_samplei &&
2587 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2588 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2589 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2590 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2591 unsigned char swizzles[4];
2592 swizzles[0] = inst->Src[1].Register.SwizzleX;
2593 swizzles[1] = inst->Src[1].Register.SwizzleY;
2594 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2595 swizzles[3] = inst->Src[1].Register.SwizzleW;
2596
2597 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2598 }
2599 }
2600
2601 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,boolean is_sviewinfo)2602 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2603 const struct tgsi_full_instruction *inst,
2604 LLVMValueRef *sizes_out,
2605 boolean is_sviewinfo)
2606 {
2607 LLVMValueRef explicit_lod;
2608 enum lp_sampler_lod_property lod_property;
2609 unsigned has_lod;
2610 unsigned i;
2611 unsigned unit = inst->Src[1].Register.Index;
2612 enum tgsi_texture_type target;
2613 enum pipe_texture_target pipe_target;
2614 struct lp_sampler_size_query_params params;
2615
2616 if (is_sviewinfo) {
2617 target = bld->sv[unit].Resource;
2618 }
2619 else {
2620 target = inst->Texture.Texture;
2621 }
2622 switch (target) {
2623 case TGSI_TEXTURE_BUFFER:
2624 case TGSI_TEXTURE_RECT:
2625 case TGSI_TEXTURE_SHADOWRECT:
2626 has_lod = 0;
2627 break;
2628 default:
2629 has_lod = 1;
2630 break;
2631 }
2632
2633 if (!bld->sampler) {
2634 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2635 for (i = 0; i < 4; i++)
2636 sizes_out[i] = bld->bld_base.int_bld.undef;
2637 return;
2638 }
2639
2640 if (has_lod) {
2641 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2642 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2643 }
2644 else {
2645 explicit_lod = NULL;
2646 lod_property = LP_SAMPLER_LOD_SCALAR;
2647 }
2648
2649
2650 pipe_target = tgsi_to_pipe_tex_target(target);
2651
2652 params.int_type = bld->bld_base.int_bld.type;
2653 params.texture_unit = unit;
2654 params.target = pipe_target;
2655 params.context_ptr = bld->context_ptr;
2656 params.is_sviewinfo = TRUE;
2657 params.lod_property = lod_property;
2658 params.explicit_lod = explicit_lod;
2659 params.sizes_out = sizes_out;
2660 params.samples_only = false;
2661
2662 bld->sampler->emit_size_query(bld->sampler,
2663 bld->bld_base.base.gallivm,
2664 &params);
2665 }
2666
2667 static boolean
2668 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2669 int pc)
2670 {
2671 unsigned i;
2672
2673 for (i = 0; i < 5; i++) {
2674 enum tgsi_opcode opcode;
2675
2676 if (pc + i >= bld->bld_base.info->num_instructions)
2677 return TRUE;
2678
2679 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2680
2681 if (opcode == TGSI_OPCODE_END)
2682 return TRUE;
2683
2684 if (opcode == TGSI_OPCODE_TEX ||
2685 opcode == TGSI_OPCODE_TXP ||
2686 opcode == TGSI_OPCODE_TXD ||
2687 opcode == TGSI_OPCODE_TXB ||
2688 opcode == TGSI_OPCODE_TXL ||
2689 opcode == TGSI_OPCODE_TXF ||
2690 opcode == TGSI_OPCODE_TXQ ||
2691 opcode == TGSI_OPCODE_TEX2 ||
2692 opcode == TGSI_OPCODE_TXB2 ||
2693 opcode == TGSI_OPCODE_TXL2 ||
2694 opcode == TGSI_OPCODE_SAMPLE ||
2695 opcode == TGSI_OPCODE_SAMPLE_B ||
2696 opcode == TGSI_OPCODE_SAMPLE_C ||
2697 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2698 opcode == TGSI_OPCODE_SAMPLE_D ||
2699 opcode == TGSI_OPCODE_SAMPLE_I ||
2700 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2701 opcode == TGSI_OPCODE_SAMPLE_L ||
2702 opcode == TGSI_OPCODE_SVIEWINFO ||
2703 opcode == TGSI_OPCODE_CAL ||
2704 opcode == TGSI_OPCODE_IF ||
2705 opcode == TGSI_OPCODE_UIF ||
2706 opcode == TGSI_OPCODE_BGNLOOP ||
2707 opcode == TGSI_OPCODE_SWITCH)
2708 return FALSE;
2709 }
2710
2711 return TRUE;
2712 }
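
/*
 * Rationale, for illustration: lp_build_mask_check() emits a branch
 * that skips the remaining code once all lanes are dead.  Within a few
 * instructions of END that branch likely costs more than it can save,
 * so the kill helpers below omit it near the end of the shader.
 */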
2713
2714
2715
2716 /**
2717 * Kill fragment if any of the src register values are negative.
2718 */
2719 static void
2720 emit_kill_if(
2721 struct lp_build_tgsi_soa_context *bld,
2722 const struct tgsi_full_instruction *inst,
2723 int pc)
2724 {
2725 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2726 const struct tgsi_full_src_register *reg = &inst->Src[0];
2727 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2728 LLVMValueRef mask;
2729 unsigned chan_index;
2730
2731 memset(&terms, 0, sizeof terms);
2732
2733 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2734 unsigned swizzle;
2735
2736 /* Unswizzle channel */
2737 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2738
2739 /* Check if the component has not been already tested. */
2740 assert(swizzle < TGSI_NUM_CHANNELS);
2741 if( !terms[swizzle] )
2742 /* TODO: change the comparison operator instead of setting the sign */
2743 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2744 }
2745
2746 mask = NULL;
2747 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2748 if(terms[chan_index]) {
2749 LLVMValueRef chan_mask;
2750
2751 /*
2752 * If term < 0 then mask = 0 else mask = ~0.
2753 */
2754 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2755
2756 if(mask)
2757 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2758 else
2759 mask = chan_mask;
2760 }
2761 }
2762
2763 if (bld->exec_mask.has_mask) {
2764 LLVMValueRef invmask;
2765 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2766 mask = LLVMBuildOr(builder, mask, invmask, "");
2767 }
2768
2769 lp_build_mask_update(bld->mask, mask);
2770 if (!near_end_of_shader(bld, pc))
2771 lp_build_mask_check(bld->mask);
2772 }
2773
2774
2775 /**
2776 * Unconditional fragment kill.
2777 * The only predication is the execution mask which will apply if
2778 * we're inside a loop or conditional.
2779 */
2780 static void
2781 emit_kill(struct lp_build_tgsi_soa_context *bld,
2782 int pc)
2783 {
2784 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2785 LLVMValueRef mask;
2786
2787 /* For those channels which are "alive", disable fragment shader
2788 * execution.
2789 */
2790 if (bld->exec_mask.has_mask) {
2791 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2792 }
2793 else {
2794 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2795 mask = zero;
2796 }
2797
2798 lp_build_mask_update(bld->mask, mask);
2799
2800 if (!near_end_of_shader(bld, pc))
2801 lp_build_mask_check(bld->mask);
2802 }
2803
2804
2805 /**
2806 * Emit code which will dump the values of all registers in the
2807 * given register file to stdout.
2808 */
2809 static void
2810 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2811 unsigned file)
2812 {
2813 const struct tgsi_shader_info *info = bld->bld_base.info;
2814 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2815 LLVMBuilderRef builder = gallivm->builder;
2816 LLVMValueRef reg_ptr;
2817 int index;
2818 int max_index = info->file_max[file];
2819
2820 /*
2821 * Some register files, particularly constants, can be very large,
2822 * and dumping everything could make this unusably slow.
2823 */
2824 max_index = MIN2(max_index, 32);
2825
2826 for (index = 0; index <= max_index; index++) {
2827 LLVMValueRef res;
2828 unsigned mask;
2829 int chan;
2830
2831 if (index < 8 * sizeof(unsigned) &&
2832 (info->file_mask[file] & (1u << index)) == 0) {
2833 /* This was not declared. */
2834 continue;
2835 }
2836
2837 if (file == TGSI_FILE_INPUT) {
2838 mask = info->input_usage_mask[index];
2839 } else {
2840 mask = TGSI_WRITEMASK_XYZW;
2841 }
2842
2843 for (chan = 0; chan < 4; chan++) {
2844 if ((mask & (1 << chan)) == 0) {
2845 /* This channel is not used. */
2846 continue;
2847 }
2848
2849 if (file == TGSI_FILE_CONSTANT) {
2850 struct tgsi_full_src_register reg;
2851 memset(&reg, 0, sizeof reg);
2852 reg.Register.File = file;
2853 reg.Register.Index = index;
2854 reg.Register.SwizzleX = 0;
2855 reg.Register.SwizzleY = 1;
2856 reg.Register.SwizzleZ = 2;
2857 reg.Register.SwizzleW = 3;
2858
2859 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2860 if (!res) {
2861 continue;
2862 }
2863 } else if (file == TGSI_FILE_INPUT) {
2864 res = bld->inputs[index][chan];
2865 if (!res) {
2866 continue;
2867 }
2868 } else if (file == TGSI_FILE_TEMPORARY) {
2869 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2870 assert(reg_ptr);
2871 res = LLVMBuildLoad(builder, reg_ptr, "");
2872 } else if (file == TGSI_FILE_OUTPUT) {
2873 reg_ptr = lp_get_output_ptr(bld, index, chan);
2874 assert(reg_ptr);
2875 res = LLVMBuildLoad(builder, reg_ptr, "");
2876 } else {
2877 assert(0);
2878 continue;
2879 }
2880
2881 emit_dump_reg(gallivm, file, index, chan, res);
2882 }
2883 }
2884 }
2885
2886
2887
2888 void
2889 lp_emit_declaration_soa(
2890 struct lp_build_tgsi_context *bld_base,
2891 const struct tgsi_full_declaration *decl)
2892 {
2893 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2894 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2895 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2896 const unsigned first = decl->Range.First;
2897 const unsigned last = decl->Range.Last;
2898 unsigned idx, i;
2899
2900 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2901
2902 switch (decl->Declaration.File) {
2903 case TGSI_FILE_TEMPORARY:
2904 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2905 assert(last < LP_MAX_INLINED_TEMPS);
2906 for (idx = first; idx <= last; ++idx) {
2907 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2908 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2909 }
2910 }
2911 break;
2912
2913 case TGSI_FILE_OUTPUT:
2914 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2915 for (idx = first; idx <= last; ++idx) {
2916 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2917 bld->outputs[idx][i] = lp_build_alloca(gallivm,
2918 vec_type, "output");
2919 }
2920 }
2921 break;
2922
2923 case TGSI_FILE_ADDRESS:
2924 /* ADDR registers are only allocated with an integer LLVM IR type,
2925 * as they are guaranteed to always hold integers.
2926 * XXX: Not sure if this exception is worthwhile (or the whole idea of
2927 * an ADDR register for that matter).
2928 */
2929 assert(last < LP_MAX_TGSI_ADDRS);
2930 for (idx = first; idx <= last; ++idx) {
2931 assert(idx < LP_MAX_TGSI_ADDRS);
2932 for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2933 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2934 }
2935 break;
2936
2937 case TGSI_FILE_SAMPLER_VIEW:
2938 /*
2939 * The target stored here MUST match whatever there actually
2940 * is in the set sampler views (what about return type?).
2941 */
2942 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2943 for (idx = first; idx <= last; ++idx) {
2944 bld->sv[idx] = decl->SamplerView;
2945 }
2946 break;
2947
2948 case TGSI_FILE_CONSTANT:
2949 {
2950 /*
2951 * We could trivially fetch the per-buffer pointer when fetching the
2952 * constant, relying on llvm to figure out it's always the same pointer
2953 * anyway. However, doing so results in a huge (more than factor of 10)
2954 * slowdown in llvm compilation times for some (but not all) shaders
2955 * (more specifically, the IR optimization spends way more time in
2956 * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2957 */
2958 unsigned idx2D = decl->Dim.Index2D;
2959 LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2960 assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2961 bld->consts[idx2D] =
2962 lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2963 bld->consts_sizes[idx2D] =
2964 lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2965 }
2966 break;
2967 case TGSI_FILE_BUFFER:
2968 {
2969 unsigned idx = decl->Range.First;
2970 LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2971 assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2972 bld->ssbos[idx] =
2973 lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2974 bld->ssbo_sizes[idx] =
2975 lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2976
2977 }
2978 break;
2979 case TGSI_FILE_MEMORY:
2980 break;
2981 default:
2982 /* don't need to declare other vars */
2983 break;
2984 }
2985 }
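
/*
 * Example: for a shader containing
 *
 *   DCL TEMP[0..1]
 *
 * (with no indirect addressing of temporaries), the code above creates
 * eight allocas, temps[0..1][0..3], each holding one vec_type vector.
 */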
2986
2987
2988 void lp_emit_immediate_soa(
2989 struct lp_build_tgsi_context *bld_base,
2990 const struct tgsi_full_immediate *imm)
2991 {
2992 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2993 struct gallivm_state * gallivm = bld_base->base.gallivm;
2994 LLVMValueRef imms[4];
2995 unsigned i;
2996 const uint size = imm->Immediate.NrTokens - 1;
2997 assert(size <= 4);
2998 switch (imm->Immediate.DataType) {
2999 case TGSI_IMM_FLOAT32:
3000 for( i = 0; i < size; ++i )
3001 imms[i] =
3002 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3003
3004 break;
3005 case TGSI_IMM_FLOAT64:
3006 case TGSI_IMM_UINT64:
3007 case TGSI_IMM_INT64:
3008 case TGSI_IMM_UINT32:
3009 for( i = 0; i < size; ++i ) {
3010 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3011 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3012 }
3013
3014 break;
3015 case TGSI_IMM_INT32:
3016 for( i = 0; i < size; ++i ) {
3017 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3018 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3019 }
3020
3021 break;
3022 }
3023 for( i = size; i < 4; ++i )
3024 imms[i] = bld_base->base.undef;
3025
3026 if (bld->use_immediates_array) {
3027 unsigned index = bld->num_immediates;
3028 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3029 LLVMBuilderRef builder = gallivm->builder;
3030 LLVMValueRef gep[2];
3031 gep[0] = lp_build_const_int32(gallivm, 0);
3032
3033 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3034 for (i = 0; i < 4; ++i ) {
3035 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3036 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3037 bld->imms_array, gep, 2, "");
3038 LLVMBuildStore(builder, imms[i], imm_ptr);
3039 }
3040 } else {
3041 /* simply copy the immediate values into the next immediates[] slot */
3042 unsigned i;
3043 assert(imm->Immediate.NrTokens - 1 <= 4);
3044 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3045
3046 for(i = 0; i < 4; ++i )
3047 bld->immediates[bld->num_immediates][i] = imms[i];
3048
3049 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3050 unsigned index = bld->num_immediates;
3051 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3052 LLVMBuilderRef builder = gallivm->builder;
3053 LLVMValueRef gep[2];
3054 gep[0] = lp_build_const_int32(gallivm, 0);
3055 for (i = 0; i < 4; ++i ) {
3056 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3057 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3058 bld->imms_array, gep, 2, "");
3059 LLVMBuildStore(builder,
3060 bld->immediates[index][i],
3061 imm_ptr);
3062 }
3063 }
3064 }
3065
3066 bld->num_immediates++;
3067 }
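
/*
 * Example: an immediate such as
 *
 *   IMM[0] FLT32 {0.5000, 1.0000, 0.0000, 0.0000}
 *
 * becomes four splat vectors <0.5, 0.5, ...>, <1.0, 1.0, ...> etc. in
 * immediates[0][0..3]; channels past NrTokens-1 would be undef.
 */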
3068
3069 static void
3070 ddx_emit(
3071 const struct lp_build_tgsi_action * action,
3072 struct lp_build_tgsi_context * bld_base,
3073 struct lp_build_emit_data * emit_data)
3074 {
3075 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3076
3077 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3078 &emit_data->output[emit_data->chan], NULL);
3079 }
3080
3081 static void
3082 ddy_emit(
3083 const struct lp_build_tgsi_action * action,
3084 struct lp_build_tgsi_context * bld_base,
3085 struct lp_build_emit_data * emit_data)
3086 {
3087 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3088
3089 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3090 &emit_data->output[emit_data->chan]);
3091 }
3092
3093 static void
3094 kill_emit(
3095 const struct lp_build_tgsi_action * action,
3096 struct lp_build_tgsi_context * bld_base,
3097 struct lp_build_emit_data * emit_data)
3098 {
3099 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3100
3101 emit_kill(bld, bld_base->pc - 1);
3102 }
3103
3104 static void
3105 kill_if_emit(
3106 const struct lp_build_tgsi_action * action,
3107 struct lp_build_tgsi_context * bld_base,
3108 struct lp_build_emit_data * emit_data)
3109 {
3110 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3111
3112 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3113 }
3114
3115 static void
3116 tex_emit(
3117 const struct lp_build_tgsi_action * action,
3118 struct lp_build_tgsi_context * bld_base,
3119 struct lp_build_emit_data * emit_data)
3120 {
3121 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3122
3123 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3124 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3125 }
3126
3127 static void
3128 tex2_emit(
3129 const struct lp_build_tgsi_action * action,
3130 struct lp_build_tgsi_context * bld_base,
3131 struct lp_build_emit_data * emit_data)
3132 {
3133 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3134
3135 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3136 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3137 }
3138
3139 static void
3140 txb_emit(
3141 const struct lp_build_tgsi_action * action,
3142 struct lp_build_tgsi_context * bld_base,
3143 struct lp_build_emit_data * emit_data)
3144 {
3145 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3146
3147 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3148 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3149 }
3150
3151 static void
3152 txb2_emit(
3153 const struct lp_build_tgsi_action * action,
3154 struct lp_build_tgsi_context * bld_base,
3155 struct lp_build_emit_data * emit_data)
3156 {
3157 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3158
3159 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3160 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3161 }
3162
3163 static void
3164 txd_emit(
3165 const struct lp_build_tgsi_action * action,
3166 struct lp_build_tgsi_context * bld_base,
3167 struct lp_build_emit_data * emit_data)
3168 {
3169 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3170
3171 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3172 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3173 }
3174
3175 static void
3176 txl_emit(
3177 const struct lp_build_tgsi_action * action,
3178 struct lp_build_tgsi_context * bld_base,
3179 struct lp_build_emit_data * emit_data)
3180 {
3181 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3182
3183 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3184 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3185 }
3186
3187 static void
3188 txl2_emit(
3189 const struct lp_build_tgsi_action * action,
3190 struct lp_build_tgsi_context * bld_base,
3191 struct lp_build_emit_data * emit_data)
3192 {
3193 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3194
3195 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3196 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3197 }
3198
3199 static void
3200 txp_emit(
3201 const struct lp_build_tgsi_action * action,
3202 struct lp_build_tgsi_context * bld_base,
3203 struct lp_build_emit_data * emit_data)
3204 {
3205 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3206
3207 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3208 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3209 }
3210
3211 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3212 tg4_emit(
3213 const struct lp_build_tgsi_action * action,
3214 struct lp_build_tgsi_context * bld_base,
3215 struct lp_build_emit_data * emit_data)
3216 {
3217 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3218
3219 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3220 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3221 }
3222
3223 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3224 lodq_emit(
3225 const struct lp_build_tgsi_action * action,
3226 struct lp_build_tgsi_context * bld_base,
3227 struct lp_build_emit_data * emit_data)
3228 {
3229 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3230
3231 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3232 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3233 }
3234
3235 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3236 txq_emit(
3237 const struct lp_build_tgsi_action * action,
3238 struct lp_build_tgsi_context * bld_base,
3239 struct lp_build_emit_data * emit_data)
3240 {
3241 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3242
3243 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3244 }
3245
3246 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3247 txf_emit(
3248 const struct lp_build_tgsi_action * action,
3249 struct lp_build_tgsi_context * bld_base,
3250 struct lp_build_emit_data * emit_data)
3251 {
3252 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3253
3254 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3255 }
3256
3257 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3258 sample_i_emit(
3259 const struct lp_build_tgsi_action * action,
3260 struct lp_build_tgsi_context * bld_base,
3261 struct lp_build_emit_data * emit_data)
3262 {
3263 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3264
3265 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3266 }
3267
3268 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3269 sample_emit(
3270 const struct lp_build_tgsi_action * action,
3271 struct lp_build_tgsi_context * bld_base,
3272 struct lp_build_emit_data * emit_data)
3273 {
3274 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3275
3276 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3277 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3278 }
3279
3280 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3281 sample_b_emit(
3282 const struct lp_build_tgsi_action * action,
3283 struct lp_build_tgsi_context * bld_base,
3284 struct lp_build_emit_data * emit_data)
3285 {
3286 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3287
3288 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3289 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3290 }
3291
3292 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3293 sample_c_emit(
3294 const struct lp_build_tgsi_action * action,
3295 struct lp_build_tgsi_context * bld_base,
3296 struct lp_build_emit_data * emit_data)
3297 {
3298 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3299
3300 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3301 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3302 }
3303
3304 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3305 sample_c_lz_emit(
3306 const struct lp_build_tgsi_action * action,
3307 struct lp_build_tgsi_context * bld_base,
3308 struct lp_build_emit_data * emit_data)
3309 {
3310 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3311
3312 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3313 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3314 }
3315
3316 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3317 sample_d_emit(
3318 const struct lp_build_tgsi_action * action,
3319 struct lp_build_tgsi_context * bld_base,
3320 struct lp_build_emit_data * emit_data)
3321 {
3322 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3323
3324 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3325 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3326 }
3327
3328 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3329 sample_l_emit(
3330 const struct lp_build_tgsi_action * action,
3331 struct lp_build_tgsi_context * bld_base,
3332 struct lp_build_emit_data * emit_data)
3333 {
3334 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3335
3336 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3337 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3338 }
3339
3340 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3341 gather4_emit(
3342 const struct lp_build_tgsi_action * action,
3343 struct lp_build_tgsi_context * bld_base,
3344 struct lp_build_emit_data * emit_data)
3345 {
3346 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3347
3348 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3349 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3350 }
3351
3352 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3353 sviewinfo_emit(
3354 const struct lp_build_tgsi_action * action,
3355 struct lp_build_tgsi_context * bld_base,
3356 struct lp_build_emit_data * emit_data)
3357 {
3358 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3359
3360 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3361 }
3362
3363 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3364 lod_emit(
3365 const struct lp_build_tgsi_action * action,
3366 struct lp_build_tgsi_context * bld_base,
3367 struct lp_build_emit_data * emit_data)
3368 {
3369 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3370
3371 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3372 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3373 }
3374
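/*
 * Translate a TGSI texture target into a coordinate dimension count and
 * the coordinate index holding the array layer (0 when the target is not
 * an array).
 */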
static void
target_to_dims_layer(enum tgsi_texture_type target,
                     unsigned *dims,
                     unsigned *layer_coord)
{
   *layer_coord = 0;
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      *dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      *layer_coord = 1;
      *dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      *dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      *layer_coord = 2;
      *dims = 2;
      break;
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_CUBE_ARRAY:
      *dims = 3;
      break;
   default:
      assert(0);
      *dims = 0;
      return;
   }
}

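/*
 * Emit code for an image LOAD: fetch the coordinates, fill in a
 * lp_img_params struct and hand it off to the image backend.
 */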
static void
img_load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
   unsigned layer_coord;

   target_to_dims_layer(target, &dims, &layer_coord);

   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);

   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.outdata = emit_data->output;
   params.target = tgsi_to_pipe_tex_target(target);
   params.image_index = emit_data->inst->Src[0].Register.Index;
   params.img_op = LP_IMG_LOAD;
   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}

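/*
 * Emit code for TGSI_OPCODE_LOAD. Dispatches to the image path for image
 * resources, gathers per-channel from constant buffers with bounds
 * checking, and otherwise reads SSBO/shared memory one lane at a time
 * under the execution mask.
 */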
static void
load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
          bufreg->Register.File == TGSI_FILE_IMAGE ||
          bufreg->Register.File == TGSI_FILE_MEMORY ||
          bufreg->Register.File == TGSI_FILE_CONSTBUF);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_load_emit(action, bld_base, emit_data);
   } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
      LLVMValueRef consts_ptr = bld->consts[buf];
      LLVMValueRef num_consts = bld->consts_sizes[buf];

      LLVMValueRef indirect_index;
      LLVMValueRef overflow_mask;

      indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
      indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);

      /* Gather values from the constant buffer */
      unsigned chan_index;
      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         /* Construct a boolean vector telling us which channels
          * overflow the bound constant buffer */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          indirect_index, num_consts);

         /* index_vec = indirect_index * 4 */
         LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec,
                                  lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
      }
   } else if (0) {
      /* for indirect support with ARB_gpu_shader5 */
   } else {
      LLVMValueRef index;
      LLVMValueRef scalar, scalar_ptr;
      unsigned chan_index;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         struct lp_build_if_state ifthen;
         LLVMValueRef cond, temp_res;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

         lp_build_if(&ifthen, gallivm, cond);
         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);

         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_else(&ifthen);
         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
      }
   }
}

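/*
 * Emit code for an image STORE: like img_load_emit() but with the source
 * data fetched into params.indata and the execution mask passed along so
 * the backend only writes active lanes.
 */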
static void
img_store_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
   unsigned layer_coord;

   target_to_dims_layer(target, &dims, &layer_coord);
   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.outdata = NULL;
   params.exec_mask = mask_vec(bld_base);
   params.target = tgsi_to_pipe_tex_target(target);
   params.image_index = emit_data->inst->Dst[0].Register.Index;
   params.img_op = LP_IMG_STORE;
   for (unsigned i = 0; i < 4; i++)
      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);

   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}

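/*
 * Emit code for TGSI_OPCODE_STORE. Image stores go through the image
 * backend; SSBO/shared stores scalarize each enabled channel and write
 * only the lanes that are active (and, for SSBOs, in bounds).
 */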
static void
store_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_store_emit(action, bld_base, emit_data);
   } else if (0) {

   } else {
      LLVMValueRef index; /* vector index into the ssbo/shared buffer */
      LLVMValueRef scalar_ptr;
      LLVMValueRef value;
      unsigned chan_index;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);

         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                          loop_state.counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

         struct lp_build_if_state ifthen;
         LLVMValueRef cond;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
         lp_build_if(&ifthen, gallivm, cond);

         lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);

         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
      }
   }
}

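/*
 * Emit code for TGSI_OPCODE_RESQ: a size query through the image backend
 * for images, or a broadcast of the stored SSBO size otherwise.
 */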
static void
resq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];

   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
      struct lp_sampler_size_query_params params = { 0 };
      params.int_type = bld->bld_base.int_bld.type;
      params.texture_unit = buf;
      params.target = tgsi_to_pipe_tex_target(target);
      params.context_ptr = bld->context_ptr;
      params.sizes_out = emit_data->output;

      bld->image->emit_size_query(bld->image,
                                  bld->bld_base.base.gallivm,
                                  &params);
   } else {
      LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];

      emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
   }
}

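/*
 * Emit code for an image atomic through the image backend; ATOMCAS
 * additionally fetches the comparison source into params.indata2.
 */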
static void
img_atomic_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data,
   LLVMAtomicRMWBinOp op)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   unsigned layer_coord;
   enum tgsi_texture_type target = emit_data->inst->Memory.Texture;

   target_to_dims_layer(target, &dims, &layer_coord);

   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.exec_mask = mask_vec(bld_base);
   params.image_index = emit_data->inst->Src[0].Register.Index;
   params.coords = coords;
   params.target = tgsi_to_pipe_tex_target(target);
   params.op = op;
   params.outdata = emit_data->output;
   params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;

   for (unsigned i = 0; i < 4; i++)
      params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
      for (unsigned i = 0; i < 4; i++)
         params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
   }
   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}

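/*
 * Emit code for the TGSI atomic opcodes. The opcode is first mapped to
 * the corresponding LLVMAtomicRMWBinOp; buffer/shared atomics are then
 * scalarized into a per-lane loop guarded by the execution mask, using
 * cmpxchg for ATOMCAS and atomicrmw for everything else.
 */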
static void
atomic_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];

   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   unsigned buf = bufreg->Register.Index;
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   LLVMAtomicRMWBinOp op = -1;
   switch (emit_data->inst->Instruction.Opcode) {
   case TGSI_OPCODE_ATOMUADD:
      op = LLVMAtomicRMWBinOpAdd;
      break;
   case TGSI_OPCODE_ATOMXCHG:
      op = LLVMAtomicRMWBinOpXchg;
      break;
   case TGSI_OPCODE_ATOMAND:
      op = LLVMAtomicRMWBinOpAnd;
      break;
   case TGSI_OPCODE_ATOMOR:
      op = LLVMAtomicRMWBinOpOr;
      break;
   case TGSI_OPCODE_ATOMXOR:
      op = LLVMAtomicRMWBinOpXor;
      break;
   case TGSI_OPCODE_ATOMUMIN:
      op = LLVMAtomicRMWBinOpUMin;
      break;
   case TGSI_OPCODE_ATOMUMAX:
      op = LLVMAtomicRMWBinOpUMax;
      break;
   case TGSI_OPCODE_ATOMIMIN:
      op = LLVMAtomicRMWBinOpMin;
      break;
   case TGSI_OPCODE_ATOMIMAX:
      op = LLVMAtomicRMWBinOpMax;
      break;
   case TGSI_OPCODE_ATOMCAS:
      break;
   default:
      assert(0);
      return;
   }

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_atomic_emit(action, bld_base, emit_data, op);
   } else if (0) {
   } else {
      LLVMValueRef index; /* vector index into the ssbo/shared buffer */
      LLVMValueRef scalar, scalar_ptr;
      LLVMValueRef value;

      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);

      index = lp_build_shr_imm(uint_bld, index, 2);

      if (!is_shared) {
         index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
         scalar_ptr = bld->ssbos[buf];
      } else
         scalar_ptr = bld->shared_ptr;

      LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                              uint_bld->vec_type, "");

      LLVMValueRef ssbo_limit;
      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      LLVMValueRef exec_mask = mask_vec(bld_base);

      if (!is_shared) {
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                       loop_state.counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

      index = LLVMBuildExtractElement(gallivm->builder, index,
                                      loop_state.counter, "");

      scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
                                &index, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
         LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
                                                            loop_state.counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, op,
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      temp_res = LLVMBuildLoad(builder, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      temp_res = LLVMBuildLoad(builder, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
   }
}

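/*
 * Emit code for TGSI_OPCODE_BARRIER: suspend the coroutine and continue
 * in a fresh "resume" block once the coroutine is re-entered.
 */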
static void
barrier_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");

   lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
   LLVMPositionBuilderAtEnd(gallivm->builder, resume);
}

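/*
 * Emit code for TGSI_OPCODE_MEMBAR as a sequentially consistent fence.
 */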
static void
membar_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
}

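/*
 * Increment the vector at *ptr by one in every active lane. Active mask
 * lanes are all-ones (i.e. -1), so subtracting the mask adds 1.
 */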
static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                          LLVMValueRef ptr,
                          LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = LLVMBuildSub(builder, current_vec, mask, "");

   LLVMBuildStore(builder, current_vec, ptr);
}

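/*
 * Zero the lanes of the uint vector at *ptr that are selected by mask,
 * leaving the other lanes untouched.
 */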
static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = lp_build_select(&bld_base->uint_bld,
                                 mask,
                                 bld_base->uint_bld.zero,
                                 current_vec);

   LLVMBuildStore(builder, current_vec, ptr);
}

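/*
 * And the current mask with a mask of the lanes that still have room for
 * another vertex, so no lane emits more than max_output_vertices.
 */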
static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                        total_emitted_vertices_vec,
                                        bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
}

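/*
 * Emit code for TGSI_OPCODE_EMIT: gather the current outputs, pass them
 * to the GS interface, and bump the per-lane emitted vertex counters
 * under the (clamped) execution mask.
 */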
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
                                                    TGSI_TYPE_UNSIGNED,
                                                    emit_data->inst->Src[0].Register.SwizzleX);
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");

      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                                 bld->outputs,
                                 total_emitted_vertices_vec,
                                 mask,
                                 stream_id);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}


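/*
 * Finish the current primitive for the lanes in mask that actually have
 * unflushed vertices, then bump the primitive counters and reset the
 * per-primitive vertex counts for those lanes.
 */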
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* Combine the current execution mask with the mask telling us
       * which, if any, execution slots actually have unemitted
       * primitives; this way end_primitive executes only on the paths
       * that still have unflushed vertices. */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                   total_emitted_vertices_vec,
                                   emitted_vertices_vec,
                                   emitted_prims_vec,
                                   mask_vec(bld_base), 0);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}

static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

static void
barrier_emit_tcs(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface->emit_barrier) {
      bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
   }
}


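/*
 * The control-flow opcode handlers below simply delegate to the
 * lp_exec_mask helpers, which maintain the per-lane execution mask
 * across calls, loops, conditionals and switches.
 */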
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_tgsi_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask, true);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

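/*
 * Allocate and initialize the per-invocation state before the shader
 * body: alloca-backed arrays for indirectly addressed register files and
 * the GS vertex/primitive counters.
 */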
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
       !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
                                                     bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

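/*
 * Hook run after the declarations have been processed; currently only
 * the TCS interface makes use of it.
 */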
static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
      bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
   }
}

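/*
 * Finish the shader body: optionally dump the output file, run the TCS
 * epilogue, flush any pending GS primitives, and gather the outputs for
 * the caller.
 */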
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
      bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
   }

   /* If we have indirect addressing in outputs, we need to copy our
    * alloca array to the output slots specified by the caller. */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache. Note we must not call end_primitive here
       * since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec, 0);
   } else {
      gather_outputs(bld);
   }
}

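/**
 * Translate a TGSI shader into LLVM IR, SoA style: every TGSI channel
 * becomes a vector of type.length values, one per lane.
 *
 * A minimal sketch of a call site (assuming gallivm, tokens, info,
 * mask, consts_ptr and system_values have already been set up; the
 * exact params fields required depend on the shader stage):
 *
 *    struct lp_build_tgsi_params params;
 *    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
 *
 *    memset(&params, 0, sizeof params);
 *    params.type = lp_type_float_vec(32, 128);  // 4 x f32 lanes
 *    params.info = &info;
 *    params.mask = mask;
 *    params.consts_ptr = consts_ptr;
 *    params.system_values = &system_values;
 *    // sampler/image/context_ptr etc. as needed by the shader
 *    lp_build_tgsi_soa(gallivm, tokens, &params, outputs);
 */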
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
      (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* There's no default value for this because it should always be
       * set, but apps using ext_geometry_shader4 quite often forgot to,
       * so we use MAX_VERTEX_VARYING from that spec instead. We could
       * assert when it's not set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}